Repository: WZMIAOMIAO/deep-learning-for-image-processing
Branch: master
Commit: 1ec3fe6f374f
Files: 593
Total size: 3.3 MB

Directory structure:
gitextract_1s0rnibl/

├── .github/
│   └── ISSUE_TEMPLATE/
│       └── issue-template.md
├── .gitignore
├── LICENSE
├── README.md
├── article_link/
│   └── README.md
├── course_ppt/
│   └── README.md
├── data_set/
│   ├── README.md
│   └── split_data.py
├── deploying_service/
│   ├── deploying_pytorch/
│   │   ├── convert_onnx_cls/
│   │   │   ├── class_indices.json
│   │   │   ├── main.py
│   │   │   └── model.py
│   │   ├── convert_openvino/
│   │   │   ├── convert_resnet34/
│   │   │   │   ├── README.md
│   │   │   │   ├── compare_fps.py
│   │   │   │   ├── compare_onnx_and_ir.py
│   │   │   │   ├── convert_pytorch2onnx.py
│   │   │   │   ├── model.py
│   │   │   │   ├── quantization_int8.py
│   │   │   │   ├── requirements.txt
│   │   │   │   └── utils.py
│   │   │   └── convert_yolov5/
│   │   │       ├── README.md
│   │   │       ├── compare_fps.py
│   │   │       ├── compare_onnx_and_ir.py
│   │   │       ├── draw_box_utils.py
│   │   │       ├── evaluation.py
│   │   │       ├── predict.py
│   │   │       ├── quantization_int8.py
│   │   │       ├── requirements.txt
│   │   │       └── utils.py
│   │   ├── convert_tensorrt/
│   │   │   └── convert_resnet34/
│   │   │       ├── compare_onnx_and_trt.py
│   │   │       ├── convert_pytorch2onnx.py
│   │   │       ├── my_dataset.py
│   │   │       ├── quantization.py
│   │   │       └── utils.py
│   │   └── pytorch_flask_service/
│   │       ├── class_indices.json
│   │       ├── main.py
│   │       ├── model.py
│   │       ├── requirements.txt
│   │       └── templates/
│   │           └── up.html
│   └── pruning_model_pytorch/
│       ├── class_indices.json
│       ├── main.py
│       ├── model.py
│       ├── predict.py
│       └── train.py
├── others_project/
│   ├── draw_dilated_conv/
│   │   └── main.py
│   ├── kmeans_anchors/
│   │   ├── main.py
│   │   ├── plot_kmeans.py
│   │   ├── read_voc.py
│   │   └── yolo_kmeans.py
│   ├── openvinotest/
│   │   └── openvino_cls_test/
│   │       ├── class_indices.json
│   │       ├── create_imagenet_annotation.py
│   │       ├── float32vsint8.py
│   │       ├── main.py
│   │       ├── model.py
│   │       └── speed_test.py
│   ├── readPbFile/
│   │   ├── README.md
│   │   ├── pascal_label_map.pbtxt
│   │   ├── readPb.py
│   │   ├── test_images/
│   │   │   └── image_info.txt
│   │   └── using_function.py
│   ├── textcnnKeras/
│   │   ├── dataGenerator.py
│   │   ├── data_link.txt
│   │   ├── main.py
│   │   └── models.py
│   └── trans_widerface_to_xml/
│       ├── create_xml.py
│       └── main.py
├── pytorch_classification/
│   ├── ConfusionMatrix/
│   │   ├── class_indices.json
│   │   ├── main.py
│   │   └── model.py
│   ├── ConvNeXt/
│   │   ├── README.md
│   │   ├── model.py
│   │   ├── my_dataset.py
│   │   ├── predict.py
│   │   ├── train.py
│   │   └── utils.py
│   ├── MobileViT/
│   │   ├── README.md
│   │   ├── model.py
│   │   ├── model_config.py
│   │   ├── my_dataset.py
│   │   ├── predict.py
│   │   ├── train.py
│   │   ├── transformer.py
│   │   ├── unfold_test.py
│   │   └── utils.py
│   ├── README.md
│   ├── Test10_regnet/
│   │   ├── README.md
│   │   ├── model.py
│   │   ├── my_dataset.py
│   │   ├── predict.py
│   │   ├── pretrain_weights.py
│   │   ├── train.py
│   │   └── utils.py
│   ├── Test11_efficientnetV2/
│   │   ├── README.md
│   │   ├── class_indices.json
│   │   ├── model.py
│   │   ├── my_dataset.py
│   │   ├── predict.py
│   │   ├── train.py
│   │   ├── trans_effv2_weights.py
│   │   └── utils.py
│   ├── Test1_official_demo/
│   │   ├── model.py
│   │   ├── predict.py
│   │   └── train.py
│   ├── Test2_alexnet/
│   │   ├── class_indices.json
│   │   ├── model.py
│   │   ├── predict.py
│   │   └── train.py
│   ├── Test3_vggnet/
│   │   ├── class_indices.json
│   │   ├── model.py
│   │   ├── predict.py
│   │   └── train.py
│   ├── Test4_googlenet/
│   │   ├── class_indices.json
│   │   ├── model.py
│   │   ├── predict.py
│   │   └── train.py
│   ├── Test5_resnet/
│   │   ├── README.md
│   │   ├── batch_predict.py
│   │   ├── class_indices.json
│   │   ├── load_weights.py
│   │   ├── model.py
│   │   ├── predict.py
│   │   └── train.py
│   ├── Test6_mobilenet/
│   │   ├── class_indices.json
│   │   ├── model_v2.py
│   │   ├── model_v3.py
│   │   ├── predict.py
│   │   └── train.py
│   ├── Test7_shufflenet/
│   │   ├── README.md
│   │   ├── class_indices.json
│   │   ├── model.py
│   │   ├── my_dataset.py
│   │   ├── predict.py
│   │   ├── train.py
│   │   └── utils.py
│   ├── Test8_densenet/
│   │   ├── README.md
│   │   ├── model.py
│   │   ├── my_dataset.py
│   │   ├── predict.py
│   │   ├── train.py
│   │   └── utils.py
│   ├── Test9_efficientNet/
│   │   ├── README.md
│   │   ├── model.py
│   │   ├── my_dataset.py
│   │   ├── predict.py
│   │   ├── requirements.txt
│   │   ├── train.py
│   │   ├── trans_weights_to_pytorch.py
│   │   └── utils.py
│   ├── analyze_weights_featuremap/
│   │   ├── alexnet_model.py
│   │   ├── analyze_feature_map.py
│   │   ├── analyze_kernel_weight.py
│   │   └── resnet_model.py
│   ├── custom_dataset/
│   │   ├── main.py
│   │   ├── my_dataset.py
│   │   └── utils.py
│   ├── grad_cam/
│   │   ├── README.md
│   │   ├── imagenet1k_classes.txt
│   │   ├── imagenet21k_classes.txt
│   │   ├── main_cnn.py
│   │   ├── main_swin.py
│   │   ├── main_vit.py
│   │   ├── swin_model.py
│   │   ├── utils.py
│   │   └── vit_model.py
│   ├── mini_imagenet/
│   │   ├── README.md
│   │   ├── imagenet_class_index.json
│   │   ├── model.py
│   │   ├── multi_train_utils/
│   │   │   ├── __init__.py
│   │   │   ├── distributed_utils.py
│   │   │   └── train_eval_utils.py
│   │   ├── my_dataset.py
│   │   ├── restructure_csv.py
│   │   ├── train_multi_gpu_using_launch.py
│   │   └── train_single_gpu.py
│   ├── model_complexity/
│   │   ├── main.py
│   │   ├── model.py
│   │   └── utils.py
│   ├── swin_transformer/
│   │   ├── README.md
│   │   ├── create_confusion_matrix.py
│   │   ├── model.py
│   │   ├── my_dataset.py
│   │   ├── predict.py
│   │   ├── select_incorrect_samples.py
│   │   ├── train.py
│   │   └── utils.py
│   ├── tensorboard_test/
│   │   ├── data_utils.py
│   │   ├── model.py
│   │   ├── my_dataset.py
│   │   ├── requirements.txt
│   │   ├── train.py
│   │   └── train_eval_utils.py
│   ├── train_multi_GPU/
│   │   ├── README.md
│   │   ├── model.py
│   │   ├── multi_train_utils/
│   │   │   ├── distributed_utils.py
│   │   │   └── train_eval_utils.py
│   │   ├── my_dataset.py
│   │   ├── plot_results.py
│   │   ├── requirements.txt
│   │   ├── train_multi_gpu_using_launch.py
│   │   ├── train_multi_gpu_using_spawn.py
│   │   ├── train_single_gpu.py
│   │   └── utils.py
│   └── vision_transformer/
│       ├── README.md
│       ├── flops.py
│       ├── my_dataset.py
│       ├── predict.py
│       ├── train.py
│       ├── utils.py
│       └── vit_model.py
├── pytorch_keypoint/
│   ├── DeepPose/
│   │   ├── README.md
│   │   ├── datasets.py
│   │   ├── export_onnx.py
│   │   ├── model.py
│   │   ├── predict.py
│   │   ├── requirements.txt
│   │   ├── train.py
│   │   ├── train_multi_GPU.py
│   │   ├── train_utils/
│   │   │   ├── distributed_utils.py
│   │   │   ├── losses.py
│   │   │   ├── metrics.py
│   │   │   └── train_eval_utils.py
│   │   ├── transforms.py
│   │   ├── utils.py
│   │   └── wflw_horizontal_flip_indices.py
│   └── HRNet/
│       ├── README.md
│       ├── draw_utils.py
│       ├── model/
│       │   ├── __init__.py
│       │   └── hrnet.py
│       ├── my_dataset_coco.py
│       ├── person_keypoints.json
│       ├── plot_curve.py
│       ├── predict.py
│       ├── requirements.txt
│       ├── train.py
│       ├── train_multi_GPU.py
│       ├── train_utils/
│       │   ├── __init__.py
│       │   ├── coco_eval.py
│       │   ├── coco_utils.py
│       │   ├── distributed_utils.py
│       │   ├── group_by_aspect_ratio.py
│       │   ├── loss.py
│       │   └── train_eval_utils.py
│       ├── transforms.py
│       └── validation.py
├── pytorch_object_detection/
│   ├── faster_rcnn/
│   │   ├── README.md
│   │   ├── backbone/
│   │   │   ├── __init__.py
│   │   │   ├── feature_pyramid_network.py
│   │   │   ├── mobilenetv2_model.py
│   │   │   ├── resnet50_fpn_model.py
│   │   │   └── vgg_model.py
│   │   ├── change_backbone_with_fpn.py
│   │   ├── change_backbone_without_fpn.py
│   │   ├── draw_box_utils.py
│   │   ├── my_dataset.py
│   │   ├── network_files/
│   │   │   ├── __init__.py
│   │   │   ├── boxes.py
│   │   │   ├── det_utils.py
│   │   │   ├── faster_rcnn_framework.py
│   │   │   ├── image_list.py
│   │   │   ├── roi_head.py
│   │   │   ├── rpn_function.py
│   │   │   └── transform.py
│   │   ├── pascal_voc_classes.json
│   │   ├── plot_curve.py
│   │   ├── predict.py
│   │   ├── record_mAP.txt
│   │   ├── requirements.txt
│   │   ├── split_data.py
│   │   ├── train_mobilenetv2.py
│   │   ├── train_multi_GPU.py
│   │   ├── train_res50_fpn.py
│   │   ├── train_utils/
│   │   │   ├── __init__.py
│   │   │   ├── coco_eval.py
│   │   │   ├── coco_utils.py
│   │   │   ├── distributed_utils.py
│   │   │   ├── group_by_aspect_ratio.py
│   │   │   └── train_eval_utils.py
│   │   ├── transforms.py
│   │   └── validation.py
│   ├── mask_rcnn/
│   │   ├── README.md
│   │   ├── backbone/
│   │   │   ├── __init__.py
│   │   │   ├── feature_pyramid_network.py
│   │   │   └── resnet50_fpn_model.py
│   │   ├── coco91_indices.json
│   │   ├── det_results20220406-141544.txt
│   │   ├── draw_box_utils.py
│   │   ├── my_dataset_coco.py
│   │   ├── my_dataset_voc.py
│   │   ├── network_files/
│   │   │   ├── __init__.py
│   │   │   ├── boxes.py
│   │   │   ├── det_utils.py
│   │   │   ├── faster_rcnn_framework.py
│   │   │   ├── image_list.py
│   │   │   ├── mask_rcnn.py
│   │   │   ├── roi_head.py
│   │   │   ├── rpn_function.py
│   │   │   └── transform.py
│   │   ├── pascal_voc_indices.json
│   │   ├── plot_curve.py
│   │   ├── predict.py
│   │   ├── requirements.txt
│   │   ├── seg_results20220406-141544.txt
│   │   ├── train.py
│   │   ├── train_multi_GPU.py
│   │   ├── train_utils/
│   │   │   ├── __init__.py
│   │   │   ├── coco_eval.py
│   │   │   ├── coco_utils.py
│   │   │   ├── distributed_utils.py
│   │   │   ├── group_by_aspect_ratio.py
│   │   │   └── train_eval_utils.py
│   │   ├── transforms.py
│   │   └── validation.py
│   ├── retinaNet/
│   │   ├── README.md
│   │   ├── backbone/
│   │   │   ├── __init__.py
│   │   │   ├── feature_pyramid_network.py
│   │   │   └── resnet50_fpn_model.py
│   │   ├── draw_box_utils.py
│   │   ├── my_dataset.py
│   │   ├── network_files/
│   │   │   ├── __init__.py
│   │   │   ├── anchor_utils.py
│   │   │   ├── boxes.py
│   │   │   ├── det_utils.py
│   │   │   ├── image_list.py
│   │   │   ├── losses.py
│   │   │   ├── retinanet.py
│   │   │   └── transform.py
│   │   ├── pascal_voc_classes.json
│   │   ├── plot_curve.py
│   │   ├── predict.py
│   │   ├── requirements.txt
│   │   ├── results20210421-142632.txt
│   │   ├── train.py
│   │   ├── train_multi_GPU.py
│   │   ├── train_utils/
│   │   │   ├── __init__.py
│   │   │   ├── coco_eval.py
│   │   │   ├── coco_utils.py
│   │   │   ├── distributed_utils.py
│   │   │   ├── group_by_aspect_ratio.py
│   │   │   └── train_eval_utils.py
│   │   ├── transforms.py
│   │   └── validation.py
│   ├── ssd/
│   │   ├── README.md
│   │   ├── draw_box_utils.py
│   │   ├── my_dataset.py
│   │   ├── pascal_voc_classes.json
│   │   ├── plot_curve.py
│   │   ├── predict_test.py
│   │   ├── record_mAP.txt
│   │   ├── requirements.txt
│   │   ├── src/
│   │   │   ├── __init__.py
│   │   │   ├── res50_backbone.py
│   │   │   ├── ssd_model.py
│   │   │   └── utils.py
│   │   ├── train_multi_GPU.py
│   │   ├── train_ssd300.py
│   │   ├── train_utils/
│   │   │   ├── __init__.py
│   │   │   ├── coco_eval.py
│   │   │   ├── coco_utils.py
│   │   │   ├── distributed_utils.py
│   │   │   ├── group_by_aspect_ratio.py
│   │   │   └── train_eval_utils.py
│   │   ├── transforms.py
│   │   └── validation.py
│   ├── train_coco_dataset/
│   │   ├── README.md
│   │   ├── backbone/
│   │   │   ├── __init__.py
│   │   │   ├── feature_pyramid_network.py
│   │   │   ├── mobilenetv2_model.py
│   │   │   ├── resnet.py
│   │   │   ├── resnet50_fpn_model.py
│   │   │   └── vgg_model.py
│   │   ├── change_backbone_with_fpn.py
│   │   ├── coco91_indices.json
│   │   ├── compute_receptive_field.py
│   │   ├── draw_box_utils.py
│   │   ├── my_dataset.py
│   │   ├── network_files/
│   │   │   ├── __init__.py
│   │   │   ├── boxes.py
│   │   │   ├── det_utils.py
│   │   │   ├── faster_rcnn_framework.py
│   │   │   ├── image_list.py
│   │   │   ├── roi_head.py
│   │   │   ├── rpn_function.py
│   │   │   └── transform.py
│   │   ├── plot_curve.py
│   │   ├── predict.py
│   │   ├── requirements.txt
│   │   ├── results20220408-201436.txt
│   │   ├── train.py
│   │   ├── train_multi_GPU.py
│   │   ├── train_utils/
│   │   │   ├── __init__.py
│   │   │   ├── coco_eval.py
│   │   │   ├── distributed_utils.py
│   │   │   ├── group_by_aspect_ratio.py
│   │   │   └── train_eval_utils.py
│   │   ├── transforms.py
│   │   └── validation.py
│   └── yolov3_spp/
│       ├── README.md
│       ├── build_utils/
│       │   ├── __init__.py
│       │   ├── datasets.py
│       │   ├── img_utils.py
│       │   ├── layers.py
│       │   ├── parse_config.py
│       │   ├── torch_utils.py
│       │   └── utils.py
│       ├── calculate_dataset.py
│       ├── cfg/
│       │   ├── hyp.yaml
│       │   └── yolov3-spp.cfg
│       ├── draw_box_utils.py
│       ├── export_onnx.py
│       ├── load_onnx_test.py
│       ├── models.py
│       ├── predict_test.py
│       ├── requirements.txt
│       ├── results20210515-152935.txt
│       ├── train.py
│       ├── train_multi_GPU.py
│       ├── train_utils/
│       │   ├── __init__.py
│       │   ├── coco_eval.py
│       │   ├── coco_utils.py
│       │   ├── distributed_utils.py
│       │   ├── group_by_aspect_ratio.py
│       │   └── train_eval_utils.py
│       ├── trans_voc2yolo.py
│       └── validation.py
├── pytorch_segmentation/
│   ├── deeplab_v3/
│   │   ├── README.md
│   │   ├── get_palette.py
│   │   ├── my_dataset.py
│   │   ├── palette.json
│   │   ├── pascal_voc_classes.json
│   │   ├── predict.py
│   │   ├── requirements.txt
│   │   ├── results20211027-104607.txt
│   │   ├── src/
│   │   │   ├── __init__.py
│   │   │   ├── deeplabv3_model.py
│   │   │   ├── mobilenet_backbone.py
│   │   │   └── resnet_backbone.py
│   │   ├── train.py
│   │   ├── train_multi_GPU.py
│   │   ├── train_utils/
│   │   │   ├── __init__.py
│   │   │   ├── distributed_utils.py
│   │   │   └── train_and_eval.py
│   │   ├── transforms.py
│   │   └── validation.py
│   ├── fcn/
│   │   ├── README.md
│   │   ├── get_palette.py
│   │   ├── my_dataset.py
│   │   ├── palette.json
│   │   ├── pascal_voc_classes.json
│   │   ├── predict.py
│   │   ├── requirements.txt
│   │   ├── results20210918-122740.txt
│   │   ├── src/
│   │   │   ├── __init__.py
│   │   │   ├── backbone.py
│   │   │   └── fcn_model.py
│   │   ├── train.py
│   │   ├── train_multi_GPU.py
│   │   ├── train_utils/
│   │   │   ├── __init__.py
│   │   │   ├── distributed_utils.py
│   │   │   └── train_and_eval.py
│   │   ├── transforms.py
│   │   └── validation.py
│   ├── lraspp/
│   │   ├── README.md
│   │   ├── get_palette.py
│   │   ├── my_dataset.py
│   │   ├── palette.json
│   │   ├── pascal_voc_classes.json
│   │   ├── predict.py
│   │   ├── requirements.txt
│   │   ├── results20211028-105233.txt
│   │   ├── src/
│   │   │   ├── __init__.py
│   │   │   ├── lraspp_model.py
│   │   │   └── mobilenet_backbone.py
│   │   ├── train.py
│   │   ├── train_multi_GPU.py
│   │   ├── train_utils/
│   │   │   ├── __init__.py
│   │   │   ├── distributed_utils.py
│   │   │   └── train_and_eval.py
│   │   ├── transforms.py
│   │   └── validation.py
│   ├── u2net/
│   │   ├── README.md
│   │   ├── convert_weight.py
│   │   ├── my_dataset.py
│   │   ├── predict.py
│   │   ├── requirements.txt
│   │   ├── results20220723-123632.txt
│   │   ├── src/
│   │   │   ├── __init__.py
│   │   │   └── model.py
│   │   ├── train.py
│   │   ├── train_multi_GPU.py
│   │   ├── train_utils/
│   │   │   ├── __init__.py
│   │   │   ├── distributed_utils.py
│   │   │   └── train_and_eval.py
│   │   ├── transforms.py
│   │   └── validation.py
│   └── unet/
│       ├── README.md
│       ├── compute_mean_std.py
│       ├── my_dataset.py
│       ├── predict.py
│       ├── requirements.txt
│       ├── results20220109-165837.txt
│       ├── src/
│       │   ├── __init__.py
│       │   ├── mobilenet_unet.py
│       │   ├── unet.py
│       │   └── vgg_unet.py
│       ├── train.py
│       ├── train_multi_GPU.py
│       ├── train_utils/
│       │   ├── __init__.py
│       │   ├── dice_coefficient_loss.py
│       │   ├── distributed_utils.py
│       │   └── train_and_eval.py
│       └── transforms.py
├── summary_problem.md
└── tensorflow_classification/
    ├── ConfusionMatrix/
    │   ├── class_indices.json
    │   ├── main.py
    │   └── model.py
    ├── ConvNeXt/
    │   ├── model.py
    │   ├── predict.py
    │   ├── train.py
    │   ├── trans_weights.py
    │   └── utils.py
    ├── README.md
    ├── Test11_efficientnetV2/
    │   ├── model.py
    │   ├── predict.py
    │   ├── train.py
    │   ├── trans_weights.py
    │   └── utils.py
    ├── Test1_official_demo/
    │   ├── model.py
    │   └── train.py
    ├── Test2_alexnet/
    │   ├── class_indices.json
    │   ├── fine_train_alexnet.py
    │   ├── model.py
    │   ├── predict.py
    │   ├── read_pth.py
    │   ├── train.py
    │   └── trainGPU.py
    ├── Test3_vgg/
    │   ├── class_indices.json
    │   ├── fine_train_vgg16.py
    │   ├── model.py
    │   ├── predict.py
    │   ├── read_ckpt.py
    │   ├── train.py
    │   └── trainGPU.py
    ├── Test4_goolenet/
    │   ├── class_indices.json
    │   ├── model.py
    │   ├── model_add_bn.py
    │   ├── predict.py
    │   ├── read_pth.py
    │   ├── train.py
    │   ├── trainGPU.py
    │   └── train_add_bn.py
    ├── Test5_resnet/
    │   ├── batch_predict.py
    │   ├── class_indices.json
    │   ├── model.py
    │   ├── predict.py
    │   ├── read_ckpt.py
    │   ├── read_h5.py
    │   ├── subclassed_model.py
    │   ├── train.py
    │   └── trainGPU.py
    ├── Test6_mobilenet/
    │   ├── model_v2.py
    │   ├── model_v3.py
    │   ├── predict.py
    │   ├── read_ckpt.py
    │   ├── trainGPU_mobilenet_v2.py
    │   ├── train_mobilenet_v2.py
    │   ├── train_mobilenet_v3.py
    │   ├── trans_v3_weights.py
    │   └── utils.py
    ├── Test7_shuffleNet/
    │   ├── model.py
    │   ├── predict.py
    │   ├── train.py
    │   ├── trans_weights.py
    │   └── utils.py
    ├── Test9_efficientNet/
    │   ├── model.py
    │   ├── predict.py
    │   ├── train.py
    │   └── utils.py
    ├── analyze_weights_featuremap/
    │   ├── alexnet_model.py
    │   ├── analyze_feature_map.py
    │   └── analyze_kernel_weight.py
    ├── custom_dataset/
    │   ├── train_fit.py
    │   └── utils.py
    ├── swin_transformer/
    │   ├── model.py
    │   ├── predict.py
    │   ├── train.py
    │   ├── trans_weights.py
    │   └── utils.py
    ├── tensorboard_test/
    │   ├── train_fit.py
    │   └── train_not_fit.py
    └── vision_transformer/
        ├── predict.py
        ├── train.py
        ├── trans_weights.py
        ├── utils.py
        └── vit_model.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/ISSUE_TEMPLATE/issue-template.md
================================================
---
name: Issue template
about: Use this template for reporting your problem
title: ''
labels: ''
assignees: ''

---

**System information**
* Have I written custom code:
* OS Platform (e.g., Windows 10 or Linux Ubuntu 16.04):
* Python version:
* Deep learning framework and version (e.g., TensorFlow 2.1 or PyTorch 1.3):
* Use GPU or not:
* CUDA/cuDNN version (if you use GPU):
* The network you trained (e.g., ResNet34 network):

**Describe the current behavior**

**Error info / logs**


================================================
FILE: .gitignore
================================================
##ignore this file##
*.idea
__pycache__
*.zip
flower_data
*.h5
*.pth
*.pt
*.jpg
*.ckpt.*
*.ckpt
*.config
*.gz
*.onnx
*.xml
*.bin
*.mapping
*.csv
checkpoint
data
VOCdevkit
ssd_resnet50_v1_fpn_shared_box_predictor
runs


================================================
FILE: LICENSE
================================================
                    GNU GENERAL PUBLIC LICENSE
                       Version 3, 29 June 2007

 Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.

                            Preamble

  The GNU General Public License is a free, copyleft license for
software and other kinds of works.

  The licenses for most software and other practical works are designed
to take away your freedom to share and change the works.  By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users.  We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors.  You can apply it to
your programs, too.

  When we speak of free software, we are referring to freedom, not
price.  Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.

  To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights.  Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.

  For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received.  You must make sure that they, too, receive
or can get the source code.  And you must show them these terms so they
know their rights.

  Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.

  For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software.  For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.

  Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so.  This is fundamentally incompatible with the aim of
protecting users' freedom to change the software.  The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable.  Therefore, we
have designed this version of the GPL to prohibit the practice for those
products.  If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.

  Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary.  To prevent this, the GPL assures that
patents cannot be used to render the program non-free.

  The precise terms and conditions for copying, distribution and
modification follow.

                       TERMS AND CONDITIONS

  0. Definitions.

  "This License" refers to version 3 of the GNU General Public License.

  "Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.

  "The Program" refers to any copyrightable work licensed under this
License.  Each licensee is addressed as "you".  "Licensees" and
"recipients" may be individuals or organizations.

  To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy.  The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.

  A "covered work" means either the unmodified Program or a work based
on the Program.

  To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy.  Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.

  To "convey" a work means any kind of propagation that enables other
parties to make or receive copies.  Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.

  An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License.  If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.

  1. Source Code.

  The "source code" for a work means the preferred form of the work
for making modifications to it.  "Object code" means any non-source
form of a work.

  A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.

  The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form.  A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.

  The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities.  However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work.  For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.

  The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.

  The Corresponding Source for a work in source code form is that
same work.

  2. Basic Permissions.

  All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met.  This License explicitly affirms your unlimited
permission to run the unmodified Program.  The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work.  This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.

  You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force.  You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright.  Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.

  Conveying under any other circumstances is permitted solely under
the conditions stated below.  Sublicensing is not allowed; section 10
makes it unnecessary.

  3. Protecting Users' Legal Rights From Anti-Circumvention Law.

  No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.

  When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.

  4. Conveying Verbatim Copies.

  You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.

  You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.

  5. Conveying Modified Source Versions.

  You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:

    a) The work must carry prominent notices stating that you modified
    it, and giving a relevant date.

    b) The work must carry prominent notices stating that it is
    released under this License and any conditions added under section
    7.  This requirement modifies the requirement in section 4 to
    "keep intact all notices".

    c) You must license the entire work, as a whole, under this
    License to anyone who comes into possession of a copy.  This
    License will therefore apply, along with any applicable section 7
    additional terms, to the whole of the work, and all its parts,
    regardless of how they are packaged.  This License gives no
    permission to license the work in any other way, but it does not
    invalidate such permission if you have separately received it.

    d) If the work has interactive user interfaces, each must display
    Appropriate Legal Notices; however, if the Program has interactive
    interfaces that do not display Appropriate Legal Notices, your
    work need not make them do so.

  A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit.  Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.

  6. Conveying Non-Source Forms.

  You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:

    a) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by the
    Corresponding Source fixed on a durable physical medium
    customarily used for software interchange.

    b) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by a
    written offer, valid for at least three years and valid for as
    long as you offer spare parts or customer support for that product
    model, to give anyone who possesses the object code either (1) a
    copy of the Corresponding Source for all the software in the
    product that is covered by this License, on a durable physical
    medium customarily used for software interchange, for a price no
    more than your reasonable cost of physically performing this
    conveying of source, or (2) access to copy the
    Corresponding Source from a network server at no charge.

    c) Convey individual copies of the object code with a copy of the
    written offer to provide the Corresponding Source.  This
    alternative is allowed only occasionally and noncommercially, and
    only if you received the object code with such an offer, in accord
    with subsection 6b.

    d) Convey the object code by offering access from a designated
    place (gratis or for a charge), and offer equivalent access to the
    Corresponding Source in the same way through the same place at no
    further charge.  You need not require recipients to copy the
    Corresponding Source along with the object code.  If the place to
    copy the object code is a network server, the Corresponding Source
    may be on a different server (operated by you or a third party)
    that supports equivalent copying facilities, provided you maintain
    clear directions next to the object code saying where to find the
    Corresponding Source.  Regardless of what server hosts the
    Corresponding Source, you remain obligated to ensure that it is
    available for as long as needed to satisfy these requirements.

    e) Convey the object code using peer-to-peer transmission, provided
    you inform other peers where the object code and Corresponding
    Source of the work are being offered to the general public at no
    charge under subsection 6d.

  A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.

  A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling.  In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage.  For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product.  A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.

  "Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source.  The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.

  If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information.  But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).

  The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed.  Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.

  Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.

  7. Additional Terms.

  "Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law.  If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.

  When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it.  (Additional permissions may be written to require their own
removal in certain cases when you modify the work.)  You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.

  Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:

    a) Disclaiming warranty or limiting liability differently from the
    terms of sections 15 and 16 of this License; or

    b) Requiring preservation of specified reasonable legal notices or
    author attributions in that material or in the Appropriate Legal
    Notices displayed by works containing it; or

    c) Prohibiting misrepresentation of the origin of that material, or
    requiring that modified versions of such material be marked in
    reasonable ways as different from the original version; or

    d) Limiting the use for publicity purposes of names of licensors or
    authors of the material; or

    e) Declining to grant rights under trademark law for use of some
    trade names, trademarks, or service marks; or

    f) Requiring indemnification of licensors and authors of that
    material by anyone who conveys the material (or modified versions of
    it) with contractual assumptions of liability to the recipient, for
    any liability that these contractual assumptions directly impose on
    those licensors and authors.

  All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10.  If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term.  If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.

  If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.

  Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.

  8. Termination.

  You may not propagate or modify a covered work except as expressly
provided under this License.  Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).

  However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.

  Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.

  Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License.  If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.

  9. Acceptance Not Required for Having Copies.

  You are not required to accept this License in order to receive or
run a copy of the Program.  Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance.  However,
nothing other than this License grants you permission to propagate or
modify any covered work.  These actions infringe copyright if you do
not accept this License.  Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.

  10. Automatic Licensing of Downstream Recipients.

  Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License.  You are not responsible
for enforcing compliance by third parties with this License.

  An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations.  If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.

  You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License.  For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.

  11. Patents.

  A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based.  The
work thus licensed is called the contributor's "contributor version".

  A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version.  For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.

  Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.

  In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement).  To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.

  If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients.  "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.

  If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.

  A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License.  You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.

  Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.

  12. No Surrender of Others' Freedom.

  If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License.  If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all.  For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.

  13. Use with the GNU Affero General Public License.

  Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work.  The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.

  14. Revised Versions of this License.

  The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time.  Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.

  Each version is given a distinguishing version number.  If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation.  If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.

  If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.

  Later license versions may give you additional or different
permissions.  However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.

  15. Disclaimer of Warranty.

  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.

  16. Limitation of Liability.

  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.

  17. Interpretation of Sections 15 and 16.

  If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.

                     END OF TERMS AND CONDITIONS

            How to Apply These Terms to Your New Programs

  If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.

  To do so, attach the following notices to the program.  It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.

    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year>  <name of author>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.

Also add information on how to contact you by electronic and paper mail.

  If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:

    <program>  Copyright (C) <year>  <name of author>
    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
    This is free software, and you are welcome to redistribute it
    under certain conditions; type `show c' for details.

The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License.  Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".

  You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<https://www.gnu.org/licenses/>.

  The GNU General Public License does not permit incorporating your program
into proprietary programs.  If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library.  If this is what you want to do, use the GNU Lesser General
Public License instead of this License.  But first, please read
<https://www.gnu.org/licenses/why-not-lgpl.html>.


================================================
FILE: README.md
================================================
# 深度学习在图像处理中的应用教程

## 前言
* 本教程是对本人研究生期间研究内容的整理与总结，希望在总结的同时也能帮助到更多的小伙伴。后期如果学习到新的知识，也会与大家一起分享。
* 本教程会以视频的方式进行分享，教学流程如下：  
1）介绍网络的结构与创新点  
2）使用Pytorch进行网络的搭建与训练  
3）使用Tensorflow（内部的keras模块）进行网络的搭建与训练 
* 课程中所有PPT都放在`course_ppt`文件夹下，需要的自行下载。


## 教程目录，点击跳转相应视频（后期会根据学习内容增加）

* 图像分类
  * LeNet（已完成）
    * [Pytorch官方demo(Lenet)](https://www.bilibili.com/video/BV187411T7Ye)
    * [Tensorflow2官方demo](https://www.bilibili.com/video/BV1n7411T7o6)

  * AlexNet（已完成）
    * [AlexNet网络讲解](https://www.bilibili.com/video/BV1p7411T7Pc)
    * [Pytorch搭建AlexNet](https://www.bilibili.com/video/BV1W7411T7qc)
    * [Tensorflow2搭建Alexnet](https://www.bilibili.com/video/BV1s7411T7vs)

  * VggNet（已完成）
    * [VggNet网络讲解](https://www.bilibili.com/video/BV1q7411T7Y6)
    * [Pytorch搭建VGG网络](https://www.bilibili.com/video/BV1i7411T7ZN)
    * [Tensorflow2搭建VGG网络](https://www.bilibili.com/video/BV1q7411T76b)

  * GoogLeNet（已完成）
    * [GoogLeNet网络讲解](https://www.bilibili.com/video/BV1z7411T7ie)
    * [Pytorch搭建GoogLeNet网络](https://www.bilibili.com/video/BV1r7411T7M5)
    * [Tensorflow2搭建GoogLeNet网络](https://www.bilibili.com/video/BV1a7411T7Ht)

  * ResNet（已完成）
    * [ResNet网络讲解](https://www.bilibili.com/video/BV1T7411T7wa)
    * [Pytorch搭建ResNet网络](https://www.bilibili.com/video/BV14E411H7Uw)
    * [Tensorflow2搭建ResNet网络](https://www.bilibili.com/video/BV1WE41177Ya)

  * ResNeXt (已完成)
    * [ResNeXt网络讲解](https://www.bilibili.com/video/BV1Ap4y1p71v/)
    * [Pytorch搭建ResNeXt网络](https://www.bilibili.com/video/BV1rX4y1N7tE)

  * MobileNet_V1_V2（已完成）
    * [MobileNet_V1_V2网络讲解](https://www.bilibili.com/video/BV1yE411p7L7)
    * [Pytorch搭建MobileNetV2网络](https://www.bilibili.com/video/BV1qE411T7qZ)
    * [Tensorflow2搭建MobileNetV2网络](https://www.bilibili.com/video/BV1NE411K7tX)

  * MobileNet_V3（已完成）
    * [MobileNet_V3网络讲解](https://www.bilibili.com/video/BV1GK4y1p7uE)
    * [Pytorch搭建MobileNetV3网络](https://www.bilibili.com/video/BV1zT4y1P7pd)
    * [Tensorflow2搭建MobileNetV3网络](https://www.bilibili.com/video/BV1KA411g7wX)

  * ShuffleNet_V1_V2 (已完成)
    * [ShuffleNet_V1_V2网络讲解](https://www.bilibili.com/video/BV15y4y1Y7SY)
    * [使用Pytorch搭建ShuffleNetV2](https://www.bilibili.com/video/BV1dh411r76X)
    * [使用Tensorflow2搭建ShuffleNetV2](https://www.bilibili.com/video/BV1kr4y1N7bh)

  * EfficientNet_V1（已完成）
    * [EfficientNet网络讲解](https://www.bilibili.com/video/BV1XK4y1U7PX)
    * [使用Pytorch搭建EfficientNet](https://www.bilibili.com/video/BV19z4y1179h/)
    * [使用Tensorflow2搭建EfficientNet](https://www.bilibili.com/video/BV1PK4y1S7Jf)

  * EfficientNet_V2 (已完成)
    * [EfficientNetV2网络讲解](https://www.bilibili.com/video/BV19v41157AU)
    * [使用Pytorch搭建EfficientNetV2](https://www.bilibili.com/video/BV1Xy4y1g74u)
    * [使用Tensorflow搭建EfficientNetV2](https://www.bilibili.com/video/BV19K4y1g7m4)
  
  * RepVGG（已完成）
    * [RepVGG网络讲解](https://www.bilibili.com/video/BV15f4y1o7QR)

  * Vision Transformer(已完成)
    * [Multi-Head Attention讲解](https://www.bilibili.com/video/BV15v411W78M)
    * [Vision Transformer网络讲解](https://www.bilibili.com/video/BV1Jh411Y7WQ)
    * [使用Pytorch搭建Vision Transformer](https://www.bilibili.com/video/BV1AL411W7dT)
    * [使用tensorflow2搭建Vision Transformer](https://www.bilibili.com/video/BV1q64y1X7GY)

  * Swin Transformer(已完成)
    * [Swin Transformer网络讲解](https://www.bilibili.com/video/BV1pL4y1v7jC)
    * [使用Pytorch搭建Swin Transformer](https://www.bilibili.com/video/BV1yg411K7Yc)
    * [使用Tensorflow2搭建Swin Transformer](https://www.bilibili.com/video/BV1bR4y1t7qT)

  * ConvNeXt(已完成)
    * [ConvNeXt网络讲解](https://www.bilibili.com/video/BV1SS4y157fu)
    * [使用Pytorch搭建ConvNeXt](https://www.bilibili.com/video/BV14S4y1L791)
    * [使用Tensorflow2搭建ConvNeXt](https://www.bilibili.com/video/BV1TS4y1V7Gz)

  * MobileViT(已完成)
    * [MobileViT网络讲解](https://www.bilibili.com/video/BV1TG41137sb)
    * [使用Pytorch搭建MobileViT](https://www.bilibili.com/video/BV1ae411L7Ki)

* 目标检测
  * Faster-RCNN/FPN（已完成）
    * [Faster-RCNN网络讲解](https://www.bilibili.com/video/BV1af4y1m7iL)
    * [FPN网络讲解](https://www.bilibili.com/video/BV1dh411U7D9)
    * [Faster-RCNN源码解析(Pytorch)](https://www.bilibili.com/video/BV1of4y1m7nj)

  * SSD/RetinaNet (已完成)
    * [SSD网络讲解](https://www.bilibili.com/video/BV1fT4y1L7Gi)
    * [RetinaNet网络讲解](https://www.bilibili.com/video/BV1Q54y1L7sM)
    * [SSD源码解析(Pytorch)](https://www.bilibili.com/video/BV1vK411H771)

  * YOLO Series (已完成)
    * [YOLO系列网络讲解(V1~V3)](https://www.bilibili.com/video/BV1yi4y1g7ro)
    * [YOLOv3 SPP源码解析(Pytorch版)](https://www.bilibili.com/video/BV1t54y1C7ra)
    * [YOLOV4网络讲解](https://www.bilibili.com/video/BV1NF41147So)
    * [YOLOV5网络讲解](https://www.bilibili.com/video/BV1T3411p7zR)
    * [YOLOX 网络讲解](https://www.bilibili.com/video/BV1JW4y1k76c)
  
  * FCOS（已完成）
    * [FCOS网络讲解](https://www.bilibili.com/video/BV1G5411X7jw)

* 语义分割 
  * FCN (已完成)
    * [FCN网络讲解](https://www.bilibili.com/video/BV1J3411C7zd)
    * [FCN源码解析(Pytorch版)](https://www.bilibili.com/video/BV19q4y1971Q)

  * DeepLabV3 (已完成)
    * [DeepLabV1网络讲解](https://www.bilibili.com/video/BV1SU4y1N7Ao)
    * [DeepLabV2网络讲解](https://www.bilibili.com/video/BV1gP4y1G7TC)
    * [DeepLabV3网络讲解](https://www.bilibili.com/video/BV1Jb4y1q7j7)
    * [DeepLabV3源码解析(Pytorch版)](https://www.bilibili.com/video/BV1TD4y1c7Wx)

  * LR-ASPP (已完成)
    * [LR-ASPP网络讲解](https://www.bilibili.com/video/BV1LS4y1M76E)
    * [LR-ASPP源码解析(Pytorch版)](https://www.bilibili.com/video/BV13D4y1F7ML)
  
  * U-Net (已完成)
    * [U-Net网络讲解](https://www.bilibili.com/video/BV1Vq4y127fB/)
    * [U-Net源码解析(Pytorch版)](https://www.bilibili.com/video/BV1Vq4y127fB)
  
  * U2Net (已完成)
    * [U2Net网络讲解](https://www.bilibili.com/video/BV1yB4y1z7mj)
    * [U2Net源码解析(Pytorch版)](https://www.bilibili.com/video/BV1Kt4y137iS)

* 实例分割
  * Mask R-CNN（已完成）
    * [Mask R-CNN网络讲解](https://www.bilibili.com/video/BV1ZY411774T)
    * [Mask R-CNN源码解析(Pytorch版)](https://www.bilibili.com/video/BV1hY411E7wD)

* 关键点检测
  * DeepPose（已完成）
    * [DeepPose网络讲解](https://www.bilibili.com/video/BV1bm421g7aJ)
    * [DeepPose源码解析(Pytorch版)](https://www.bilibili.com/video/BV1bm421g7aJ)

  * HRNet（已完成）
    * [HRNet网络讲解](https://www.bilibili.com/video/BV1bB4y1y7qP)
    * [HRNet源码解析(Pytorch版)](https://www.bilibili.com/video/BV1ar4y157JM)

**[更多相关视频请进入我的bilibili频道查看](https://space.bilibili.com/18161609/channel/index)**

---

欢迎大家关注下我的微信公众号（**阿喆学习小记**），平时会总结些相关学习博文。    

如果有什么问题，也可以到我的CSDN中一起讨论。
[https://blog.csdn.net/qq_37541097/article/details/103482003](https://blog.csdn.net/qq_37541097/article/details/103482003)

我的bilibili频道：
[https://space.bilibili.com/18161609/channel/index](https://space.bilibili.com/18161609/channel/index)


================================================
FILE: article_link/README.md
================================================
# 文献链接

## 图像分类(Classification)
- LeNet [http://yann.lecun.com/exdb/lenet/index.html](http://yann.lecun.com/exdb/lenet/index.html)
- AlexNet [http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf)
- ZFNet(Visualizing and Understanding Convolutional Networks) [https://arxiv.org/abs/1311.2901](https://arxiv.org/abs/1311.2901)
- VGG [https://arxiv.org/abs/1409.1556](https://arxiv.org/abs/1409.1556)
- GoogLeNet, Inceptionv1(Going deeper with convolutions) [https://arxiv.org/abs/1409.4842](https://arxiv.org/abs/1409.4842)
- Batch Normalization [https://arxiv.org/abs/1502.03167](https://arxiv.org/abs/1502.03167)
- Inceptionv3(Rethinking the Inception Architecture for Computer Vision) [https://arxiv.org/abs/1512.00567](https://arxiv.org/abs/1512.00567)
- Inceptionv4, Inception-ResNet [https://arxiv.org/abs/1602.07261](https://arxiv.org/abs/1602.07261)
- Xception(Deep Learning with Depthwise Separable Convolutions) [https://arxiv.org/abs/1610.02357](https://arxiv.org/abs/1610.02357)
- ResNet [https://arxiv.org/abs/1512.03385](https://arxiv.org/abs/1512.03385)
- ResNeXt [https://arxiv.org/abs/1611.05431](https://arxiv.org/abs/1611.05431)
- DenseNet [https://arxiv.org/abs/1608.06993](https://arxiv.org/abs/1608.06993)
- NASNet-A(Learning Transferable Architectures for Scalable Image Recognition) [https://arxiv.org/abs/1707.07012](https://arxiv.org/abs/1707.07012)
- SENet(Squeeze-and-Excitation Networks) [https://arxiv.org/abs/1709.01507](https://arxiv.org/abs/1709.01507)
- MobileNet(v1) [https://arxiv.org/abs/1704.04861](https://arxiv.org/abs/1704.04861)
- MobileNet(v2) [https://arxiv.org/abs/1801.04381](https://arxiv.org/abs/1801.04381)
- MobileNet(v3) [https://arxiv.org/abs/1905.02244](https://arxiv.org/abs/1905.02244)
- ShuffleNet(v1) [https://arxiv.org/abs/1707.01083](https://arxiv.org/abs/1707.01083)
- ShuffleNet(v2) [https://arxiv.org/abs/1807.11164](https://arxiv.org/abs/1807.11164)
- Bag of Tricks for Image Classification with Convolutional Neural Networks [https://arxiv.org/abs/1812.01187](https://arxiv.org/abs/1812.01187)
- EfficientNet(v1) [https://arxiv.org/abs/1905.11946](https://arxiv.org/abs/1905.11946)
- EfficientNet(v2) [https://arxiv.org/abs/2104.00298](https://arxiv.org/abs/2104.00298)
- CSPNet [https://arxiv.org/abs/1911.11929](https://arxiv.org/abs/1911.11929)
- RegNet [https://arxiv.org/abs/2003.13678](https://arxiv.org/abs/2003.13678)
- NFNets(High-Performance Large-Scale Image Recognition Without Normalization) [https://arxiv.org/abs/2102.06171](https://arxiv.org/abs/2102.06171)
- Vision Transformer [https://arxiv.org/abs/2010.11929](https://arxiv.org/abs/2010.11929)
- DeiT(Training data-efficient image transformers & distillation through attention) [https://arxiv.org/abs/2012.12877](https://arxiv.org/abs/2012.12877)
- Swin Transformer [https://arxiv.org/abs/2103.14030](https://arxiv.org/abs/2103.14030)
- Swin Transformer V2: Scaling Up Capacity and Resolution [https://arxiv.org/abs/2111.09883](https://arxiv.org/abs/2111.09883)
- BEiT: BERT Pre-Training of Image Transformers [https://arxiv.org/abs/2106.08254](https://arxiv.org/abs/2106.08254)
- MAE(Masked Autoencoders Are Scalable Vision Learners) [https://arxiv.org/abs/2111.06377](https://arxiv.org/abs/2111.06377)
- ConvNeXt(A ConvNet for the 2020s) [https://arxiv.org/abs/2201.03545](https://arxiv.org/abs/2201.03545)
- MobileViT V1 [https://arxiv.org/abs/2110.02178](https://arxiv.org/abs/2110.02178)
- MobileViT V2(Separable Self-attention for Mobile Vision Transformers) [https://arxiv.org/abs/2206.02680](https://arxiv.org/abs/2206.02680)
- MobileOne(An Improved One millisecond Mobile Backbone) [https://arxiv.org/abs/2206.04040](https://arxiv.org/abs/2206.04040)


## 目标检测(Object Detection)
- R-CNN [https://arxiv.org/abs/1311.2524](https://arxiv.org/abs/1311.2524)
- Fast R-CNN [https://arxiv.org/abs/1504.08083](https://arxiv.org/abs/1504.08083)
- Faster R-CNN [https://arxiv.org/abs/1506.01497](https://arxiv.org/abs/1506.01497)
- Cascade R-CNN: Delving into High Quality Object Detection [https://arxiv.org/abs/1712.00726](https://arxiv.org/abs/1712.00726)
- Mask R-CNN [https://arxiv.org/abs/1703.06870](https://arxiv.org/abs/1703.06870)
- SSD [https://arxiv.org/abs/1512.02325](https://arxiv.org/abs/1512.02325)
- FPN(Feature Pyramid Networks for Object Detection) [https://arxiv.org/abs/1612.03144](https://arxiv.org/abs/1612.03144)
- RetinaNet(Focal Loss for Dense Object Detection) [https://arxiv.org/abs/1708.02002](https://arxiv.org/abs/1708.02002)
- Bag of Freebies for Training Object Detection Neural Networks [https://arxiv.org/abs/1902.04103](https://arxiv.org/abs/1902.04103)
- YOLOv1 [https://arxiv.org/abs/1506.02640](https://arxiv.org/abs/1506.02640)
- YOLOv2 [https://arxiv.org/abs/1612.08242](https://arxiv.org/abs/1612.08242)
- YOLOv3 [https://arxiv.org/abs/1804.02767](https://arxiv.org/abs/1804.02767)
- YOLOv4 [https://arxiv.org/abs/2004.10934](https://arxiv.org/abs/2004.10934)
- YOLOX(Exceeding YOLO Series in 2021) [https://arxiv.org/abs/2107.08430](https://arxiv.org/abs/2107.08430)
- YOLOv7 [https://arxiv.org/abs/2207.02696](https://arxiv.org/abs/2207.02696)
- PP-YOLO [https://arxiv.org/abs/2007.12099](https://arxiv.org/abs/2007.12099)
- PP-YOLOv2 [https://arxiv.org/abs/2104.10419](https://arxiv.org/abs/2104.10419)
- CornerNet [https://arxiv.org/abs/1808.01244](https://arxiv.org/abs/1808.01244)
- FCOS(Old) [https://arxiv.org/abs/1904.01355](https://arxiv.org/abs/1904.01355)
- FCOS(New) [https://arxiv.org/abs/2006.09214](https://arxiv.org/abs/2006.09214)
- CenterNet [https://arxiv.org/abs/1904.07850](https://arxiv.org/abs/1904.07850)


## 语义分割(Semantic Segmentation)
- FCN(Fully Convolutional Networks for Semantic Segmentation) [https://arxiv.org/abs/1411.4038](https://arxiv.org/abs/1411.4038)
- UNet(U-Net: Convolutional Networks for Biomedical Image Segmentation) [https://arxiv.org/abs/1505.04597](https://arxiv.org/abs/1505.04597)
- DeepLabv1(Semantic Image Segmentation with Deep Convolutional Nets and Fully Connected CRFs) [https://arxiv.org/abs/1412.7062](https://arxiv.org/abs/1412.7062)
- DeepLabv2(Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs) [https://arxiv.org/abs/1606.00915](https://arxiv.org/abs/1606.00915)
- DeepLabv3(Rethinking Atrous Convolution for Semantic Image Segmentation) [https://arxiv.org/abs/1706.05587](https://arxiv.org/abs/1706.05587)
- DeepLabv3+(Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation) [https://arxiv.org/abs/1802.02611](https://arxiv.org/abs/1802.02611)
- SegFormer [https://arxiv.org/abs/2105.15203](https://arxiv.org/abs/2105.15203)


## 显著性目标检测(Salient Object Detection)
- U2Net [https://arxiv.org/abs/2005.09007](https://arxiv.org/abs/2005.09007)


## 实例分割(Instance Segmentation)
- Mask R-CNN [https://arxiv.org/abs/1703.06870](https://arxiv.org/abs/1703.06870)


## 关键点检测(Keypoint Detection)
- HRNet(Deep High-Resolution Representation Learning for Human Pose Estimation) [https://arxiv.org/abs/1902.09212](https://arxiv.org/abs/1902.09212)

## 网络量化(Quantization)
- Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference [https://arxiv.org/abs/1712.05877](https://arxiv.org/abs/1712.05877)
- Quantizing deep convolutional networks for efficient inference: A whitepaper [https://arxiv.org/abs/1806.08342](https://arxiv.org/abs/1806.08342)
- Data-Free Quantization Through Weight Equalization and Bias Correction [https://arxiv.org/abs/1906.04721](https://arxiv.org/abs/1906.04721)
- LSQ: Learned Step Size Quantization [https://arxiv.org/abs/1902.08153](https://arxiv.org/abs/1902.08153)
- LSQ+: Improving low-bit quantization through learnable offsets and better initialization [https://arxiv.org/abs/2004.09576](https://arxiv.org/abs/2004.09576)


## 自然语言处理
- Attention Is All You Need [https://arxiv.org/abs/1706.03762](https://arxiv.org/abs/1706.03762)

## Others
- Microsoft COCO: Common Objects in Context [https://arxiv.org/abs/1405.0312](https://arxiv.org/abs/1405.0312)
- The PASCAL Visual Object Classes Challenge: A Retrospective [http://host.robots.ox.ac.uk/pascal/VOC/pubs/everingham15.pdf](http://host.robots.ox.ac.uk/pascal/VOC/pubs/everingham15.pdf)
- Grad-CAM: Visual Explanations from Deep Networks via Gradient-based Localization [https://arxiv.org/abs/1610.02391](https://arxiv.org/abs/1610.02391)


================================================
FILE: course_ppt/README.md
================================================
# 为了精简项目，课程中的所有ppt都已转存至百度云

**所有PPT都放在该文件夹中** 链接：https://pan.baidu.com/s/1VL6QTQ86sfY2aMDVo4Z-kg 提取码：4ydw

**下面为单独每个ppt的链接**：
## 分类网络相关
- **AlexNet** 链接: https://pan.baidu.com/s/1RJn5lzY8LwrmckUPvXcjmg  提取码: 34ue
- **VGG** 链接: https://pan.baidu.com/s/1BnYpdaDwAIcgRm7YwakEZw  提取码: 8ev0
- **GoogLeNet** 链接: https://pan.baidu.com/s/1XjZXprvayV3dDMvLjoOk3A  提取码: 9hq4
- **ResNet** 链接: https://pan.baidu.com/s/1I2LUlwCSjNKr37T0n3NKzg  提取码: f1s9
- **ResNext** 链接：https://pan.baidu.com/s/1-anFYX5572MJmiQym9D4Eg 提取码：f8ob 
- **MobileNet_v1_v2** 链接: https://pan.baidu.com/s/1ReDDCuK8wyH0XqniUgiSYQ  提取码: ipqv
- **MobileNet_v3**  链接：https://pan.baidu.com/s/13mzSpyxuA4T4ki7kEN1Xqw 提取码：fp5g 
- **ShuffleNet_v1_v2** 链接：https://pan.baidu.com/s/1-DDwePMPCDvjw08YU8nAAA 提取码：ad6n
- **EfficientNet_v1** 链接：https://pan.baidu.com/s/1Sep9W0vLzfjhcHAXr6Bv0Q  提取码：eufl 
- **EfficientNet_v2** 链接：https://pan.baidu.com/s/1tesrgY4CHLmq6P7s7TcHCw  提取码：y2kz
- **Transformer** 链接：https://pan.baidu.com/s/1DE6RDySr7NS0HQ35gBqP_g 提取码：y9e7
- **Vision Transformer** 链接：https://pan.baidu.com/s/1wzpHG8EK5gxg6UCMscYqMw 提取码：cm1m
- **Swin Transformer** 链接：https://pan.baidu.com/s/1O6XEEZUb6B6AGYON7-EOgA 提取码：qkrn
- **ConvNeXt** 链接：https://pan.baidu.com/s/1mgZjkirJPZ8huVls-O0xXA  提取码：kvqx
- **RepVGG** 链接：https://pan.baidu.com/s/1uJP3hCHI79-tUdBNR_VAWQ  提取码：qe8a
- **MobileViT** 链接：https://pan.baidu.com/s/1F8QJtFhTPWX8Vjr8_97scQ  提取码：lfn5
- **ConfusionMatrix** 链接: https://pan.baidu.com/s/1EtKzHkZyv2XssYtqmGYCLg  提取码: uoo5
- **Grad-CAM** 链接：https://pan.baidu.com/s/1ZHKBW7hINQXFI36hBYdC0Q  提取码：aru7


## 目标检测网络相关
- **R-CNN** 链接: https://pan.baidu.com/s/1l_ZxkfJdyp3KoMLqwWbx5A  提取码: nm1l
- **Fast R-CNN** 链接: https://pan.baidu.com/s/1Pe_Tg43OVo-yZWj7t-_L6Q  提取码: fe73
- **Faster R-CNN** 链接：https://pan.baidu.com/s/1Dd0d_LY8l7Y1YkHQhp-WfA  提取码：vzp4
- **FPN** 链接：https://pan.baidu.com/s/1O9H0iqQMg9f_FZezUEKZ9g 提取码：qbl8 
- **SSD** 链接: https://pan.baidu.com/s/15zF3GhIdg-E_tZX2Y2X-rw  提取码: u7k1
- **RetinaNet**  链接：https://pan.baidu.com/s/1beW612VCSnSu-v8iu_2-fA 提取码：vqbu 
- **YOLOv1** 链接: https://pan.baidu.com/s/1vVyUNQHYEGjqosezlx_1Mg  提取码: b3i0
- **YOLOv2** 链接: https://pan.baidu.com/s/132aW1e_NYbaxxGi3cDVLYg  提取码: tak7
- **YOLOv3** 链接：https://pan.baidu.com/s/1hZqdgh7wA7QeGAYTttlVOQ  提取码：5ulo
- **YOLOv3SPP** 链接: https://pan.baidu.com/s/15LRssnPez9pn6jRpW89Wlw  提取码: nv9f
- **YOLOv4** 链接：https://pan.baidu.com/s/1Ltw4v1pg0eZNFYR2ZBbZmQ  提取码：qjx4
- **YOLOv5** 链接：https://pan.baidu.com/s/1rnvjwHLvOlJ9KpJ5z95GWw  提取码：kt04
- **YOLOX** 链接：https://pan.baidu.com/s/1ex54twQC7hBE3szNko_K5A  提取码：al0r
- **FCOS** 链接: https://pan.baidu.com/s/1KUc9dzvAbtwtGGm3ZZy_cw  提取码: h0as
- **Calculate mAP** 链接: https://pan.baidu.com/s/1jdA_n78J7nSUoOg6TTO5Bg  提取码: eh62
- **coco数据集简介** 链接：https://pan.baidu.com/s/1HfCvjt-8o9j5a916IYNVjw  提取码：6rec 


## 图像分割网络相关
- **语义分割前言** 链接：https://pan.baidu.com/s/1cwxe2wbaA_2DqNYADq3myA 提取码：zzij
- **转置卷积** 链接：https://pan.baidu.com/s/1A8688168fuWHyxJQtzupHw 提取码：pgnf
- **FCN** 链接：https://pan.baidu.com/s/1XLUneTLrdUyDAiV6kqi9rw 提取码：126a
- **膨胀卷积** 链接：https://pan.baidu.com/s/1QlQyniuMhBeXyEK420MIdQ 提取码：ry6p
- **DeepLab V1** 链接：https://pan.baidu.com/s/1NFxb7ADQOMVYLxmIKqTONQ  提取码：500s
- **DeepLab V2** 链接：https://pan.baidu.com/s/1woe3lJYBVkOdnn6XXlKf8g 提取码：76ec
- **DeepLab V3** 链接：https://pan.baidu.com/s/1WVBgc2Ld13D0_dkHGwhTpA 提取码：m54m
- **UNet** 链接: https://pan.baidu.com/s/1WDwI-DuzYklMvwyRxVUXjA 提取码: rd4j
- **U2Net**  链接：https://pan.baidu.com/s/1ekbEm4dsjlFamK8dCs8yfA  提取码：472j


## 实例分割
- **Mask R-CNN** 链接：https://pan.baidu.com/s/1JpQ7ENEv_x9A1-O_NpjwYA 提取码：1t4i

## 关键点检测
- **HRNet** 链接: https://pan.baidu.com/s/1-8AJdU82K1j70KZK_rN7aQ  提取码: t4me


================================================
FILE: data_set/README.md
================================================
## 该文件夹是用来存放训练数据的目录
### 使用步骤如下：
* （1）在data_set文件夹下创建新文件夹"flower_data"
* （2）点击链接下载花分类数据集 [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz)
* （3）解压数据集到flower_data文件夹下
* （4）执行"split_data.py"脚本自动将数据集划分成训练集train和验证集val    

```
├── flower_data   
       ├── flower_photos（解压的数据集文件夹，3670个样本）  
       ├── train（生成的训练集，3306个样本）  
       └── val（生成的验证集，364个样本） 
```


================================================
FILE: data_set/split_data.py
================================================
import os
from shutil import copy, rmtree
import random


def mk_file(file_path: str):
    """Create ``file_path`` as an empty directory.

    A directory that already exists at that path is removed together with
    its contents before the new, empty one is created.
    """
    path_taken = os.path.exists(file_path)
    if path_taken:
        # Start from a clean slate: drop the old folder, then recreate it.
        rmtree(file_path)
    os.makedirs(file_path)


def main():
    """Split the flower dataset into training and validation folders.

    Reads the class sub-folders of ``<cwd>/flower_data/flower_photos``,
    recreates ``<cwd>/flower_data/train`` and ``<cwd>/flower_data/val`` from
    scratch and copies every image into exactly one of them. For each class,
    ``int(num_images * split_rate)`` files are sampled for validation and
    the remaining files go to training. Progress is printed per class.

    Raises:
        AssertionError: if the ``flower_photos`` folder does not exist.
    """
    # Seed the RNG so the sampling below is repeatable.
    random.seed(0)

    # Fraction of every class that is placed in the validation set (10%).
    split_rate = 0.1

    # Must point at the folder that holds the extracted flower_photos.
    cwd = os.getcwd()
    data_root = os.path.join(cwd, "flower_data")
    origin_flower_path = os.path.join(data_root, "flower_photos")
    assert os.path.exists(origin_flower_path), "path '{}' does not exist.".format(origin_flower_path)

    # os.listdir() returns entries in arbitrary order. Sort them, otherwise
    # the seeded sampling can still select different files on different
    # machines/filesystems.
    flower_class = sorted(cla for cla in os.listdir(origin_flower_path)
                          if os.path.isdir(os.path.join(origin_flower_path, cla)))

    # Create the (empty) training folder with one sub-folder per class.
    train_root = os.path.join(data_root, "train")
    mk_file(train_root)
    for cla in flower_class:
        mk_file(os.path.join(train_root, cla))

    # Create the (empty) validation folder with one sub-folder per class.
    val_root = os.path.join(data_root, "val")
    mk_file(val_root)
    for cla in flower_class:
        mk_file(os.path.join(val_root, cla))

    for cla in flower_class:
        cla_path = os.path.join(origin_flower_path, cla)
        images = sorted(os.listdir(cla_path))
        num = len(images)
        # Randomly pick the file names that go to validation. A set keeps the
        # membership test below O(1) instead of scanning a list per image.
        eval_index = set(random.sample(images, k=int(num*split_rate)))
        for index, image in enumerate(images):
            # Copy the file into the val or train folder of its class.
            target_root = val_root if image in eval_index else train_root
            copy(os.path.join(cla_path, image), os.path.join(target_root, cla))
            print("\r[{}] processing [{}/{}]".format(cla, index+1, num), end="")  # processing bar
        print()

    print("processing done!")


# Run the split only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()


================================================
FILE: deploying_service/deploying_pytorch/convert_onnx_cls/class_indices.json
================================================
{
    "0": "daisy",
    "1": "dandelion",
    "2": "roses",
    "3": "sunflowers",
    "4": "tulips"
}

================================================
FILE: deploying_service/deploying_pytorch/convert_onnx_cls/main.py
================================================
from PIL import Image
import torchvision.transforms as transforms
import torch
import torch.onnx
import onnx
import onnxruntime
import numpy as np
from model import resnet34

# Device the checkpoint tensors are mapped onto when main() loads the weights.
device = torch.device("cpu")


def to_numpy(tensor):
    """Return the tensor's data as a NumPy array on the CPU.

    A tensor that requires grad is detached from the autograd graph first.
    """
    if tensor.requires_grad:
        tensor = tensor.detach()
    return tensor.cpu().numpy()


def main(save_path=None):
    """Export the flower-classification ResNet34 to ONNX and sanity-check it.

    Steps:
      1. build ``resnet34(num_classes=5)`` and load ``./resNet34.pth``;
      2. export the model to ``save_path`` with a dynamic batch axis;
      3. validate the file with the ONNX checker;
      4. compare the ONNX Runtime output with the PyTorch output on a
         random input;
      5. classify ``../tulip.jpg`` with the ONNX model and print the softmax
         probabilities.

    Args:
        save_path: file name of the ONNX model to write; must be a ``str``.

    Raises:
        AssertionError: if ``save_path`` is not a string, or if the ONNX
            Runtime and PyTorch outputs differ beyond the tolerance.
    """
    assert isinstance(save_path, str), "lack of save_path parameter..."
    # create model
    model = resnet34(num_classes=5)
    # load model weights
    model_weight_path = "./resNet34.pth"
    model.load_state_dict(torch.load(model_weight_path, map_location=device))
    model.eval()
    # input to the model
    # [batch, channel, height, width]
    x = torch.rand(1, 3, 224, 224, requires_grad=True)
    torch_out = model(x)

    # export the model
    torch.onnx.export(model,                       # model being run
                      x,                           # model input (or a tuple for multiple inputs)
                      save_path,                   # where to save the model (can be a file or file-like object)
                      export_params=True,          # store the trained parameter weights inside the model file
                      opset_version=10,            # the ONNX version to export the model to
                      do_constant_folding=True,    # whether to execute constant folding for optimization
                      input_names=["input"],       # the model's input names
                      output_names=["output"],     # the model's output names
                      dynamic_axes={"input": {0: "batch_size"},  # variable length axes
                                    "output": {0: "batch_size"}})

    # check onnx model
    onnx_model = onnx.load(save_path)
    onnx.checker.check_model(onnx_model)

    ort_session = onnxruntime.InferenceSession(save_path)

    # compute ONNX Runtime output prediction
    ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)}
    ort_outs = ort_session.run(None, ort_inputs)

    # compare ONNX Runtime and Pytorch results
    # assert_allclose: Raises an AssertionError if two objects are not equal up to desired tolerance.
    np.testing.assert_allclose(to_numpy(torch_out), ort_outs[0], rtol=1e-03, atol=1e-05)
    print("Exported model has been tested with ONNXRuntime, and the result looks good!")

    # load test image; force three channels so that grayscale / RGBA files
    # still match the 3-channel Normalize below
    img = Image.open("../tulip.jpg").convert("RGB")

    # pre-process
    preprocess = transforms.Compose([transforms.Resize([224, 224]),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
    img = preprocess(img)
    img = img.unsqueeze_(0)

    # feed image into onnx model
    ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(img)}
    ort_outs = ort_session.run(None, ort_inputs)
    prediction = ort_outs[0]

    # np softmax process
    # Subtract the per-row maximum for numerical stability and normalise each
    # row on its own, so every sample gets its own probability distribution
    # (identical to the whole-array form for the single-image batch used here).
    prediction = prediction - np.max(prediction, axis=-1, keepdims=True)
    exp_prediction = np.exp(prediction)
    prediction = exp_prediction / np.sum(exp_prediction, axis=-1, keepdims=True)
    print(prediction)


# Script entry point: export to a fixed file name in the working directory.
if __name__ == '__main__':
    # File name the exported ONNX model is written to.
    onnx_file_name = "resnet34.onnx"
    main(save_path=onnx_file_name)


================================================
FILE: deploying_service/deploying_pytorch/convert_onnx_cls/model.py
================================================
import torch.nn as nn
import torch


class BasicBlock(nn.Module):
    """Residual block built from two 3x3 conv + BatchNorm stages.

    The block input (passed through ``downsample`` when one is given) is
    added to the output of the second BatchNorm before the final ReLU.
    """
    # Channel multiplier that ResNet reads when sizing its layers.
    expansion = 1

    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        super().__init__()
        # Both convolutions are 3x3, padded by 1 and bias-free.
        conv_kwargs = dict(kernel_size=3, padding=1, bias=False)
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,
                               stride=stride, **conv_kwargs)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,
                               stride=1, **conv_kwargs)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample

    def forward(self, x):
        # Shortcut branch: identity unless a downsample module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        out += shortcut
        return self.relu(out)


class Bottleneck(nn.Module):
    """Residual block made of a 1x1, a 3x3 and a 1x1 convolution.

    The last convolution produces ``out_channel * expansion`` channels. The
    block input (passed through ``downsample`` when one is given) is added
    to the output of the third BatchNorm before the final ReLU.
    """
    # Channel multiplier applied by the last 1x1 convolution.
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        super().__init__()
        expanded_channel = out_channel * self.expansion

        # 1x1 conv: squeeze channels
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,
                               kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        # 3x3 conv: the only place where ``stride`` is applied
        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,
                               kernel_size=3, stride=stride, bias=False, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channel)
        # 1x1 conv: unsqueeze channels
        self.conv3 = nn.Conv2d(in_channels=out_channel, out_channels=expanded_channel,
                               kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(expanded_channel)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        # Shortcut branch: identity unless a downsample module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out += shortcut
        return self.relu(out)


class ResNet(nn.Module):
    """ResNet backbone with an optional average-pool + linear head.

    Args:
        block: residual block class. It must expose an ``expansion``
            attribute and accept ``(in_channel, out_channel, stride=...,
            downsample=...)``.
        blocks_num: number of blocks in each of the four stages.
        num_classes: output size of the final linear layer.
        include_top: when False the pooling/linear head is not built and
            ``forward`` returns the output of the last stage.
    """

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True):
        super().__init__()
        self.include_top = include_top
        # Input width of the next stage; _make_layer updates it as it goes.
        self.in_channel = 64

        # Stem: 7x7 stride-2 convolution followed by a 3x3 stride-2 max pool.
        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four stages; every stage except the first uses stride 2.
        self.layer1 = self._make_layer(block, 64, blocks_num[0])
        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)
        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)
        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)

        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Re-initialise every convolution weight with Kaiming-normal.
        for conv in (m for m in self.modules() if isinstance(m, nn.Conv2d)):
            nn.init.kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` blocks and advance ``self.in_channel``."""
        stage_out = channel * block.expansion

        # The first block needs a projection shortcut whenever it changes the
        # spatial size or the number of channels.
        downsample = None
        if not (stride == 1 and self.in_channel == stage_out):
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channel, stage_out, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(stage_out))

        layers = [block(self.in_channel, channel, downsample=downsample, stride=stride)]
        self.in_channel = stage_out
        layers.extend(block(self.in_channel, channel) for _ in range(1, block_num))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        if not self.include_top:
            return x

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        return self.fc(x)


def resnet34(num_classes=1000, include_top=True):
    """Build a ResNet of ``BasicBlock`` stages with depths 3, 4, 6 and 3."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths,
                  num_classes=num_classes, include_top=include_top)


def resnet101(num_classes=1000, include_top=True):
    """Build a ResNet of ``Bottleneck`` stages with depths 3, 4, 23 and 3."""
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths,
                  num_classes=num_classes, include_top=include_top)


================================================
FILE: deploying_service/deploying_pytorch/convert_openvino/convert_resnet34/README.md
================================================
本项目展示如何将Pytorch中的ResNet34网络转成Openvino的IR格式，并进行量化处理，具体使用流程如下：
1. 按照`requirements.txt`配置环境
2. 下载事先训练好的ResNet34权重（之前在花分类数据集上训练得到的）放在当前文件夹下。百度云链接: https://pan.baidu.com/s/1x4WFX1HynYcXLium3UaaFQ  密码: qvi6
3. 使用`convert_pytorch2onnx.py`将Resnet34转成ONNX格式
4. 在命令行中使用以下指令将ONNX转成IR格式：
```
mo  --input_model resnet34.onnx \
    --input_shape "[1,3,224,224]" \
    --mean_values="[123.675,116.28,103.53]" \
    --scale_values="[58.395,57.12,57.375]" \
    --data_type FP32 \
    --output_dir ir_output
```
5. 下载并解压花分类数据集，将`quantization_int8.py`中的`data_path`指向解压后的`flower_photos`
6. 使用`quantization_int8.py`量化模型

================================================
FILE: deploying_service/deploying_pytorch/convert_openvino/convert_resnet34/compare_fps.py
================================================
import time
import numpy as np
import torch
import onnxruntime
import matplotlib.pyplot as plt
from openvino.runtime import Core
from torchvision.models import resnet34


def normalize(image: np.ndarray) -> np.ndarray:
    """
    Return a float32 copy of ``image`` that is divided by 255, shifted by the
    per-channel mean and divided by the per-channel std. The three mean/std
    values broadcast against the last axis; the input array is not modified.
    """
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    out = image.astype(np.float32)
    # Every step writes back into the float32 buffer.
    np.divide(out, 255.0, out=out)
    np.subtract(out, mean, out=out)
    np.divide(out, std, out=out)
    return out


def onnx_inference(onnx_path: str, image: np.ndarray, num_images: int = 20):
    """
    Run the ONNX model at ``onnx_path`` ``num_images`` times on ``image``
    with ONNX Runtime, print the timing and return the measured FPS.
    """
    # load onnx model
    session = onnxruntime.InferenceSession(onnx_path)

    # the same input dict is fed on every iteration
    input_name = session.get_inputs()[0].name
    feed = {input_name: image}

    tic = time.perf_counter()
    for _ in range(num_images):
        session.run(None, feed)
    time_onnx = time.perf_counter() - tic

    fps = num_images / time_onnx
    print(
        f"ONNX model in Inference Engine/CPU: {time_onnx / num_images:.3f} "
        f"seconds per image, FPS: {fps:.2f}"
    )

    return fps


def ir_inference(ir_path: str, image: np.ndarray, num_images: int = 20):
    """
    Run the OpenVINO IR model at ``ir_path`` ``num_images`` times on
    ``image`` (device "CPU"), print the timing and return the measured FPS.

    Only the inference loop is timed: the infer request and the input dict
    are created before the timer starts, so one-time setup cost does not
    lower the reported FPS.
    """
    # Load the network in Inference Engine
    ie = Core()
    model_ir = ie.read_model(model=ir_path)
    compiled_model_ir = ie.compile_model(model=model_ir, device_name="CPU")

    # Get the input layer and build the (loop-invariant) input dict once
    input_layer_ir = next(iter(compiled_model_ir.inputs))
    inputs = {input_layer_ir.any_name: image}

    # One-time setup, deliberately kept outside the timed region
    request_ir = compiled_model_ir.create_infer_request()

    start = time.perf_counter()
    for _ in range(num_images):
        request_ir.infer(inputs=inputs)
    end = time.perf_counter()
    time_ir = end - start
    print(
        f"IR model in Inference Engine/CPU: {time_ir / num_images:.3f} "
        f"seconds per image, FPS: {num_images / time_ir:.2f}"
    )

    return num_images / time_ir


def pytorch_inference(image: np.ndarray, num_images: int = 20):
    """
    Run a freshly built (not pretrained) 5-class ResNet34 ``num_images``
    times on ``image`` under ``torch.no_grad()``, print the timing and
    return the measured FPS.
    """
    batch = torch.as_tensor(image, dtype=torch.float32)

    net = resnet34(pretrained=False, num_classes=5)
    net.eval()

    with torch.no_grad():
        tic = time.perf_counter()
        for _ in range(num_images):
            net(batch)
        time_torch = time.perf_counter() - tic

    fps = num_images / time_torch
    print(
        f"PyTorch model on CPU: {time_torch / num_images:.3f} seconds per image, "
        f"FPS: {fps:.2f}"
    )

    return fps


def plot_fps(v: dict):
    """
    Draw a bar chart of FPS per model format, save it as ``fps_vs.jpg`` and
    then display it.

    Args:
        v: mapping from model-format label to its measured FPS.
    """
    labels = list(v.keys())
    fps_values = list(v.values())
    positions = range(len(labels))

    plt.bar(positions, fps_values, align='center')
    plt.xticks(positions, labels)
    # Write each value slightly above its bar. The loop variable has its own
    # name so it no longer shadows the ``v`` parameter.
    for i, fps in enumerate(fps_values):
        plt.text(x=i, y=fps+0.5, s=f"{fps:.2f}", ha='center')
    plt.xlabel('model format')
    plt.ylabel('fps')
    plt.title('FPS comparison')
    # Save before show(): after show() returns the figure may already have
    # been released, in which case savefig() would write an empty image.
    plt.savefig('fps_vs.jpg')
    plt.show()


def main():
    """
    Benchmark the ONNX, OpenVINO IR and PyTorch versions of ResNet34 on one
    random 224x224 image (100 runs each) and plot the resulting FPS values.
    """
    image_h = 224
    image_w = 224
    onnx_path = "resnet34.onnx"
    ir_path = "ir_output/resnet34.xml"
    runs = 100

    def to_nchw(hwc: np.ndarray) -> np.ndarray:
        # [h, w, c] -> [c, h, w] -> [1, c, h, w]
        return np.expand_dims(np.transpose(hwc, (2, 0, 1)), 0)

    image = np.random.randn(image_h, image_w, 3)
    normalized_image = normalize(image)

    # Convert both variants to the network input shape
    input_image = to_nchw(image)
    normalized_input_image = to_nchw(normalized_image)

    # The IR model receives the un-normalized array, the other two the
    # normalized one.
    onnx_fps = onnx_inference(onnx_path, normalized_input_image, num_images=runs)
    ir_fps = ir_inference(ir_path, input_image, num_images=runs)
    pytorch_fps = pytorch_inference(normalized_input_image, num_images=runs)

    results = {"pytorch": round(pytorch_fps, 2),
               "onnx": round(onnx_fps, 2),
               "ir": round(ir_fps, 2)}
    plot_fps(results)


# Run the benchmark only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: deploying_service/deploying_pytorch/convert_openvino/convert_resnet34/compare_onnx_and_ir.py
================================================
import numpy as np
import onnxruntime
from openvino.runtime import Core


def normalize(image: np.ndarray) -> np.ndarray:
    """
    Convert ``image`` to a new float32 array, scale it by 1/255, subtract the
    per-channel mean and divide by the per-channel std. The three mean/std
    values broadcast against the last axis; the input array is left untouched.
    """
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    result = image.astype(np.float32)
    # All three steps operate in place on the float32 copy.
    np.divide(result, 255.0, out=result)
    np.subtract(result, mean, out=result)
    np.divide(result, std, out=result)
    return result


def onnx_inference(onnx_path: str, image: np.ndarray):
    """Run the ONNX model at ``onnx_path`` once on ``image`` and return its first output."""
    # load onnx model
    session = onnxruntime.InferenceSession(onnx_path)

    # feed the image under the name of the model's first input
    first_input = session.get_inputs()[0]
    outputs = session.run(None, {first_input.name: image})
    return outputs[0]


def ir_inference(ir_path: str, image: np.ndarray):
    """
    Run the OpenVINO IR model at ``ir_path`` once on ``image`` (device "CPU")
    and return the result of its first output layer.
    """
    # Load the network in Inference Engine
    ie = Core()
    model_ir = ie.read_model(model=ir_path)
    compiled_model_ir = ie.compile_model(model=model_ir, device_name="CPU")

    # Only the output layer is needed to index the result; the input is
    # passed positionally below, so no input-layer lookup is required.
    output_layer_ir = next(iter(compiled_model_ir.outputs))

    # Run inference on the input image
    res_ir = compiled_model_ir([image])[output_layer_ir]
    return res_ir


def main():
    """
    Feed one random 224x224 image to the ONNX model and to the OpenVINO IR
    model and assert that both produce the same output within tolerance.
    """
    image_h = 224
    image_w = 224
    onnx_path = "resnet34.onnx"
    ir_path = "ir_output/resnet34.xml"

    def to_nchw(hwc: np.ndarray) -> np.ndarray:
        # [h, w, c] -> [c, h, w] -> [1, c, h, w]
        return np.expand_dims(np.transpose(hwc, (2, 0, 1)), 0)

    image = np.random.randn(image_h, image_w, 3)
    normalized_image = normalize(image)

    # Convert both variants to the network input shape
    input_image = to_nchw(image)
    normalized_input_image = to_nchw(normalized_image)

    # The ONNX model gets the normalized array, the IR model the raw one.
    onnx_res = onnx_inference(onnx_path, normalized_input_image)
    ir_res = ir_inference(ir_path, input_image)

    # Raises AssertionError when the two outputs differ beyond the tolerance.
    np.testing.assert_allclose(onnx_res, ir_res, rtol=1e-03, atol=1e-05)
    print("Exported model has been tested with OpenvinoRuntime, and the result looks good!")


# Run the comparison only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: deploying_service/deploying_pytorch/convert_openvino/convert_resnet34/convert_pytorch2onnx.py
================================================
import torch
import torch.onnx
import onnx
import onnxruntime
import numpy as np
from torchvision.models import resnet34

# NOTE(review): not referenced elsewhere in this script; main() passes
# map_location='cpu' directly when loading the weights.
device = torch.device("cpu")


def to_numpy(tensor):
    """Convert ``tensor`` to a CPU NumPy array, detaching it first if it requires grad."""
    source = tensor.detach() if tensor.requires_grad else tensor
    return source.cpu().numpy()


def main():
    """
    Load the flower-trained ResNet34 weights, export the model to
    ``resnet34.onnx``, validate the file with the ONNX checker and compare
    the ONNX Runtime output with the PyTorch output on a random input.
    """
    weights_path = "resNet34(flower).pth"
    onnx_file_name = "resnet34.onnx"
    # [batch, channel, height, width]
    input_shape = (1, 3, 224, 224)

    # create model and load pretrain weights
    net = resnet34(pretrained=False, num_classes=5)
    state_dict = torch.load(weights_path, map_location='cpu')
    net.load_state_dict(state_dict)
    net.eval()

    # random input that is used both for the export and for the comparison
    dummy_input = torch.rand(*input_shape, requires_grad=True)
    torch_out = net(dummy_input)

    # export the model
    torch.onnx.export(net,               # model being run
                      dummy_input,       # model input (or a tuple for multiple inputs)
                      onnx_file_name,    # where to save the model (can be a file or file-like object)
                      verbose=False)

    # check onnx model
    onnx.checker.check_model(onnx.load(onnx_file_name))

    # compute ONNX Runtime output prediction
    session = onnxruntime.InferenceSession(onnx_file_name)
    input_name = session.get_inputs()[0].name
    ort_outs = session.run(None, {input_name: to_numpy(dummy_input)})

    # compare ONNX Runtime and Pytorch results
    # assert_allclose: Raises an AssertionError if two objects are not equal up to desired tolerance.
    np.testing.assert_allclose(to_numpy(torch_out), ort_outs[0], rtol=1e-03, atol=1e-05)
    print("Exported model has been tested with ONNXRuntime, and the result looks good!")


# Run the export only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: deploying_service/deploying_pytorch/convert_openvino/convert_resnet34/model.py
================================================
from typing import Callable, List, Optional

import torch
from torch import nn, Tensor
from torch.nn import functional as F
from functools import partial


def _make_divisible(ch, divisor=8, min_ch=None):
    """
    Round ``ch`` to the nearest multiple of ``divisor``.

    The result is never smaller than ``min_ch`` (which defaults to
    ``divisor``), and one extra ``divisor`` is added when rounding would
    otherwise leave the value below 90% of ``ch``.

    This function is taken from the original tf repo:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    """
    lower_bound = divisor if min_ch is None else min_ch
    nearest_multiple = int(ch + divisor / 2) // divisor * divisor
    new_ch = max(lower_bound, nearest_multiple)
    # Make sure that round down does not go down by more than 10%.
    return new_ch + divisor if new_ch < 0.9 * ch else new_ch


class ConvBNActivation(nn.Sequential):
    """
    Bias-free Conv2d followed by a normalization layer and an activation.

    The padding is ``(kernel_size - 1) // 2``. ``norm_layer`` defaults to
    ``nn.BatchNorm2d`` and ``activation_layer`` to ``nn.ReLU6``; the
    activation is always constructed with ``inplace=True``.
    """

    def __init__(self,
                 in_planes: int,
                 out_planes: int,
                 kernel_size: int = 3,
                 stride: int = 1,
                 groups: int = 1,
                 norm_layer: Optional[Callable[..., nn.Module]] = None,
                 activation_layer: Optional[Callable[..., nn.Module]] = None):
        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        activation_layer = nn.ReLU6 if activation_layer is None else activation_layer

        conv = nn.Conv2d(in_channels=in_planes,
                         out_channels=out_planes,
                         kernel_size=kernel_size,
                         stride=stride,
                         padding=(kernel_size - 1) // 2,
                         groups=groups,
                         bias=False)
        super().__init__(conv,
                         norm_layer(out_planes),
                         activation_layer(inplace=True))


class SqueezeExcitation(nn.Module):
    """
    Channel gate: the input is average-pooled to 1x1, passed through two 1x1
    convolutions (ReLU after the first, hard-sigmoid after the second) and
    the resulting scale is multiplied with the input.
    """

    def __init__(self, input_c: int, squeeze_factor: int = 4):
        super().__init__()
        # Width of the squeezed representation, made divisible by 8.
        squeeze_c = _make_divisible(input_c // squeeze_factor, 8)
        self.fc1 = nn.Conv2d(input_c, squeeze_c, 1)
        self.fc2 = nn.Conv2d(squeeze_c, input_c, 1)

    def forward(self, x: Tensor) -> Tensor:
        pooled = F.adaptive_avg_pool2d(x, output_size=(1, 1))
        squeezed = F.relu(self.fc1(pooled), inplace=True)
        gate = F.hardsigmoid(self.fc2(squeezed), inplace=True)
        return gate * x


class InvertedResidualConfig:
    """
    Settings of one inverted-residual block.

    The three channel counts are scaled by ``width_multi`` and made divisible
    by 8 via ``adjust_channels``; ``use_hs`` is True exactly when
    ``activation`` equals ``"HS"``.
    """

    def __init__(self,
                 input_c: int,
                 kernel: int,
                 expanded_c: int,
                 out_c: int,
                 use_se: bool,
                 activation: str,
                 stride: int,
                 width_multi: float):
        def scaled(channels: int):
            # Apply the width multiplier to a raw channel count.
            return self.adjust_channels(channels, width_multi)

        self.input_c = scaled(input_c)
        self.kernel = kernel
        self.expanded_c = scaled(expanded_c)
        self.out_c = scaled(out_c)
        self.use_se = use_se
        self.use_hs = activation == "HS"  # whether using h-swish activation
        self.stride = stride

    @staticmethod
    def adjust_channels(channels: int, width_multi: float):
        """Scale ``channels`` by ``width_multi`` and make the result divisible by 8."""
        scaled_channels = channels * width_multi
        return _make_divisible(scaled_channels, 8)


class InvertedResidual(nn.Module):
    """Inverted-residual block assembled from an ``InvertedResidualConfig``.

    Layout: optional 1x1 expansion -> depthwise conv -> optional
    squeeze-excitation -> 1x1 projection without activation. The input is
    added to the output when the stride is 1 and the input/output channel
    counts match.

    Args:
        cnf: block description (channels, kernel, stride, SE / activation flags).
        norm_layer: factory for the normalisation layer used in every conv unit.

    Raises:
        ValueError: if ``cnf.stride`` is neither 1 nor 2.
    """

    def __init__(self,
                 cnf: InvertedResidualConfig,
                 norm_layer: Callable[..., nn.Module]):
        super().__init__()

        if cnf.stride not in (1, 2):
            raise ValueError("illegal stride value.")

        self.use_res_connect = cnf.stride == 1 and cnf.input_c == cnf.out_c

        act = nn.Hardswish if cnf.use_hs else nn.ReLU
        stages: List[nn.Module] = []

        # Pointwise expansion is skipped when it would not change the width.
        needs_expansion = cnf.expanded_c != cnf.input_c
        if needs_expansion:
            expand = ConvBNActivation(cnf.input_c,
                                      cnf.expanded_c,
                                      kernel_size=1,
                                      norm_layer=norm_layer,
                                      activation_layer=act)
            stages.append(expand)

        # Depthwise convolution: one group per channel.
        depthwise = ConvBNActivation(cnf.expanded_c,
                                     cnf.expanded_c,
                                     kernel_size=cnf.kernel,
                                     stride=cnf.stride,
                                     groups=cnf.expanded_c,
                                     norm_layer=norm_layer,
                                     activation_layer=act)
        stages.append(depthwise)

        if cnf.use_se:
            stages.append(SqueezeExcitation(cnf.expanded_c))

        # Linear projection back to the output width (identity activation).
        project = ConvBNActivation(cnf.expanded_c,
                                   cnf.out_c,
                                   kernel_size=1,
                                   norm_layer=norm_layer,
                                   activation_layer=nn.Identity)
        stages.append(project)

        self.block = nn.Sequential(*stages)
        self.out_channels = cnf.out_c
        self.is_strided = cnf.stride > 1

    def forward(self, x: Tensor) -> Tensor:
        out = self.block(x)
        if not self.use_res_connect:
            return out

        # Shortcut connection, accumulated in place on the block output.
        out += x
        return out


class MobileNetV3(nn.Module):
    """MobileNetV3 classifier built from a list of block configurations.

    Args:
        inverted_residual_setting: non-empty list of ``InvertedResidualConfig``.
        last_channel: width of the hidden layer in the classifier head.
        num_classes: number of output classes.
        block: block factory; ``InvertedResidual`` when ``None``.
        norm_layer: normalisation factory; ``BatchNorm2d`` with
            ``eps=0.001, momentum=0.01`` when ``None``.

    Raises:
        ValueError: if ``inverted_residual_setting`` is empty.
        TypeError: if it is not a list of ``InvertedResidualConfig``.
    """

    def __init__(self,
                 inverted_residual_setting: List[InvertedResidualConfig],
                 last_channel: int,
                 num_classes: int = 1000,
                 block: Optional[Callable[..., nn.Module]] = None,
                 norm_layer: Optional[Callable[..., nn.Module]] = None):
        super().__init__()

        if not inverted_residual_setting:
            raise ValueError("The inverted_residual_setting should not be empty.")
        settings_ok = isinstance(inverted_residual_setting, List) and all(
            isinstance(s, InvertedResidualConfig) for s in inverted_residual_setting)
        if not settings_ok:
            raise TypeError("The inverted_residual_setting should be List[InvertedResidualConfig]")

        block = InvertedResidual if block is None else block
        if norm_layer is None:
            norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.01)

        # Stem: a strided 3x3 convolution feeding the first block.
        stem = ConvBNActivation(3,
                                inverted_residual_setting[0].input_c,
                                kernel_size=3,
                                stride=2,
                                norm_layer=norm_layer,
                                activation_layer=nn.Hardswish)

        # Body: one block per configuration entry, in order.
        body = [block(cnf, norm_layer) for cnf in inverted_residual_setting]

        # Head: 1x1 convolution widening the last block's output six-fold.
        head_in_c = inverted_residual_setting[-1].out_c
        head_out_c = 6 * head_in_c
        head = ConvBNActivation(head_in_c,
                                head_out_c,
                                kernel_size=1,
                                norm_layer=norm_layer,
                                activation_layer=nn.Hardswish)

        self.features = nn.Sequential(stem, *body, head)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Sequential(nn.Linear(head_out_c, last_channel),
                                        nn.Hardswish(inplace=True),
                                        nn.Dropout(p=0.2, inplace=True),
                                        nn.Linear(last_channel, num_classes))

        # Weight initialisation, applied per module type.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode="fan_out")
                if module.bias is not None:
                    nn.init.zeros_(module.bias)
                continue
            if isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)
                continue
            if isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.zeros_(module.bias)

    def _forward_impl(self, x: Tensor) -> Tensor:
        """Run features -> global average pool -> flatten -> classifier."""
        pooled = self.avgpool(self.features(x))
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)

    def forward(self, x: Tensor) -> Tensor:
        return self._forward_impl(x)


def mobilenet_v3_large(num_classes: int = 1000,
                       reduced_tail: bool = False) -> MobileNetV3:
    """
    Build the large MobileNetV3 variant described in
    "Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>.

    weights_link:
    https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth

    Args:
        num_classes (int): number of classes
        reduced_tail (bool): If True, the channel counts of the feature layers
            between C4 and C5 are halved, which reduces channel redundancy when
            the network is used as a Detection / Segmentation backbone.
    """
    width_multi = 1.0
    reduce_divider = 2 if reduced_tail else 1
    make_cnf = partial(InvertedResidualConfig, width_multi=width_multi)

    # Channel counts of the tail, optionally halved.
    tail_c = 160 // reduce_divider
    tail_expanded_c = 960 // reduce_divider

    # One row per block:
    # input_c, kernel, expanded_c, out_c, use_se, activation, stride
    rows = [
        (16, 3, 16, 16, False, "RE", 1),
        (16, 3, 64, 24, False, "RE", 2),  # C1
        (24, 3, 72, 24, False, "RE", 1),
        (24, 5, 72, 40, True, "RE", 2),  # C2
        (40, 5, 120, 40, True, "RE", 1),
        (40, 5, 120, 40, True, "RE", 1),
        (40, 3, 240, 80, False, "HS", 2),  # C3
        (80, 3, 200, 80, False, "HS", 1),
        (80, 3, 184, 80, False, "HS", 1),
        (80, 3, 184, 80, False, "HS", 1),
        (80, 3, 480, 112, True, "HS", 1),
        (112, 3, 672, 112, True, "HS", 1),
        (112, 5, 672, tail_c, True, "HS", 2),  # C4
        (tail_c, 5, tail_expanded_c, tail_c, True, "HS", 1),
        (tail_c, 5, tail_expanded_c, tail_c, True, "HS", 1),
    ]
    inverted_residual_setting = [make_cnf(*row) for row in rows]

    # C5: width of the classifier's hidden layer.
    last_channel = InvertedResidualConfig.adjust_channels(1280 // reduce_divider,
                                                          width_multi=width_multi)

    return MobileNetV3(inverted_residual_setting=inverted_residual_setting,
                       last_channel=last_channel,
                       num_classes=num_classes)


def mobilenet_v3_small(num_classes: int = 1000,
                       reduced_tail: bool = False) -> MobileNetV3:
    """
    Build the small MobileNetV3 variant described in
    "Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>.

    weights_link:
    https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth

    Args:
        num_classes (int): number of classes
        reduced_tail (bool): If True, the channel counts of the feature layers
            between C4 and C5 are halved, which reduces channel redundancy when
            the network is used as a Detection / Segmentation backbone.
    """
    width_multi = 1.0
    reduce_divider = 2 if reduced_tail else 1
    make_cnf = partial(InvertedResidualConfig, width_multi=width_multi)

    # Channel counts of the tail, optionally halved.
    tail_c = 96 // reduce_divider
    tail_expanded_c = 576 // reduce_divider

    # One row per block:
    # input_c, kernel, expanded_c, out_c, use_se, activation, stride
    rows = [
        (16, 3, 16, 16, True, "RE", 2),  # C1
        (16, 3, 72, 24, False, "RE", 2),  # C2
        (24, 3, 88, 24, False, "RE", 1),
        (24, 5, 96, 40, True, "HS", 2),  # C3
        (40, 5, 240, 40, True, "HS", 1),
        (40, 5, 240, 40, True, "HS", 1),
        (40, 5, 120, 48, True, "HS", 1),
        (48, 5, 144, 48, True, "HS", 1),
        (48, 5, 288, tail_c, True, "HS", 2),  # C4
        (tail_c, 5, tail_expanded_c, tail_c, True, "HS", 1),
        (tail_c, 5, tail_expanded_c, tail_c, True, "HS", 1),
    ]
    inverted_residual_setting = [make_cnf(*row) for row in rows]

    # C5: width of the classifier's hidden layer.
    last_channel = InvertedResidualConfig.adjust_channels(1024 // reduce_divider,
                                                          width_multi=width_multi)

    return MobileNetV3(inverted_residual_setting=inverted_residual_setting,
                       last_channel=last_channel,
                       num_classes=num_classes)


================================================
FILE: deploying_service/deploying_pytorch/convert_openvino/convert_resnet34/quantization_int8.py
================================================
from addict import Dict
from compression.engines.ie_engine import IEEngine
from compression.graph import load_model, save_model
from compression.graph.model_utils import compress_model_weights
from compression.pipeline.initializer import create_pipeline
from utils import MyDataLoader, Accuracy, read_split_data


def main():
    """Quantize the ResNet34 IR model and print accuracy before and after.

    Loads the IR model, runs the ``DefaultQuantization`` pipeline with the
    validation split as the data source, compresses and saves the result
    under ``quant_ir_output``, then evaluates both models with top-1 accuracy.
    """
    # --- paths and input size -------------------------------------------
    data_path = "/data/flower_photos"
    ir_model_xml = "ir_output/resnet34.xml"
    ir_model_bin = "ir_output/resnet34.bin"
    save_dir = "quant_ir_output"
    model_name = "quantized_resnet34"
    img_w = 224
    img_h = 224

    # --- configuration objects ------------------------------------------
    model_config = Dict({'model_name': 'resnet34',
                         'model': ir_model_xml,
                         'weights': ir_model_bin})
    engine_config = Dict({'device': 'CPU',
                          'stat_requests_number': 2,
                          'eval_requests_number': 2})
    dataset_config = {'data_source': data_path}
    quantization_params = {'target_device': 'CPU',
                           'preset': 'performance',
                           'stat_subset_size': 300}
    algorithms = [{'name': 'DefaultQuantization', 'params': quantization_params}]

    # Step 1: load the model.
    model = load_model(model_config)

    # Step 2: build the data loader from the validation split only.
    split = read_split_data(data_path, val_rate=0.2)
    val_images_path, val_images_label = split[2], split[3]
    data_loader = MyDataLoader(dataset_config, val_images_path, val_images_label, img_w, img_h)

    # Step 3 (optional; required for AccuracyAwareQuantization): the metric.
    metric = Accuracy(top_k=1)

    # Step 4: engine used for statistics collection and metric calculation.
    engine = IEEngine(engine_config, data_loader, metric)

    # Step 5: pipeline of compression algorithms.
    pipeline = create_pipeline(algorithms, engine)

    # Step 6: run the pipeline.
    compressed_model = pipeline.run(model)

    # Step 7 (optional): store the weights in quantized precision so the
    # final .bin file is smaller.
    compress_model_weights(compressed_model)

    # Step 8: save the compressed model.
    compressed_model_paths = save_model(model=compressed_model,
                                        save_path=save_dir,
                                        model_name=model_name)

    # Step 9: compare accuracy of the original and the quantized model.
    original_scores = pipeline.evaluate(model)
    for name, value in (original_scores or {}).items():
        print(f"Accuracy of the original model: {name}: {value}")

    optimized_scores = pipeline.evaluate(compressed_model)
    for name, value in (optimized_scores or {}).items():
        print(f"Accuracy of the optimized model: {name}: {value}")


if __name__ == '__main__':
    main()


================================================
FILE: deploying_service/deploying_pytorch/convert_openvino/convert_resnet34/requirements.txt
================================================
torch==1.11.0
torchvision==0.12.0
onnx==1.13.0
onnxruntime==1.8.0
protobuf==3.19.5
openvino-dev==2022.1.0
matplotlib

================================================
FILE: deploying_service/deploying_pytorch/convert_openvino/convert_resnet34/utils.py
================================================
import os
import json
import random

from PIL import Image
import numpy as np
from compression.api import DataLoader, Metric
from torchvision.transforms import transforms


def read_split_data(root: str, val_rate: float = 0.2):
    """Split an image-folder dataset into training and validation lists.

    Every sub-directory of ``root`` is treated as one class. Class names are
    sorted and numbered from 0, and the index -> name mapping is written to
    ``class_indices.json`` in the current working directory. For each class,
    ``int(len(images) * val_rate)`` images are sampled for validation and
    the rest go to training.

    Args:
        root: dataset root directory containing one folder per class.
        val_rate: fraction of each class sampled into the validation set.

    Returns:
        ``(train_images_path, train_images_label, val_images_path,
        val_images_label)`` as four lists; labels are integer class indices.

    Raises:
        AssertionError: if ``root`` does not exist.
    """
    random.seed(0)  # fixed seed so the sampled split is repeatable
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # One folder per class; sorted so the class -> index mapping is stable.
    flower_class = sorted(cla for cla in os.listdir(root)
                          if os.path.isdir(os.path.join(root, cla)))
    class_indices = {name: idx for idx, name in enumerate(flower_class)}

    # Persist the inverse mapping (index -> class name).
    json_str = json.dumps({idx: name for name, idx in class_indices.items()}, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []   # paths of all training images
    train_images_label = []  # class index of each training image
    val_images_path = []     # paths of all validation images
    val_images_label = []    # class index of each validation image
    total_images = 0         # number of supported images found overall
    supported = [".jpg", ".JPG", ".png", ".PNG"]  # accepted file suffixes

    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # Keep only files whose suffix is supported.
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        image_class = class_indices[cla]
        total_images += len(images)

        # Sample the validation subset; a set keeps the membership test
        # below O(1) per image instead of scanning a list every time.
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.".format(total_images))
    print("{} images for training.".format(len(train_images_path)))
    print("{} images for validation.".format(len(val_images_path)))

    return train_images_path, train_images_label, val_images_path, val_images_label


# Custom implementation of classification accuracy metric.
class Accuracy(Metric):
    """Top-k classification accuracy metric.

    Each call to ``update`` appends one list of per-sample hits
    (1.0 when the target is among the top-k predictions, else 0.0).
    """

    def __init__(self, top_k=1):
        super().__init__()
        self._top_k = top_k
        self._name = f'accuracy@top{self._top_k}'
        self._matches = []

    @property
    def value(self):
        """Hits recorded by the most recent ``update`` call, keyed by metric name."""
        latest = self._matches[-1]
        return {self._name: latest}

    @property
    def avg_value(self):
        """Mean over every hit recorded so far, keyed by metric name."""
        flattened = np.ravel(self._matches)
        return {self._name: flattened.mean()}

    def update(self, output, target):
        """Record which targets fall inside the top-k predictions.

        :param output: model output; must contain exactly one entry
        :param target: annotations (a dict is reduced to its values)
        """
        if len(output) > 1:
            raise Exception('The accuracy metric cannot be calculated for a model with multiple outputs')
        if isinstance(target, dict):
            target = list(target.values())

        # argsort is ascending, so the last k columns hold the top-k classes.
        ranked = np.argsort(output[0], axis=1)
        top_k_classes = ranked[:, -self._top_k:]
        hits = [float(label in top_k_classes[row]) for row, label in enumerate(target)]

        self._matches.append(hits)

    def reset(self):
        """Discard every recorded hit."""
        self._matches = []

    def get_attributes(self):
        """
        Describe the metric as {metric_name: {attribute_name: value}}:
        'direction' is 'higher-better' and 'type' is 'accuracy'.
        """
        attributes = {'direction': 'higher-better', 'type': 'accuracy'}
        return {self._name: attributes}


class MyDataLoader(DataLoader):
    """Data loader yielding resized, centre-cropped images in NCHW layout.

    Args:
        cfg: loader configuration forwarded to the ``DataLoader`` base class.
        images_path: list of image file paths.
        images_label: list of class indices, parallel to ``images_path``.
        img_w: width of the produced image.
        img_h: height of the produced image.
    """

    def __init__(self, cfg, images_path: list, images_label: list, img_w: int = 224, img_h: int = 224):
        super().__init__(cfg)
        self.images_path = images_path
        self.images_label = images_label
        self.image_w = img_w
        self.image_h = img_h
        # Resize the shorter side, then crop the centre to (img_h, img_w).
        self.transforms = transforms.Compose([
            transforms.Resize(min(img_h, img_w)),
            transforms.CenterCrop((img_h, img_w))
        ])

    def __len__(self):
        return len(self.images_label)

    def __getitem__(self, index):
        """
        Return one sample as ``((index, label), image)``.

        The image is a numpy array of shape [1, 3, h, w].

        :param index: index of the taken sample.
        :raises IndexError: if ``index`` is not smaller than ``len(self)``.
        """
        if index >= len(self):
            raise IndexError

        # Force three channels: grayscale, palette or RGBA files would
        # otherwise break the (2, 0, 1) transpose or yield a wrong channel
        # count. The context manager closes the underlying file handle.
        with Image.open(self.images_path[index]) as img_file:
            img = self.transforms(img_file.convert("RGB"))

        # Convert the resized image to the network input shape
        # [h, w, c] -> [c, h, w] -> [1, c, h, w]
        img = np.expand_dims(np.transpose(np.array(img), (2, 0, 1)), 0)

        return (index, self.images_label[index]), img


================================================
FILE: deploying_service/deploying_pytorch/convert_openvino/convert_yolov5/README.md
================================================
OpenVINO量化YOLOv5

1. 按照`requirements.txt`配置环境
2. 将YOLOv5转为ONNX
YOLOv5官方有提供导出ONNX以及OpenVINO的方法，但我这里仅导出成ONNX，这里以YOLOv5s为例
```
python export.py --weights yolov5s.pt --include onnx
```

3. ONNX转换为IR
使用OpenVINO的`mo`工具将ONNX转为OpenVINO的IR格式
```
mo  --input_model yolov5s.onnx \
    --input_shape "[1,3,640,640]" \
    --scale 255 \
    --data_type FP32 \
    --output_dir ir_output
```

4. 量化模型
使用`quantization_int8.py`进行模型的量化，量化过程中需要使用到COCO2017数据集，需要将`data_path`指向coco2017目录
```
├── coco2017: 数据集根目录
     ├── train2017: 所有训练图像文件夹(118287张)
     ├── val2017: 所有验证图像文件夹(5000张)
     └── annotations: 对应标注文件夹
              ├── instances_train2017.json: 对应目标检测、分割任务的训练集标注文件
              ├── instances_val2017.json: 对应目标检测、分割任务的验证集标注文件
              ├── captions_train2017.json: 对应图像描述的训练集标注文件
              ├── captions_val2017.json: 对应图像描述的验证集标注文件
              ├── person_keypoints_train2017.json: 对应人体关键点检测的训练集标注文件
              └── person_keypoints_val2017.json: 对应人体关键点检测的验证集标注文件
```

5. benchmark
直接利用`benchmark_app`工具测试量化前后的`Throughput`，这里以`CPU: Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz`设备为例
```
benchmark_app -m ir_output/yolov5s.xml -d CPU -api sync
```
output：
```
Latency:
    Median:     59.56 ms
    AVG:        63.30 ms
    MIN:        57.88 ms
    MAX:        99.89 ms
Throughput: 16.79 FPS
```

```
benchmark_app -m quant_ir_output/quantized_yolov5s.xml -d CPU -api sync
```
output:
```
Latency:
    Median:     42.97 ms
    AVG:        46.56 ms
    MIN:        41.18 ms
    MAX:        95.75 ms
Throughput: 23.27 FPS
```

================================================
FILE: deploying_service/deploying_pytorch/convert_openvino/convert_yolov5/compare_fps.py
================================================
import time
import numpy as np
import torch
import onnxruntime
import matplotlib.pyplot as plt
from openvino.runtime import Core


def normalize(image: np.ndarray) -> np.ndarray:
    """
    Return a float32 copy of the image with every value divided by 255
    """
    scaled = image.astype(np.float32)
    return np.divide(scaled, 255.0, out=scaled)


def onnx_inference(onnx_path: str, image: np.ndarray, num_images: int = 20):
    """Time ``num_images`` ONNX Runtime runs on ``image`` and return the FPS.

    Only the inference loop is timed; session creation is excluded.
    """
    session = onnxruntime.InferenceSession(onnx_path)

    # Feed the image to the model's first input.
    feed = {session.get_inputs()[0].name: image}

    tic = time.perf_counter()
    for _ in range(num_images):
        session.run(None, feed)
    time_onnx = time.perf_counter() - tic

    print(
        f"ONNX model in Inference Engine/CPU: {time_onnx / num_images:.3f} "
        f"seconds per image, FPS: {num_images / time_onnx:.2f}"
    )

    return num_images / time_onnx


def ir_inference(ir_path: str, image: np.ndarray, num_images: int = 20):
    """Time ``num_images`` OpenVINO inferences on ``image`` and return the FPS.

    The model is read from ``ir_path`` and compiled for the CPU device.
    Only the inference loop is timed, matching the ONNX and PyTorch
    benchmarks in this file; model loading and infer-request creation are
    excluded from the measurement.
    """
    # Load the network in Inference Engine
    ie = Core()
    model_ir = ie.read_model(model=ir_path)
    compiled_model_ir = ie.compile_model(model=model_ir, device_name="CPU")

    # The image is fed to the model's first input.
    input_layer_ir = next(iter(compiled_model_ir.inputs))

    # Create the request before starting the clock so its setup cost does
    # not skew the per-image time.
    request_ir = compiled_model_ir.create_infer_request()

    start = time.perf_counter()
    for _ in range(num_images):
        request_ir.infer(inputs={input_layer_ir.any_name: image})
    end = time.perf_counter()
    time_ir = end - start
    print(
        f"IR model in Inference Engine/CPU: {time_ir / num_images:.3f} "
        f"seconds per image, FPS: {num_images / time_ir:.2f}"
    )

    return num_images / time_ir


def pytorch_inference(image: np.ndarray, num_images: int = 20):
    """Time ``num_images`` forward passes of YOLOv5s and return the FPS.

    The model is fetched through ``torch.hub`` and run in eval mode with
    gradients disabled; only the forward-pass loop is timed.
    """
    tensor = torch.as_tensor(image, dtype=torch.float32)

    net = torch.hub.load('ultralytics/yolov5', 'yolov5s')
    net.eval()

    with torch.no_grad():
        tic = time.perf_counter()
        for _ in range(num_images):
            net(tensor)
        time_torch = time.perf_counter() - tic

    print(
        f"PyTorch model on CPU: {time_torch / num_images:.3f} seconds per image, "
        f"FPS: {num_images / time_torch:.2f}"
    )

    return num_images / time_torch


def plot_fps(v: dict):
    """Draw a bar chart of FPS per model format, save it and show it.

    Args:
        v: mapping of model-format name to its FPS value.

    The chart is written to ``fps_vs.jpg`` before it is displayed: once
    ``plt.show()`` returns the figure may already be released, so saving
    afterwards can produce an empty image.
    """
    labels = list(v.keys())
    values = list(v.values())
    positions = range(len(labels))

    plt.bar(positions, values, align='center')
    plt.xticks(positions, labels)
    # Print each value slightly above its bar.
    for pos, fps in zip(positions, values):
        plt.text(x=pos, y=fps + 0.5, s=f"{fps:.2f}", ha='center')
    plt.xlabel('model format')
    plt.ylabel('fps')
    plt.title('FPS comparison')
    plt.savefig('fps_vs.jpg')
    plt.show()


def main():
    """Benchmark YOLOv5s under ONNX Runtime, OpenVINO IR and PyTorch, then plot the FPS."""
    image_h, image_w = 640, 640
    onnx_path = "yolov5s.onnx"
    ir_path = "ir_output/yolov5s.xml"

    def to_nchw(hwc: np.ndarray) -> np.ndarray:
        # [h, w, c] -> [c, h, w] -> [1, c, h, w]
        return np.expand_dims(np.transpose(hwc, (2, 0, 1)), 0)

    # Random test image; the IR model receives the raw values, while the
    # ONNX and PyTorch models receive the copy divided by 255.
    image = np.random.randn(image_h, image_w, 3)
    normalized_image = normalize(image)

    input_image = to_nchw(image)
    normalized_input_image = to_nchw(normalized_image)

    onnx_fps = onnx_inference(onnx_path, normalized_input_image, num_images=100)
    ir_fps = ir_inference(ir_path, input_image, num_images=100)
    pytorch_fps = pytorch_inference(normalized_input_image, num_images=100)

    results = {
        "pytorch": round(pytorch_fps, 2),
        "onnx": round(onnx_fps, 2),
        "ir": round(ir_fps, 2),
    }
    plot_fps(results)


if __name__ == '__main__':
    main()


================================================
FILE: deploying_service/deploying_pytorch/convert_openvino/convert_yolov5/compare_onnx_and_ir.py
================================================
import numpy as np
import onnxruntime
from openvino.runtime import Core


def normalize(image: np.ndarray) -> np.ndarray:
    """
    Return a float32 copy of the image with every value divided by 255
    """
    scaled = image.astype(np.float32)
    return np.divide(scaled, 255.0, out=scaled)


def onnx_inference(onnx_path: str, image: np.ndarray):
    """Run ``image`` through the ONNX model and return its first output."""
    session = onnxruntime.InferenceSession(onnx_path)

    # Feed the image to the model's first input and request all outputs.
    first_input_name = session.get_inputs()[0].name
    outputs = session.run(None, {first_input_name: image})
    return outputs[0]


def ir_inference(ir_path: str, image: np.ndarray):
    """Run ``image`` through the OpenVINO IR model and return its first output.

    The model is read from ``ir_path`` and compiled for the CPU device.
    """
    # Load the network in Inference Engine
    ie = Core()
    model_ir = ie.read_model(model=ir_path)
    compiled_model_ir = ie.compile_model(model=model_ir, device_name="CPU")

    # The result is looked up by the model's first output.
    output_layer_ir = next(iter(compiled_model_ir.outputs))

    # Run inference on the input image
    res_ir = compiled_model_ir([image])[output_layer_ir]
    return res_ir


def main():
    """Check that the ONNX and OpenVINO IR versions of YOLOv5s agree on a random image."""
    image_h, image_w = 640, 640
    onnx_path = "yolov5s.onnx"
    ir_path = "ir_output/yolov5s.xml"

    def to_nchw(hwc: np.ndarray) -> np.ndarray:
        # [h, w, c] -> [c, h, w] -> [1, c, h, w]
        return np.expand_dims(np.transpose(hwc, (2, 0, 1)), 0)

    # The IR model receives the raw values, while the ONNX model receives
    # the copy divided by 255.
    image = np.random.randn(image_h, image_w, 3)
    normalized_image = normalize(image)

    input_image = to_nchw(image)
    normalized_input_image = to_nchw(normalized_image)

    onnx_res = onnx_inference(onnx_path, normalized_input_image)
    ir_res = ir_inference(ir_path, input_image)

    # Raises if the two outputs differ beyond the tolerances.
    np.testing.assert_allclose(onnx_res, ir_res, rtol=1e-03, atol=1e-05)
    print("Exported model has been tested with OpenvinoRuntime, and the result looks good!")


if __name__ == '__main__':
    main()


================================================
FILE: deploying_service/deploying_pytorch/convert_openvino/convert_yolov5/draw_box_utils.py
================================================
from PIL.Image import Image, fromarray
import PIL.ImageDraw as ImageDraw
import PIL.ImageFont as ImageFont
from PIL import ImageColor
import numpy as np

# Colour names used when drawing detections. draw_objs picks the entry at
# index `cls % len(STANDARD_COLORS)` and resolves it with ImageColor.getrgb.
STANDARD_COLORS = [
    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',
    'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',
    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',
    'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
    'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',
    'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
    'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
    'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
    'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
    'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
    'WhiteSmoke', 'Yellow', 'YellowGreen'
]


def draw_text(draw,
              box: list,
              cls: int,
              score: float,
              category_index: dict,
              color: str,
              font: str = 'arial.ttf',
              font_size: int = 24):
    """
    Draw the "<class name>: <score>%" label for one bounding box.

    The label is placed above the box, or below it when there is not enough
    room above. It is rendered one character at a time, each on its own
    filled background rectangle.

    Args:
        draw: drawing context providing ``rectangle`` and ``text``.
        box: ``[left, top, right, bottom]`` of the bounding box.
        cls: class id; ``str(cls)`` is looked up in ``category_index``.
        score: confidence, printed as an integer percentage.
        category_index: mapping from class-id string to class name.
        color: fill colour of the label background.
        font: TrueType font file; the default PIL font is used if it
            cannot be loaded.
        font_size: font size passed to ``ImageFont.truetype``.
    """
    try:
        font = ImageFont.truetype(font, font_size)
    except IOError:
        font = ImageFont.load_default()

    def text_size(text):
        # ``getsize`` no longer exists in Pillow >= 10. When it is missing,
        # derive the same (width, height) pair from ``getbbox``: ink width,
        # and height measured down to the bottom of the bounding box.
        if hasattr(font, "getsize"):
            return font.getsize(text)
        bbox_left, _, bbox_right, bbox_bottom = font.getbbox(text)
        return bbox_right - bbox_left, bbox_bottom

    left, top, right, bottom = box
    # If the total height of the display strings added to the top of the bounding
    # box exceeds the top of the image, stack the strings below the bounding box
    # instead of above.
    display_str = f"{category_index[str(cls)]}: {int(100 * score)}%"
    display_str_heights = [text_size(ds)[1] for ds in display_str]
    # Each display_str has a top and bottom margin of 0.05x.
    display_str_height = (1 + 2 * 0.05) * max(display_str_heights)

    if top > display_str_height:
        text_top = top - display_str_height
        text_bottom = top
    else:
        text_top = bottom
        text_bottom = bottom + display_str_height

    # Iterating a str yields single characters, so each character gets its
    # own background rectangle and the cursor advances by its width.
    for ds in display_str:
        text_width, text_height = text_size(ds)
        margin = np.ceil(0.05 * text_width)
        draw.rectangle([(left, text_top),
                        (left + text_width + 2 * margin, text_bottom)], fill=color)
        draw.text((left + margin, text_top),
                  ds,
                  fill='black',
                  font=font)
        left += text_width


def draw_masks(image, masks, colors, thresh: float = 0.7, alpha: float = 0.5):
    """Blend coloured instance masks onto ``image`` and return a new image.

    Mask values above ``thresh`` count as foreground. Foreground pixels are
    painted with the mask's colour on a copy of the image, and that copy is
    mixed with the original using weight ``alpha``.
    """
    base = np.array(image)
    binary_masks = np.where(masks > thresh, True, False)

    # Paint every mask onto a copy; later masks overwrite earlier ones.
    painted = base.copy()
    for binary_mask, mask_color in zip(binary_masks, colors):
        painted[binary_mask] = mask_color

    blended = base * (1 - alpha) + painted * alpha
    return fromarray(blended.astype(np.uint8))


def draw_objs(image: Image,
              boxes: np.ndarray = None,
              classes: np.ndarray = None,
              scores: np.ndarray = None,
              masks: np.ndarray = None,
              category_index: dict = None,
              box_thresh: float = 0.1,
              mask_thresh: float = 0.5,
              line_thickness: int = 8,
              font: str = 'arial.ttf',
              font_size: int = 24,
              draw_boxes_on_image: bool = True,
              draw_masks_on_image: bool = False):
    """
    Draw bounding boxes, class labels and (optionally) masks onto an image.

    Args:
        image: image to draw on
        boxes: bounding boxes of the detected objects
        classes: class ids of the detected objects
        scores: confidence scores of the detected objects
        masks: masks of the detected objects
        category_index: mapping from class id to class name
        box_thresh: objects whose score is not above this value are dropped
        mask_thresh: threshold forwarded to ``draw_masks``
        line_thickness: line width of the bounding boxes
        font: font file used for the labels
        font_size: font size used for the labels
        draw_boxes_on_image: whether boxes and labels are drawn
        draw_masks_on_image: whether masks are drawn (needs ``masks``)

    Returns:
        The image with the drawings; the input image itself when no object
        passes ``box_thresh``.
    """

    # Keep only the objects whose score exceeds the threshold.
    keep = np.greater(scores, box_thresh)
    boxes, classes, scores = boxes[keep], classes[keep], scores[keep]
    if masks is not None:
        masks = masks[keep]
    if len(boxes) == 0:
        return image

    # One colour per object, chosen by class id.
    palette_size = len(STANDARD_COLORS)
    colors = [ImageColor.getrgb(STANDARD_COLORS[cls % palette_size]) for cls in classes]

    if draw_boxes_on_image:
        canvas = ImageDraw.Draw(image)
        for box, cls, score, color in zip(boxes, classes, scores, colors):
            left, top, right, bottom = box
            # Closed outline of the box, starting and ending at the top-left corner.
            outline = [(left, top), (left, bottom), (right, bottom),
                       (right, top), (left, top)]
            canvas.line(outline, width=line_thickness, fill=color)
            # Class name and score label.
            draw_text(canvas, box.tolist(), int(cls), float(score), category_index, color, font, font_size)

    if draw_masks_on_image and (masks is not None):
        image = draw_masks(image, masks, colors, mask_thresh)

    return image


================================================
FILE: deploying_service/deploying_pytorch/convert_openvino/convert_yolov5/evaluation.py
================================================
from tqdm import tqdm
import torch
from openvino.runtime import Core
from utils import MyDataLoader, EvalCOCOMetric, non_max_suppression


def main():
    """Evaluate the quantized YOLOv5s OpenVINO IR model on the COCO2017 val split.

    Every sample from ``MyDataLoader`` is run through the compiled model on
    CPU, the raw output is filtered with ``non_max_suppression``
    (conf 0.001, IoU 0.6, multi-label) and handed to ``EvalCOCOMetric``,
    which finally prints the pycocotools summary.
    """
    coco_root = "/data/coco2017"
    quantized_xml = "quant_ir_output/quantized_yolov5s.xml"
    input_hw = (640, 640)  # (height, width)

    dataset = MyDataLoader(coco_root, "val", size=input_hw)
    evaluator = EvalCOCOMetric(coco=dataset.coco,
                               classes_mapping=dataset.coco_id80_to_id91)

    # Read and compile the IR model for CPU execution.
    core = Core()
    network = core.read_model(model=quantized_xml)
    compiled = core.compile_model(model=network, device_name="CPU")
    model_inputs = compiled.inputs
    model_outputs = compiled.outputs

    infer_request = compiled.create_infer_request()
    for sample_idx in tqdm(range(len(dataset))):
        annotation, image, meta = dataset[sample_idx]
        # EvalCOCOMetric reads the image metadata from the third tuple slot.
        annotation = annotation + (meta,)

        infer_request.infer(inputs={model_inputs[0]: image})
        raw_output = infer_request.get_output_tensor(model_outputs[0].index).data

        # Keep only the detections of the single image in the batch.
        detections = non_max_suppression(torch.Tensor(raw_output),
                                         conf_thres=0.001,
                                         iou_thres=0.6,
                                         multi_label=True)[0]
        det_boxes = detections[:, :4].numpy()
        det_scores = detections[:, 4].numpy()
        det_classes = detections[:, 5].numpy().astype(int)
        evaluator.update(annotation, [det_boxes, det_classes, det_scores])

    evaluator.evaluate()


if __name__ == '__main__':
    main()


================================================
FILE: deploying_service/deploying_pytorch/convert_openvino/convert_yolov5/predict.py
================================================
import cv2
import numpy as np
import torch
from PIL import Image
import matplotlib.pyplot as plt
from openvino.runtime import Core
from utils import letterbox, scale_coords, non_max_suppression, coco80_names
from draw_box_utils import draw_objs


def main():
    """Run the YOLOv5s OpenVINO IR model on one image and save the visualisation.

    The image is letterboxed to 640x640, fed to the model as float32 NCHW
    (no pixel scaling is applied here - presumably normalisation is part of
    the exported model; TODO confirm), detections are filtered with NMS,
    mapped back to the original image size, drawn, shown and written to
    ``predict.jpg``.
    """
    image_file = "test.jpg"
    model_xml = "ir_output/yolov5s.xml"
    input_hw = (640, 640)  # (height, width)

    # OpenCV loads BGR; the rest of the pipeline works in RGB.
    rgb_image = cv2.cvtColor(cv2.imread(image_file), cv2.COLOR_BGR2RGB)
    boxed_image, ratio, pad = letterbox(rgb_image, input_hw, auto=False)
    # [h, w, c] -> [c, h, w] -> [1, c, h, w]
    chw_image = np.transpose(boxed_image, [2, 0, 1])
    network_input = np.expand_dims(chw_image, 0).astype(np.float32)

    # Read and compile the IR model for CPU execution.
    core = Core()
    network = core.read_model(model=model_xml)
    compiled = core.compile_model(model=network, device_name="CPU")
    model_inputs = compiled.inputs
    model_outputs = compiled.outputs

    infer_request = compiled.create_infer_request()
    infer_request.infer(inputs={model_inputs[0]: network_input})
    raw_output = infer_request.get_output_tensor(model_outputs[0].index).data

    # NMS with default thresholds; keep the detections of the single image.
    detections = non_max_suppression(torch.Tensor(raw_output))[0]
    det_boxes = detections[:, :4].numpy()
    det_scores = detections[:, 4].numpy()
    det_classes = detections[:, 5].numpy().astype(int)
    # Undo the letterbox transform so boxes refer to the original image.
    det_boxes = scale_coords(boxed_image.shape, det_boxes, rgb_image.shape, (ratio, pad))

    names_by_index = {str(idx): name for idx, name in enumerate(coco80_names)}
    annotated = draw_objs(Image.fromarray(rgb_image),
                          det_boxes,
                          det_classes,
                          det_scores,
                          category_index=names_by_index)
    plt.imshow(annotated)
    plt.show()
    annotated.save("predict.jpg")


if __name__ == '__main__':
    main()


================================================
FILE: deploying_service/deploying_pytorch/convert_openvino/convert_yolov5/quantization_int8.py
================================================
import time
from addict import Dict
from compression.engines.ie_engine import IEEngine
from compression.graph import load_model, save_model
from compression.graph.model_utils import compress_model_weights
from compression.pipeline.initializer import create_pipeline
from yaspin import yaspin
from utils import MyDataLoader, MAPMetric


def main():
    """Quantize the YOLOv5s IR model with POT ``DefaultQuantization`` and report mAP.

    The pipeline is calibrated on the COCO2017 val split, the compressed
    model is saved to ``quant_ir_output``, and the metric of both the
    original and the quantized model is printed for comparison.
    """
    coco_root = "/data/coco2017"
    source_xml = "ir_output/yolov5s.xml"
    source_bin = "ir_output/yolov5s.bin"
    output_dir = "quant_ir_output"
    output_name = "quantized_yolov5s"
    input_w = 640
    input_h = 640

    model_config = Dict({
        'model_name': 'yolov5s',
        'model': source_xml,
        'weights': source_bin,
        'inputs': 'images',
        'outputs': 'output'
    })
    engine_config = Dict({'device': 'CPU'})

    quantization_params = {
        'target_device': 'CPU',
        'preset': 'performance',
        'stat_subset_size': 300
    }
    algorithms = [{'name': 'DefaultQuantization', 'params': quantization_params}]

    # 1. Load the model to be quantized.
    model = load_model(model_config)

    # 2. Data loader that supplies calibration / evaluation samples.
    calibration_loader = MyDataLoader(coco_root, "val", (input_h, input_w))

    # 3. Metric (optional for DefaultQuantization; it may be None).
    map_metric = MAPMetric(map_value="map")

    # 4. Engine used for statistics collection and metric calculation.
    engine = IEEngine(config=engine_config, data_loader=calibration_loader, metric=map_metric)

    # 5. Pipeline of compression algorithms.
    pipeline = create_pipeline(algorithms, engine)

    # 6. Run the pipeline and time it.
    algorithm_name = pipeline.algo_seq[0].name
    with yaspin(
            text=f"Executing POT pipeline on {model_config['model']} with {algorithm_name}"
    ) as spinner:
        started = time.perf_counter()
        compressed_model = pipeline.run(model)
        finished = time.perf_counter()
        spinner.ok("✔")
    print(f"Quantization finished in {finished - started:.2f} seconds")

    # 7. (Optional) store weights in quantized precision to shrink the .bin file.
    compress_model_weights(compressed_model)

    # 8. Save the compressed model into ``output_dir``.
    saved_paths = save_model(
        model=compressed_model,
        save_path=output_dir,
        model_name=output_name,
    )
    print("The quantized model is stored at", saved_paths[0]["model"])

    # Evaluate the original IR model and the quantized one with the same engine.
    original_model = load_model(model_config=model_config)
    evaluation_pipeline = create_pipeline(algo_config=dict(), engine=engine)

    with yaspin(text="Evaluating original IR model"):
        original_metric = evaluation_pipeline.evaluate(original_model)

    if original_metric:
        for key, value in original_metric.items():
            print(f"The {key} score of the original model is {value:.5f}")

    with yaspin(text="Evaluating quantized IR model"):
        quantized_metric = pipeline.evaluate(compressed_model)

    if quantized_metric:
        for key, value in quantized_metric.items():
            print(f"The {key} score of the quantized INT8 model is {value:.5f}")


if __name__ == '__main__':
    main()


================================================
FILE: deploying_service/deploying_pytorch/convert_openvino/convert_yolov5/requirements.txt
================================================
torch==1.13.1
torchvision==0.12.0
onnx==1.13.0
onnxruntime==1.8.0
protobuf==3.19.5
openvino-dev==2022.1.0
matplotlib
torchmetrics==0.9.1

================================================
FILE: deploying_service/deploying_pytorch/convert_openvino/convert_yolov5/utils.py
================================================
import os
import time
import json
import copy

import cv2
import numpy as np
import torch
from torchmetrics.detection.mean_ap import MeanAveragePrecision
import torchvision
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from compression.api import DataLoader, Metric


# Names of the 80 COCO detection classes. The position of a name in this list
# is the class index used throughout this module: MyDataLoader maps these
# indices to the category ids of the annotation file (coco_id80_to_id91) and
# parse_targets looks category names up here to produce "category_id".
coco80_names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
                'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
                'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
                'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
                'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
                'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
                'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
                'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
                'scissors', 'teddy bear', 'hair drier', 'toothbrush']


def box_iou(box1, box2):
    # Adapted from https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
    """
    Compute the pairwise intersection-over-union (Jaccard index) of two box sets.
    Boxes are expected in (x1, y1, x2, y2) format.
    Arguments:
        box1 (Tensor[N, 4])
        box2 (Tensor[M, 4])
    Returns:
        iou (Tensor[N, M]): entry [i, j] is the IoU of box1[i] and box2[j]
    """
    # Area of every box: width * height.
    area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
    area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])

    # Broadcast box1 against box2 to get the (N, M, 2) corners of each overlap.
    overlap_top_left = torch.max(box1[:, None, :2], box2[:, :2])
    overlap_bottom_right = torch.min(box1[:, None, 2:], box2[:, 2:])

    # Negative extents mean "no overlap" and are clamped to zero before
    # multiplying width by height.
    inter = (overlap_bottom_right - overlap_top_left).clamp(0).prod(2)

    union = area1[:, None] + area2 - inter
    return inter / union


def xywh2xyxy(x):
    """Convert nx4 boxes from [cx, cy, w, h] to [x1, y1, x2, y2].

    (x1, y1) is the top-left and (x2, y2) the bottom-right corner. Works on
    both torch tensors and numpy arrays; the input is left untouched and a
    copy with the converted first four columns is returned.
    """
    converted = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    half_width = x[:, 2] / 2
    half_height = x[:, 3] / 2
    converted[:, 0] = x[:, 0] - half_width   # left
    converted[:, 1] = x[:, 1] - half_height  # top
    converted[:, 2] = x[:, 0] + half_width   # right
    converted[:, 3] = x[:, 1] + half_height  # bottom
    return converted


def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
                        labels=(), max_det=300):
    """Runs Non-Maximum Suppression (NMS) on inference results

    Args:
        prediction: 3-D tensor indexed as [image, box, 5 + nc]. Columns 0:4 are
            treated as (center x, center y, width, height), column 4 as the
            objectness confidence and columns 5: as per-class confidences.
            NOTE: the objectness column of boxes with out-of-range width/height
            is zeroed directly on each per-image slice of this tensor, so the
            caller's data appears to be modified in place.
        conf_thres: confidence threshold in [0, 1]; applied to the objectness
            and again to objectness * class confidence.
        iou_thres: IoU threshold in [0, 1] passed to torchvision.ops.nms.
        classes: optional collection of class indices; when given, only
            detections of these classes are kept.
        agnostic: if True, NMS ignores the class and suppresses across classes.
        multi_label: if True (and there is more than one class), a box may yield
            one detection per class whose confidence exceeds conf_thres.
        labels: optional per-image a-priori labels that are appended as extra
            boxes with confidence 1.0 (assumes rows of [cls, x, y, w, h] - TODO confirm).
        max_det: maximum number of detections kept per image.

    Returns:
         list of detections, on (n,6) tensor per image [xyxy, conf, cls]
         Images without detections all reference the same empty (0, 6) tensor.
    """

    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

    # Settings
    min_wh, max_wh = 2, 7680  # (pixels) minimum and maximum box width and height
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    # The list repeats one shared empty tensor; entries are replaced per image below.
    output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # Zero the objectness of boxes whose width or height is outside [min_wh, max_wh].
        x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        # xc was computed before the zeroing above; zeroed boxes are dropped later
        # because their combined confidence becomes 0.
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            lb = labels[xi]
            v = torch.zeros((len(lb), nc + 5), device=x.device)
            v[:, :4] = lb[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(lb)), lb[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            # i: box row, j: class column of every (box, class) pair above the threshold
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        elif n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence

        # Batched NMS
        # Shifting each box by class_index * max_wh keeps boxes of different classes
        # from overlapping, so a single nms() call works per class.
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        # merge is hard-coded to False above, so this branch is currently never taken.
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        # Remaining images keep the empty placeholder when the time budget is exhausted.
        if (time.time() - t) > time_limit:
            print(f'WARNING: NMS time limit {time_limit}s exceeded')
            break  # time limit exceeded

    return output


class MAPMetric(Metric):
    """Mean Average Precision metric backed by torchmetrics' ``MeanAveragePrecision``."""

    def __init__(self, map_value="map", conf_thres=0.001, iou_thres=0.6):
        """
        See https://torchmetrics.readthedocs.io/en/stable/detection/mean_average_precision.html

        :map_value: key of the torchmetrics result to report. Default: "map".
                    Other possible keys:
                    ['mar_1', 'mar_10', 'mar_100', 'mar_small', 'mar_medium', 'mar_large',
                     'map', 'map_50', 'map_75', 'map_small', 'map_medium', 'map_large']
        :conf_thres: confidence threshold forwarded to non_max_suppression
        :iou_thres: IoU threshold forwarded to non_max_suppression
        """
        self._name = map_value
        self.metric = MeanAveragePrecision(box_format="xyxy")
        self.conf_thres = conf_thres
        self.iou_thres = iou_thres
        # The base initializer is deliberately invoked last, after self.metric
        # exists (presumably it may call reset(); keep this order).
        super().__init__()

    @property
    def value(self):
        """
        Metric value for the last model output.
        Format: {metric_name: [metric_values_per_image]}; a constant
        placeholder of 0 is returned here.
        """
        return {self._name: [0]}

    @property
    def avg_value(self):
        """
        Metric value accumulated over all model outputs seen so far.
        Format: {metric_name: metric_value}
        """
        computed = self.metric.compute()
        return {self._name: computed[self._name].item()}

    def update(self, output, target):
        """
        Translate raw network output and annotations into the structure
        torchmetrics expects and feed them to ``metric.update()``.

        :param output: model output; only ``output[0]`` is used
        :param target: annotations; ``target[0]`` is iterated as a sequence of
                       dicts with "bbox" (four values) and "category_id"
        """
        gt_boxes, gt_labels = [], []
        for annotation in target[0]:
            gt_xmin, gt_ymin, gt_xmax, gt_ymax = annotation["bbox"]
            gt_category = annotation["category_id"]
            gt_boxes.append([round(gt_xmin), round(gt_ymin), round(gt_xmax), round(gt_ymax)])
            gt_labels.append(gt_category)

        raw_prediction = torch.Tensor(output[0]).float()
        detections_per_image = non_max_suppression(raw_prediction,
                                                   conf_thres=self.conf_thres,
                                                   iou_thres=self.iou_thres,
                                                   multi_label=True)

        det_boxes, det_labels, det_scores = [], [], []
        for detections in detections_per_image:
            for xmin, ymin, xmax, ymax, conf, label in detections.numpy():
                det_boxes.append([round(xmin), round(ymin), round(xmax), round(ymax)])
                det_labels.append(label)
                det_scores.append(conf)

        prediction_entry = dict(
            boxes=torch.Tensor(det_boxes).float(),
            labels=torch.Tensor(det_labels).short(),
            scores=torch.Tensor(det_scores),
        )
        target_entry = dict(
            boxes=torch.Tensor(gt_boxes).float(),
            labels=torch.Tensor(gt_labels).short(),
        )
        self.metric.update([prediction_entry], [target_entry])

    def reset(self):
        """
        Clear the accumulated state of the wrapped torchmetrics metric.
        """
        self.metric.reset()

    def get_attributes(self):
        """
        Dictionary of metric attributes {metric_name: {attribute_name: value}}.
        Required attributes: 'direction': 'higher-better' or 'higher-worse'
                             'type': metric type
        """
        attributes = {"direction": "higher-better", "type": "mAP"}
        return {self._name: attributes}


def _coco_remove_images_without_annotations(dataset, ids):
    """
    删除coco数据集中没有目标，或者目标面积非常小的数据
    refer to:
    https://github.com/pytorch/vision/blob/master/references/detection/coco_utils.py
    :param dataset:
    :param cat_list:
    :return:
    """
    def _has_only_empty_bbox(anno):
        return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno)

    def _has_valid_annotation(anno):
        # if it's empty, there is no annotation
        if len(anno) == 0:
            return False
        # if all boxes have close to zero area, there is no annotation
        if _has_only_empty_bbox(anno):
            return False

        return True

    valid_ids = []
    for ds_idx, img_id in enumerate(ids):
        ann_ids = dataset.getAnnIds(imgIds=img_id, iscrowd=None)
        anno = dataset.loadAnns(ann_ids)

        if _has_valid_annotation(anno):
            valid_ids.append(img_id)

    return valid_ids


def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    """Map xyxy boxes from the letterboxed image (img1_shape) back to img0_shape.

    ``coords`` is modified in place (padding removed, divided by the resize
    gain, clipped to img0_shape) and also returned. When ``ratio_pad`` is
    given it is read as ((ratio_w, ratio_h), (pad_x, pad_y)) and both ratios
    must be equal; otherwise gain and padding are derived from the shapes.
    """
    if ratio_pad is not None:
        assert ratio_pad[0][0] == ratio_pad[0][1]
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]
    else:
        # gain = letterboxed size / original size, limited by the tighter side
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])
        pad_x = (img1_shape[1] - img0_shape[1] * gain) / 2
        pad_y = (img1_shape[0] - img0_shape[0] * gain) / 2
        pad = pad_x, pad_y

    coords[:, [0, 2]] -= pad[0]  # remove horizontal padding
    coords[:, [1, 3]] -= pad[1]  # remove vertical padding
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)
    return coords


def clip_coords(boxes, shape):
    """Clip xyxy boxes in place to an image of ``shape`` = (height, width, ...).

    x coordinates are limited to [0, width] and y coordinates to
    [0, height]. Nothing is returned.
    """
    height_limit, width_limit = shape[0], shape[1]
    if isinstance(boxes, torch.Tensor):
        # clamp column by column, in place
        for column, upper in ((0, width_limit), (1, height_limit),
                              (2, width_limit), (3, height_limit)):
            boxes[:, column].clamp_(0, upper)
        return

    # numpy array: clip the x pair and the y pair in two grouped operations
    boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, width_limit)
    boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, height_limit)


def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize an image keeping its aspect ratio and pad it with ``color``.

    Args:
        im: image array whose first two dimensions are (height, width).
        new_shape: target (height, width), or a single int for a square.
        color: border colour passed to cv2.copyMakeBorder.
        auto: pad only up to the next multiple of ``stride`` (minimum rectangle).
        scaleFill: used only when ``auto`` is False; stretch to ``new_shape``
            without padding.
        scaleup: if False the image is only ever scaled down.
        stride: stride used for the ``auto`` padding.

    Returns:
        (padded image, (width ratio, height ratio), (left pad, top pad))
    """
    src_h, src_w = im.shape[:2]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    dst_h, dst_w = new_shape[0], new_shape[1]

    # Scale ratio (new / old), limited by the tighter side
    scale = min(dst_h / src_h, dst_w / src_w)
    if not scaleup:
        scale = min(scale, 1.0)

    ratio = scale, scale  # width, height ratios
    new_unpad = int(round(src_w * scale)), int(round(src_h * scale))  # (w, h) after resize
    dw = dst_w - new_unpad[0]
    dh = dst_h - new_unpad[1]
    if auto:
        # only pad up to the next stride multiple
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)
    elif scaleFill:
        # stretch to the full target size, no padding at all
        dw, dh = 0.0, 0.0
        new_unpad = (dst_w, dst_h)
        ratio = dst_w / src_w, dst_h / src_h

    # split the padding between the two sides
    dw /= 2
    dh /= 2

    if (src_w, src_h) != new_unpad:
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    # the -0.1 / +0.1 offsets send an odd padding pixel to the bottom / right side
    top = int(round(dh - 0.1))
    bottom = int(round(dh + 0.1))
    left = int(round(dw - 0.1))
    right = int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return im, ratio, (left, top)


class MyDataLoader(DataLoader):
    """`MS Coco Detection <https://cocodataset.org/>`_ Dataset.

    Images that have no annotations, or only degenerate ones, are removed
    from the index when the loader is created.

    Args:
        root (string): Root directory containing ``{dataset}2017`` (images)
            and ``annotations/instances_{dataset}2017.json``.
        dataset (string): "train" or "val".
        size (tuple): (h, w) every image is letterboxed to.
    """
    def __init__(self, root, dataset="train", size=(640, 640)):
        assert dataset in ["train", "val"], 'dataset must be in ["train", "val"]'
        anno_file = "instances_{}2017.json".format(dataset)
        assert os.path.exists(root), "file '{}' does not exist.".format(root)
        self.img_root = os.path.join(root, "{}2017".format(dataset))
        assert os.path.exists(self.img_root), "path '{}' does not exist.".format(self.img_root)
        self.anno_path = os.path.join(root, "annotations", anno_file)
        assert os.path.exists(self.anno_path), "file '{}' does not exist.".format(self.anno_path)

        self.mode = dataset
        self.size = size
        self.coco = COCO(self.anno_path)

        # category id (as stored in the annotation file) -> class name, and the reverse
        self.coco91_id2classes = {cat["id"]: cat["name"] for cat in self.coco.cats.values()}
        coco90_classes2id = {cat["name"]: cat["id"] for cat in self.coco.cats.values()}

        self.coco80_classes = coco80_names
        # index in coco80_names -> category id of the annotation file
        self.coco_id80_to_id91 = {idx: coco90_classes2id[name] for idx, name in enumerate(coco80_names)}

        ids = sorted(self.coco.imgs.keys())

        # drop images without objects or with only degenerate boxes
        self.ids = _coco_remove_images_without_annotations(self.coco, ids)

    def parse_targets(self,
                      coco_targets: list,
                      w: int = None,
                      h: int = None,
                      ratio: tuple = None,
                      pad: tuple = None):
        """Convert raw COCO annotations of one image into evaluation targets.

        Args:
            coco_targets: annotation dicts with "bbox" ([xmin, ymin, w, h]),
                "category_id" and "iscrowd".
            w: width of the original image (must be > 0).
            h: height of the original image (must be > 0).
            ratio: optional (width ratio, height ratio) applied to the boxes.
            pad: optional (dw, dh) offset added to the boxes after scaling.

        Returns:
            list of dicts with "category_id" (index into coco80_names) and
            "bbox" ([xmin, ymin, xmax, ymax] in pixels, after ratio/pad).
        """
        assert w > 0
        assert h > 0

        # keep single-object annotations only (iscrowd == 0)
        anno = [obj for obj in coco_targets if obj['iscrowd'] == 0]

        boxes = [obj["bbox"] for obj in anno]

        # the reshape keeps a (0, 4) array when there are no boxes
        boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4)
        # [xmin, ymin, w, h] -> [xmin, ymin, xmax, ymax], clipped to the image
        boxes[:, 2:] += boxes[:, :2]
        boxes[:, 0::2] = np.clip(boxes[:, 0::2], a_min=0, a_max=w)
        boxes[:, 1::2] = np.clip(boxes[:, 1::2], a_min=0, a_max=h)

        classes = [self.coco80_classes.index(self.coco91_id2classes[obj["category_id"]])
                   for obj in anno]
        classes = np.array(classes, dtype=int)

        # keep valid boxes only, i.e. x_max > x_min and y_max > y_min
        keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
        boxes = boxes[keep]
        classes = classes[keep]

        if ratio is not None:
            # width, height ratios
            boxes[:, 0::2] *= ratio[0]
            boxes[:, 1::2] *= ratio[1]

        if pad is not None:
            # dw, dh padding
            dw, dh = pad
            boxes[:, 0::2] += dw
            boxes[:, 1::2] += dh

        return [
            {"category_id": int(cls_idx), "bbox": box.tolist()}
            for cls_idx, box in zip(classes, boxes)
        ]

    def __getitem__(self, index):
        """
        Get an item from the dataset at the specified index.

        The image is letterboxed to ``self.size`` and the ground-truth boxes
        are transformed with the same ratio and padding, so they are absolute
        pixel coordinates in the letterboxed image (not relative 0..1 values).

        :return: (annotation, input_image, metadata) where annotation is
                 (index, target_annotations), target_annotations being a list of
                 dicts with keys "category_id" and "bbox" ([xmin, ymin, xmax, ymax]);
                 input_image is a float32 array of shape [1, c, h, w]; metadata is a
                 dict with keys "filename", "origin_shape", "shape", "img_id"
                 and "ratio_pad" ([ratio, pad] as returned by letterbox).
        :raises OSError: if the image file cannot be read.
        """
        coco = self.coco
        img_id = self.ids[index]
        ann_ids = coco.getAnnIds(imgIds=img_id)
        coco_target = coco.loadAnns(ann_ids)

        image_path = coco.loadImgs(img_id)[0]['file_name']
        full_path = os.path.join(self.img_root, image_path)
        img = cv2.imread(full_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with a clear message rather than on `img.shape` below.
            raise OSError("failed to read image '{}'".format(full_path))

        origin_h, origin_w = img.shape[:2]
        image, ratio, pad = letterbox(img, auto=False, new_shape=self.size)
        target_annotations = self.parse_targets(coco_target, origin_w, origin_h, ratio, pad)

        item_annotation = (index, target_annotations)
        # [h, w, c] -> [c, h, w] -> [1, c, h, w]
        input_image = np.expand_dims(image.transpose(2, 0, 1), axis=0).astype(
            np.float32
        )
        return (
            item_annotation,
            input_image,
            {"filename": str(image_path),
             "origin_shape": img.shape,
             "shape": image.shape,
             "img_id": img_id,
             "ratio_pad": [ratio, pad]},
        )

    def __len__(self):
        return len(self.ids)

    @staticmethod
    def collate_fn(x):
        # identity collate: samples are passed through unchanged
        return x


class EvalCOCOMetric:
    """Collect detections in COCO result format and evaluate them with COCOeval."""

    def __init__(self,
                 coco: COCO = None,
                 iou_type: str = "bbox",
                 results_file_name: str = "predict_results.json",
                 classes_mapping: dict = None):
        # work on a private copy of the ground-truth object
        self.coco = copy.deepcopy(coco)
        assert iou_type in ["bbox"]
        self.iou_type = iou_type
        self.results_file_name = results_file_name
        self.classes_mapping = classes_mapping
        self.results = []
        self.coco_evaluator = None

    def prepare_for_coco_detection(self, ann, output):
        """Append the detections of one image to ``self.results`` in COCOeval format.

        ``ann[2]`` is the metadata dict ("img_id", "shape", "origin_shape",
        "ratio_pad"); ``output`` is [boxes, classes, scores]. The boxes array
        is modified in place.
        """
        # nothing to record for an image without detections
        if len(output[0]) == 0:
            return

        meta = ann[2]
        img_id = meta["img_id"]
        # map boxes from the letterboxed image back to the original image
        boxes = scale_coords(img1_shape=meta["shape"],
                             coords=output[0],
                             img0_shape=meta["origin_shape"],
                             ratio_pad=meta["ratio_pad"])
        # COCOeval expects [x_min, y_min, w, h]; predictions are
        # [x_min, y_min, x_max, y_max], so turn the far corner into a size
        boxes[:, 2:] -= boxes[:, :2]
        class_list = output[1].tolist()
        score_list = output[2].tolist()

        for score, cls, box in zip(score_list, class_list, boxes):
            score = float(score)
            category = int(cls)
            if self.classes_mapping is not None:
                category = self.classes_mapping[category]
            # coordinates are rounded to two decimals to keep the JSON file small
            rounded_box = [round(coord, 2) for coord in box.tolist()]

            self.results.append({"image_id": img_id,
                                 "category_id": category,
                                 "bbox": rounded_box,
                                 "score": round(score, 3)})

    def update(self, targets, outputs):
        """Record the predictions of one image."""
        if self.iou_type != "bbox":
            raise KeyError(f"not support iou_type: {self.iou_type}")
        self.prepare_for_coco_detection(targets, outputs)

    def evaluate(self):
        """Write all results to ``results_file_name``, run COCOeval and return its stats as a list."""
        # dump the collected predictions to a json file
        with open(self.results_file_name, 'w') as json_file:
            json_file.write(json.dumps(self.results, indent=4))

        # load the predictions back through the ground-truth object
        ground_truth = self.coco
        predictions = ground_truth.loadRes(self.results_file_name)

        self.coco_evaluator = COCOeval(cocoGt=ground_truth, cocoDt=predictions, iouType=self.iou_type)

        self.coco_evaluator.evaluate()
        self.coco_evaluator.accumulate()
        print(f"IoU metric: {self.iou_type}")
        self.coco_evaluator.summarize()

        # numpy array -> plain list
        return self.coco_evaluator.stats.tolist()


================================================
FILE: deploying_service/deploying_pytorch/convert_tensorrt/convert_resnet34/compare_onnx_and_trt.py
================================================
import numpy as np
import tensorrt as trt
import onnxruntime
import pycuda.driver as cuda
import pycuda.autoinit


def normalize(image: np.ndarray) -> np.ndarray:
    """
    Return a float32 copy of the image scaled by 1/255 and standardised
    with a fixed per-channel mean and standard deviation (last axis = channels).
    """
    channel_mean = np.array((0.485, 0.456, 0.406))
    channel_std = np.array((0.229, 0.224, 0.225))

    result = image.astype(np.float32)
    # all steps write back into the float32 copy
    np.divide(result, 255.0, out=result)
    np.subtract(result, channel_mean, out=result)
    np.divide(result, channel_std, out=result)
    return result


def onnx_inference(onnx_path: str, image: np.ndarray):
    """Run the ONNX model at ``onnx_path`` on ``image`` with ONNX Runtime.

    The image is bound to the model's first input and the first output of
    the session is returned.
    """
    session = onnxruntime.InferenceSession(onnx_path)

    first_input_name = session.get_inputs()[0].name
    all_outputs = session.run(None, {first_input_name: image})
    return all_outputs[0]


def trt_inference(trt_path: str, image: np.ndarray):
    """Run a serialized TensorRT engine on ``image`` and return its output.

    Args:
        trt_path: path of the serialized engine file.
        image: input array; its last two dimensions are used as height and
            width of the input binding (assumes a float32 [1, 3, H, W] array
            matching the engine's input dtype - TODO confirm).

    Returns:
        The output buffer reshaped to (1, -1). If the engine has several
        input or output bindings, only the last one of each kind is copied
        to / from the device, because the buffer variables are overwritten
        in the binding loop.
    """
    # Load the network in Inference Engine
    trt_logger = trt.Logger(trt.Logger.WARNING)
    with open(trt_path, "rb") as f, trt.Runtime(trt_logger) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())

    with engine.create_execution_context() as context:
        # Set input shape based on image dimensions for inference
        # (the input binding is looked up by the name "input")
        context.set_binding_shape(engine.get_binding_index("input"), (1, 3, image.shape[-2], image.shape[-1]))
        # Allocate host and device buffers
        # `bindings` collects one device pointer per binding, in engine order.
        bindings = []
        for binding in engine:
            binding_idx = engine.get_binding_index(binding)
            # size and dtype are computed for every binding but only used for outputs
            size = trt.volume(context.get_binding_shape(binding_idx))
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            if engine.binding_is_input(binding):
                # the host copy must be contiguous; the device buffer is sized from the image itself
                input_buffer = np.ascontiguousarray(image)
                input_memory = cuda.mem_alloc(image.nbytes)
                bindings.append(int(input_memory))
            else:
                # page-locked host buffer to receive the result, plus a device buffer of the same size
                output_buffer = cuda.pagelocked_empty(size, dtype)
                output_memory = cuda.mem_alloc(output_buffer.nbytes)
                bindings.append(int(output_memory))

        stream = cuda.Stream()
        # Transfer input data to the GPU.
        cuda.memcpy_htod_async(input_memory, input_buffer, stream)
        # Run inference
        context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
        # Transfer prediction output from the GPU.
        cuda.memcpy_dtoh_async(output_buffer, output_memory, stream)
        # Synchronize the stream
        # (all three operations above were queued on the same stream)
        stream.synchronize()

        # flatten the output into a single row
        res_trt = np.reshape(output_buffer, (1, -1))

    return res_trt


def main():
    """Compare ONNX Runtime and TensorRT outputs on one random input.

    Raises:
        AssertionError: If the two outputs differ beyond
            ``rtol=1e-03`` / ``atol=1e-05``.
    """
    onnx_path = "resnet34.onnx"
    trt_path = "trt_output/resnet34.trt"
    image_h, image_w = 224, 224

    # Random [h, w, c] image passed through this file's normalize() helper.
    normalized_image = normalize(np.random.randn(image_h, image_w, 3))

    # Convert to the network input layout:
    # [h, w, c] -> [c, h, w] -> [1, c, h, w]
    chw_image = np.transpose(normalized_image, (2, 0, 1))
    batch = np.expand_dims(chw_image, 0)

    onnx_res = onnx_inference(onnx_path, batch)
    trt_res = trt_inference(trt_path, batch)
    np.testing.assert_allclose(onnx_res, trt_res, rtol=1e-03, atol=1e-05)
    print("Exported model has been tested with TensorRT Runtime, and the result looks good!")


if __name__ == '__main__':
    main()


================================================
FILE: deploying_service/deploying_pytorch/convert_tensorrt/convert_resnet34/convert_pytorch2onnx.py
================================================
import torch
import torch.onnx
import onnx
import onnxruntime
import numpy as np
from torchvision.models import resnet34

# CPU device handle. NOTE(review): main() in this file never references it.
device = torch.device("cpu")


def to_numpy(tensor):
    """Return ``tensor`` as a NumPy array on the CPU.

    Tensors that require grad are detached first, since ``numpy()`` cannot be
    called on a tensor that is part of the autograd graph.
    """
    if tensor.requires_grad:
        tensor = tensor.detach()
    return tensor.cpu().numpy()


def main():
    """Export the 5-class ResNet-34 to ONNX and verify it with ONNX Runtime.

    Reads ``resNet34(flower).pth`` and writes ``resnet34.onnx`` in the current
    working directory.

    Raises:
        AssertionError: If the PyTorch and ONNX Runtime outputs differ beyond
            ``rtol=1e-03`` / ``atol=1e-05``.
    """
    weights_path = "resNet34(flower).pth"
    onnx_file_name = "resnet34.onnx"
    # [batch, channel, height, width]
    input_shape = (1, 3, 224, 224)

    # Build the network and restore the trained weights.
    model = resnet34(pretrained=False, num_classes=5)
    state_dict = torch.load(weights_path, map_location='cpu')
    model.load_state_dict(state_dict)
    model.eval()

    # Random input, also used to compute the reference PyTorch output.
    x = torch.rand(*input_shape, requires_grad=True)
    torch_out = model(x)

    # Export with fixed tensor names so downstream tools can look them up.
    torch.onnx.export(model, x, onnx_file_name,
                      input_names=["input"],
                      output_names=["output"],
                      verbose=False)

    # Structural check of the file that was just written.
    onnx.checker.check_model(onnx.load(onnx_file_name))

    # Run the exported graph on the same input.
    ort_session = onnxruntime.InferenceSession(onnx_file_name)
    input_name = ort_session.get_inputs()[0].name
    ort_outs = ort_session.run(None, {input_name: to_numpy(x)})

    # assert_allclose raises an AssertionError if the two results are not
    # equal up to the given tolerance.
    np.testing.assert_allclose(to_numpy(torch_out), ort_outs[0], rtol=1e-03, atol=1e-05)
    print("Exported model has been tested with ONNXRuntime, and the result looks good!")


if __name__ == '__main__':
    main()


================================================
FILE: deploying_service/deploying_pytorch/convert_tensorrt/convert_resnet34/my_dataset.py
================================================
from PIL import Image
import torch
from torch.utils.data import Dataset


class MyDataSet(Dataset):
    """Custom dataset pairing image file paths with their class labels."""

    def __init__(self, images_path: list, images_class: list, transform=None):
        """Store the parallel path/label lists and the optional transform."""
        self.images_path = images_path
        self.images_class = images_class
        self.transform = transform

    def __len__(self):
        """Return the number of image paths."""
        return len(self.images_path)

    def __getitem__(self, item):
        """Open image ``item`` and return ``(image, label)``.

        Raises:
            ValueError: If the opened image is not in 'RGB' mode.
        """
        path = self.images_path[item]
        img = Image.open(path)
        # 'RGB' is a colour image, 'L' a grayscale one; only 'RGB' is accepted.
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(path))
        label = self.images_class[item]

        if self.transform is None:
            return img, label
        return self.transform(img), label

    @staticmethod
    def collate_fn(batch):
        """Stack a list of ``(image, label)`` samples into batch tensors."""
        # For the official default_collate implementation, see
        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        images, labels = zip(*batch)
        return torch.stack(images, dim=0), torch.as_tensor(labels)


================================================
FILE: deploying_service/deploying_pytorch/convert_tensorrt/convert_resnet34/quantization.py
================================================
"""
refer to:
https://docs.nvidia.com/deeplearning/tensorrt/pytorch-quantization-toolkit/docs/userguide.html
"""
import os
import math
import argparse

from absl import logging
from tqdm import tqdm
import torch
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torchvision import transforms
from torchvision.models.resnet import resnet34 as create_model
from pytorch_quantization import nn as quant_nn
from pytorch_quantization import quant_modules, calib
from pytorch_quantization.tensor_quant import QuantDescriptor

from my_dataset import MyDataSet
from utils import read_split_data, train_one_epoch, evaluate

# Raise absl's logging threshold to FATAL for the whole script.
logging.set_verbosity(logging.FATAL)


def export_onnx(model, onnx_filename, onnx_bs):
    """Export ``model`` to ONNX using a random CUDA input.

    Args:
        model: Model to export; it is switched to eval mode first.
        onnx_filename: Destination path of the ONNX file.
        onnx_bs: Batch dimension of the ``(onnx_bs, 3, 224, 224)`` dummy input.
    """
    model.eval()
    # We have to shift to pytorch's fake quant ops before exporting the model to ONNX
    quant_nn.TensorQuantizer.use_fb_fake_quant = True

    print(f"Export ONNX file: {onnx_filename}")
    sample = torch.randn(onnx_bs, 3, 224, 224).cuda()
    export_options = dict(verbose=False,
                          opset_version=13,
                          enable_onnx_checker=False,
                          input_names=["input"],
                          output_names=["output"])
    torch.onnx.export(model, sample, onnx_filename, **export_options)


def collect_stats(model, data_loader, num_batches):
    """Feed data to the network and collect calibration statistics.

    Every ``TensorQuantizer`` that owns a calibrator is switched from
    quantizing to calibrating while the data is fed; quantizers without a
    calibrator are disabled for that time. The previous state is restored
    afterwards.

    Args:
        model: Network containing ``quant_nn.TensorQuantizer`` modules.
        data_loader: Iterable yielding ``(images, labels)`` batches.
        num_batches: Number of batches to feed. Exactly this many are run
            (fewer if the loader is exhausted first; none when it is 0).
    """
    quantizers = [module for module in model.modules()
                  if isinstance(module, quant_nn.TensorQuantizer)]

    # Enable calibrators
    for module in quantizers:
        if module._calibrator is not None:
            module.disable_quant()
            module.enable_calib()
        else:
            module.disable()

    for i, (images, _) in tqdm(enumerate(data_loader), total=num_batches):
        # Check the budget BEFORE the forward pass: testing it afterwards ran
        # the model on one batch more than requested.
        if i >= num_batches:
            break
        model(images.cuda())

    # Disable calibrators
    for module in quantizers:
        if module._calibrator is not None:
            module.enable_quant()
            module.disable_calib()
        else:
            module.enable()


def compute_amax(model, **kwargs):
    """Load the calibration result into every quantizer, then move to GPU.

    Args:
        model: Network containing ``quant_nn.TensorQuantizer`` modules.
        **kwargs: Forwarded to ``load_calib_amax`` for every calibrator that
            is not a ``calib.MaxCalibrator``.
    """
    for name, module in model.named_modules():
        if not isinstance(module, quant_nn.TensorQuantizer):
            continue

        calibrator = module._calibrator
        if calibrator is not None:
            # Max calibrators are loaded without the extra options.
            options = {} if isinstance(calibrator, calib.MaxCalibrator) else kwargs
            module.load_calib_amax(**options)
        # Every quantizer is printed, with or without a calibrator.
        print(f"{name:40}: {module}")
    model.cuda()


def main(args):
    """Calibrate a ResNet-34 (PTQ), optionally fine-tune it (QAT), export ONNX.

    Reads these fields of ``args``: ``data_path``, ``batch_size``,
    ``num_classes``, ``weights``, ``qat``, ``lr``, ``lrf``, ``epochs``,
    ``onnx_filename`` and ``onnx_bs``.

    Side effects: writes ``quant_model_calibrated.pth`` to the current
    directory and the ONNX file to ``args.onnx_filename``.

    Raises:
        AssertionError: If CUDA is unavailable or the weights file is missing.
    """
    # NOTE(review): presumably makes torch.nn layers created afterwards
    # quantized versions (see the toolkit user guide) -- confirm.
    quant_modules.initialize()
    assert torch.cuda.is_available(), "only support GPU!"

    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])}

    # Instantiate the training dataset
    train_dataset = MyDataSet(images_path=train_images_path,
                              images_class=train_images_label,
                              transform=data_transform["train"])

    # Instantiate the validation dataset
    val_dataset = MyDataSet(images_path=val_images_path,
                            images_class=val_images_label,
                            transform=data_transform["val"])

    batch_size = args.batch_size
    # Smallest of: CPU count, batch size (0 when the batch size is 1), and 8.
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_dataset.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_dataset.collate_fn)

    # ########################## #
    # Post Training Quantization #
    # ########################## #
    # We will use histogram based calibration for activations and the default max calibration for weights.
    # The defaults must be set before the model is created below.
    quant_desc_input = QuantDescriptor(calib_method='histogram')
    quant_nn.QuantConv2d.set_default_quant_desc_input(quant_desc_input)
    quant_nn.QuantLinear.set_default_quant_desc_input(quant_desc_input)

    model = create_model(num_classes=args.num_classes)
    assert os.path.exists(args.weights), "weights file: '{}' not exist.".format(args.weights)
    model.load_state_dict(torch.load(args.weights, map_location='cpu'))
    model.cuda()

    # It is a bit slow since we collect histograms on CPU
    with torch.no_grad():
        # Calibrate on validation batches; the batch count is 1000 // batch_size.
        collect_stats(model, val_loader, num_batches=1000 // batch_size)
        compute_amax(model, method="percentile", percentile=99.99)
        # validate
        evaluate(model=model, data_loader=val_loader, epoch=0)

    # Checkpoint of the calibrated (PTQ-only) model, saved before any QAT.
    torch.save(model.state_dict(), "quant_model_calibrated.pth")

    if args.qat:
        # ########################### #
        # Quantization Aware Training #
        # ########################### #
        pg = [p for p in model.parameters() if p.requires_grad]
        optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=5E-5)
        # Scheduler(half of a cosine period)
        # The LR factor is 1 at epoch 0 and reaches args.lrf at epoch args.epochs.
        lf = lambda x: (math.cos(x * math.pi / 2 / args.epochs)) * (1 - args.lrf) + args.lrf
        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

        for epoch in range(args.epochs):
            # train
            train_one_epoch(model=model, optimizer=optimizer, data_loader=train_loader, epoch=epoch)

            scheduler.step()

            # validate
            evaluate(model=model, data_loader=val_loader, epoch=epoch)

    # Export whichever model results: calibrated only, or calibrated + QAT.
    export_onnx(model, args.onnx_filename, args.onnx_bs)


def str2bool(value):
    """Parse a command-line boolean.

    ``type=bool`` is unsuitable for argparse: ``bool("False")`` is ``True``,
    so any non-empty value used to enable the option.

    Args:
        value: A bool, or a string such as "true"/"false", "yes"/"no",
            "1"/"0" (case-insensitive). The empty string maps to ``False``,
            as it did under ``type=bool``.

    Returns:
        The parsed boolean.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("true", "t", "yes", "y", "1"):
        return True
    if text in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError("boolean value expected, got {!r}".format(value))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--epochs', type=int, default=5)
    parser.add_argument('--batch-size', type=int, default=8)
    parser.add_argument('--lr', type=float, default=0.0001)
    parser.add_argument('--lrf', type=float, default=0.01)

    # Root directory of the dataset
    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
    parser.add_argument('--data-path', type=str,
                        default="/data/flower_photos")

    # Path of the trained weights
    parser.add_argument('--weights', type=str, default='./resNet(flower).pth',
                        help='trained weights path')

    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')

    parser.add_argument('--onnx-filename', default='resnet34.onnx', help='save onnx model filename')
    # type=int: without it a value given on the command line stays a string
    # and cannot be used as a tensor dimension.
    parser.add_argument('--onnx-bs', type=int, default=1, help='save onnx model batch size')
    # str2bool so that "--qat False" really disables QAT.
    parser.add_argument('--qat', type=str2bool, default=True, help='whether use quantization aware training')

    opt = parser.parse_args()

    main(opt)


================================================
FILE: deploying_service/deploying_pytorch/convert_tensorrt/convert_resnet34/utils.py
================================================
import os
import sys
import json
import pickle
import random

import torch
from tqdm import tqdm


def read_split_data(root: str, val_rate: float = 0.2):
    """Split an image-folder dataset into training and validation lists.

    Every sub-directory of ``root`` is one class. Class names are sorted and
    numbered from 0; the index -> name mapping is written to
    ``class_indices.json`` in the current working directory.

    Args:
        root: Dataset root directory.
        val_rate: Fraction of each class sampled for validation; the count
            per class is ``int(len(images) * val_rate)``.

    Returns:
        ``(train_images_path, train_images_label, val_images_path,
        val_images_label)`` as four lists.

    Raises:
        AssertionError: If ``root`` does not exist.
    """
    random.seed(0)  # fixed seed so the sampling below is repeatable
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # Walk the directories: one directory corresponds to one class.
    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]
    # Sort so the class -> index assignment is consistent.
    flower_class.sort()
    # Class name -> numeric index; the JSON file stores the inverse mapping.
    class_indices = dict((k, v) for v, k in enumerate(flower_class))
    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of each training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of each validation image
    every_class_num = []  # number of samples per class
    supported = [".jpg", ".JPG", ".png", ".PNG"]  # accepted file suffixes
    # Walk the files of every class directory.
    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # Keep only files whose suffix is supported.
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        image_class = class_indices[cla]
        every_class_num.append(len(images))
        # Randomly sample the validation images in proportion. A set makes the
        # membership test below O(1); with a list the loop was quadratic in
        # the class size. The resulting split is identical.
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:  # sampled for validation
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:  # everything else is used for training
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.".format(sum(every_class_num)))
    print("{} images for training.".format(len(train_images_path)))
    print("{} images for validation.".format(len(val_images_path)))

    return train_images_path, train_images_label, val_images_path, val_images_label


def write_pickle(list_info: list, file_name: str):
    """Serialize ``list_info`` with pickle into the file ``file_name``."""
    with open(file_name, 'wb') as handle:
        handle.write(pickle.dumps(list_info))


def read_pickle(file_name: str) -> list:
    """Load and return the object pickled in ``file_name``.

    NOTE: unpickling can execute arbitrary code; only use this on files that
    come from a trusted source.
    """
    with open(file_name, 'rb') as handle:
        return pickle.load(handle)


def train_one_epoch(model, optimizer, data_loader, epoch):
    """Train ``model`` for one pass over ``data_loader`` on the GPU.

    Args:
        model: Network to train; switched to train mode.
        optimizer: Optimizer stepped once per batch.
        data_loader: Iterable yielding ``(images, labels)`` batches.
        epoch: Epoch number, used only in the progress-bar text.

    Returns:
        ``(mean loss per batch, fraction of correctly classified samples)``.

    Notes:
        The process exits with status 1 when a non-finite loss is seen.
        The return statement reads ``step``, which is only bound by the loop,
        so an empty ``data_loader`` fails at that point.
    """
    model.train()
    loss_function = torch.nn.CrossEntropyLoss()
    accu_loss = torch.zeros(1).cuda()  # accumulated loss
    accu_num = torch.zeros(1).cuda()   # accumulated number of correctly predicted samples
    optimizer.zero_grad()

    sample_num = 0
    data_loader = tqdm(data_loader, file=sys.stdout)
    for step, data in enumerate(data_loader):
        images, labels = data
        sample_num += images.shape[0]

        pred = model(images.cuda())
        # Index of the highest score along dim 1 is the predicted class.
        pred_classes = torch.max(pred, dim=1)[1]
        accu_num += torch.eq(pred_classes, labels.cuda()).sum()

        loss = loss_function(pred, labels.cuda())
        loss.backward()
        # detach() so the running total does not hold on to the autograd graph.
        accu_loss += loss.detach()

        data_loader.desc = "[train epoch {}] loss: {:.3f}, acc: {:.3f}".format(epoch,
                                                                               accu_loss.item() / (step + 1),
                                                                               accu_num.item() / sample_num)

        # Checked after backward() but before the optimizer step, so a
        # non-finite loss never updates the weights.
        if not torch.isfinite(loss):
            print('WARNING: non-finite loss, ending training ', loss)
            sys.exit(1)

        optimizer.step()
        optimizer.zero_grad()

    return accu_loss.item() / (step + 1), accu_num.item() / sample_num


@torch.no_grad()
def evaluate(model, data_loader, epoch):
    """Evaluate ``model`` over ``data_loader`` on the GPU without gradients.

    Args:
        model: Network to evaluate; switched to eval mode.
        data_loader: Iterable yielding ``(images, labels)`` batches.
        epoch: Epoch number, used only in the progress-bar text.

    Returns:
        ``(mean loss per batch, fraction of correctly classified samples)``.
    """
    model.eval()
    criterion = torch.nn.CrossEntropyLoss()

    correct_total = torch.zeros(1).cuda()  # accumulated number of correct predictions
    loss_total = torch.zeros(1).cuda()     # accumulated loss

    seen = 0
    progress = tqdm(data_loader, file=sys.stdout)
    for step, (images, labels) in enumerate(progress):
        seen += images.shape[0]
        targets = labels.cuda()

        logits = model(images.cuda())
        # Index of the highest score along dim 1 is the predicted class.
        predicted = torch.max(logits, dim=1)[1]
        correct_total += torch.eq(predicted, targets).sum()
        loss_total += criterion(logits, targets)

        progress.desc = "[valid epoch {}] loss: {:.3f}, acc: {:.3f}".format(
            epoch,
            loss_total.item() / (step + 1),
            correct_total.item() / seen)

    return loss_total.item() / (step + 1), correct_total.item() / seen


================================================
FILE: deploying_service/deploying_pytorch/pytorch_flask_service/class_indices.json
================================================
{
    "0": "daisy",
    "1": "dandelion",
    "2": "roses",
    "3": "sunflowers",
    "4": "tulips"
}

================================================
FILE: deploying_service/deploying_pytorch/pytorch_flask_service/main.py
================================================
import os
import io
import json
import torch
import torchvision.transforms as transforms
from PIL import Image
from flask import Flask, jsonify, request, render_template
from flask_cors import CORS
from model import MobileNetV2

# Module-level service setup: runs once on import and builds the Flask app,
# the model and the class-name table used by the request handlers.
app = Flask(__name__)
CORS(app)  # allow cross-origin requests

weights_path = "./MobileNetV2(flower).pth"
class_json_path = "./class_indices.json"
assert os.path.exists(weights_path), "weights path does not exist..."
assert os.path.exists(class_json_path), "class json path does not exist..."

# select device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
# create model
model = MobileNetV2(num_classes=5).to(device)
# load model weights
model.load_state_dict(torch.load(weights_path, map_location=device))

model.eval()

# load class info
# The context manager closes the file as soon as it is parsed; the handle was
# previously left open for the lifetime of the process.
with open(class_json_path, 'rb') as json_file:
    class_indict = json.load(json_file)


def transform_image(image_bytes):
    """Decode raw image bytes into a model-ready batch on ``device``.

    Args:
        image_bytes: Encoded image file content.

    Returns:
        The transformed image with a leading batch dimension added, moved to
        the module-level ``device``.

    Raises:
        ValueError: If the decoded image is not in 'RGB' mode.
    """
    image = Image.open(io.BytesIO(image_bytes))
    if image.mode != "RGB":
        raise ValueError("input file does not RGB image...")

    # Resize -> center crop -> tensor -> per-channel normalization.
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    preprocess = transforms.Compose([transforms.Resize(255),
                                     transforms.CenterCrop(224),
                                     transforms.ToTensor(),
                                     normalize])
    batch = preprocess(image).unsqueeze(0)
    return batch.to(device)


def get_prediction(image_bytes):
    """Classify an image and return one text line per class.

    Args:
        image_bytes: Encoded image file content.

    Returns:
        ``{"result": [...]}`` where the list holds one formatted
        ``class/probability`` line per class, highest probability first.
        If anything fails, the list holds the exception text instead, so the
        caller always receives the same structure.
    """
    try:
        tensor = transform_image(image_bytes=image_bytes)
        logits = model.forward(tensor).squeeze()
        probabilities = torch.softmax(logits, dim=0).detach().cpu().numpy()

        # (class name, probability) pairs, sorted by descending probability.
        ranked = sorted(
            ((class_indict[str(index)], float(p)) for index, p in enumerate(probabilities)),
            key=lambda pair: pair[1],
            reverse=True)
        lines = ["class:{:<15} probability:{:.3f}".format(name, prob) for name, prob in ranked]
        return {"result": lines}
    except Exception as e:
        return {"result": [str(e)]}


@app.route("/predict", methods=["POST"])
@torch.no_grad()
def predict():
    """Handle an uploaded image (form field ``file``) and reply with JSON."""
    uploaded = request.files["file"]
    return jsonify(get_prediction(image_bytes=uploaded.read()))


@app.route("/", methods=["GET", "POST"])
def root():
    """Serve the upload page rendered from the ``up.html`` template."""
    page = render_template("up.html")
    return page


# Start the Flask server on port 5000 when run as a script; host "0.0.0.0"
# makes it listen on every network interface, not only localhost.
if __name__ == '__main__':
    app.run(host="0.0.0.0", port=5000)


================================================
FILE: deploying_service/deploying_pytorch/pytorch_flask_service/model.py
================================================
from torch import nn
import torch


def _make_divisible(ch, divisor=8, min_ch=None):
    """Round a channel count to a nearby multiple of ``divisor``.

    This function is taken from the original tf repo:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py

    Args:
        ch: Requested channel count (may be fractional).
        divisor: The result is a multiple of this value unless ``min_ch``
            wins and is not itself a multiple.
        min_ch: Lower bound for the result; defaults to ``divisor``.

    Returns:
        The rounded channel count, never less than ``min_ch`` and never more
        than 10% below ``ch``.
    """
    lower_bound = divisor if min_ch is None else min_ch
    rounded = int(ch + divisor / 2) // divisor * divisor
    new_ch = rounded if rounded > lower_bound else lower_bound
    # Make sure that rounding down does not go down by more than 10%.
    if new_ch < 0.9 * ch:
        new_ch += divisor
    return new_ch


class ConvBNReLU(nn.Sequential):
    """Bias-free Conv2d followed by BatchNorm2d and ReLU6."""

    def __init__(self, in_channel, out_channel, kernel_size=3, stride=1, groups=1):
        # Padding of (k - 1) // 2: 1 for a 3x3 kernel, 0 for a 1x1 kernel.
        conv = nn.Conv2d(in_channel, out_channel, kernel_size, stride,
                         (kernel_size - 1) // 2, groups=groups, bias=False)
        norm = nn.BatchNorm2d(out_channel)
        activation = nn.ReLU6(inplace=True)
        super(ConvBNReLU, self).__init__(conv, norm, activation)


class InvertedResidual(nn.Module):
    """MobileNetV2 block: optional 1x1 expansion, 3x3 depthwise, linear 1x1.

    The input is added to the output only when ``stride == 1`` and the input
    and output channel counts are equal.
    """

    def __init__(self, in_channel, out_channel, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        hidden_channel = in_channel * expand_ratio
        self.use_shortcut = stride == 1 and in_channel == out_channel

        # 1x1 pointwise expansion, skipped when there is nothing to expand.
        layers = [ConvBNReLU(in_channel, hidden_channel, kernel_size=1)] if expand_ratio != 1 else []
        # 3x3 depthwise conv (one group per channel)
        layers.append(ConvBNReLU(hidden_channel, hidden_channel, stride=stride, groups=hidden_channel))
        # 1x1 pointwise conv (linear: no activation after the batch norm)
        layers.append(nn.Conv2d(hidden_channel, out_channel, kernel_size=1, bias=False))
        layers.append(nn.BatchNorm2d(out_channel))

        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the block, adding the shortcut when it is enabled."""
        out = self.conv(x)
        return x + out if self.use_shortcut else out


class MobileNetV2(nn.Module):
    """MobileNetV2 classifier.

    Args:
        num_classes: Size of the final linear layer's output.
        alpha: Width multiplier applied to every channel count.
        round_nearest: Channel counts are rounded to a multiple of this value.
    """

    def __init__(self, num_classes=1000, alpha=1.0, round_nearest=8):
        super(MobileNetV2, self).__init__()
        input_channel = _make_divisible(32 * alpha, round_nearest)
        last_channel = _make_divisible(1280 * alpha, round_nearest)

        # One row per stage:
        # (expand ratio t, output channels c, repeats n, first stride s)
        inverted_residual_setting = (
            (1, 16, 1, 1),
            (6, 24, 2, 2),
            (6, 32, 3, 2),
            (6, 64, 4, 2),
            (6, 96, 3, 1),
            (6, 160, 3, 2),
            (6, 320, 1, 1),
        )

        # conv1 layer
        features = [ConvBNReLU(3, input_channel, stride=2)]
        # inverted residual blocks; only the first block of a stage strides
        for expand_ratio, channels, repeats, first_stride in inverted_residual_setting:
            output_channel = _make_divisible(channels * alpha, round_nearest)
            for index in range(repeats):
                block_stride = first_stride if index == 0 else 1
                features.append(InvertedResidual(input_channel, output_channel,
                                                 block_stride, expand_ratio=expand_ratio))
                input_channel = output_channel
        # final 1x1 convolution
        features.append(ConvBNReLU(input_channel, last_channel, 1))
        self.features = nn.Sequential(*features)

        # classifier head
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(last_channel, num_classes)
        )

        # weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        """Return the class scores for a batch of images."""
        pooled = self.avgpool(self.features(x))
        return self.classifier(torch.flatten(pooled, 1))


================================================
FILE: deploying_service/deploying_pytorch/pytorch_flask_service/requirements.txt
================================================
Flask==2.2.5
Flask_Cors==3.0.9
Pillow


================================================
FILE: deploying_service/deploying_pytorch/pytorch_flask_service/templates/up.html
================================================
<!DOCTYPE html>
<html>
<head>
    <title>HTML5上传图片并预览</title>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
    <script src="{{ url_for('static', filename='js/jquery.min.js') }}"></script>
</head>
<body>
<!--<h3>请选择图片文件：PNG/JPG/JPEG/SVG/GIF</h3>-->
<div style="text-align: left;margin-left:500px;margin-top:100px;" >
    <div style="float:left;">
        <a href="javascript:;" class="file">选择文件
            <input type="file" name="file" id="file0"><br>
        </a>
        <img src="" id="img0" style="margin-top:20px;width: 35rem;height: 30rem;">
    </div>
    <div style="float:left;margin-left:50px;">
        <input type="button" id="b0" onclick="test()" value="预测">
        <pre  id="out" style="width:320px;height:50px;line-height: 50px;margin-top:20px;"></pre>
    </div>
</div>

<script type="text/javascript">
    // When a file is chosen, show it in the preview image (#img0).
    $("#file0").change(function(){
        var objUrl = getObjectURL(this.files[0]) ;// object URL of the selected file
        console.log("objUrl = "+objUrl);
        if (objUrl) {
            $("#img0").attr("src", objUrl);
        }
    });

    // Upload the selected file to the "predict" endpoint and list the
    // returned result lines inside the #out element.
    function test() {
        var fileobj = $("#file0")[0].files[0];
        console.log(fileobj);
        var form = new FormData();
        form.append("file", fileobj);
        // Start with an empty list: when the request fails nothing is
        // rendered, instead of forEach being called on a string and throwing.
        var out = [];
        $.ajax({
            type: 'POST',
            url: "predict",
            data: form,
            async: false,       // synchronous: `out` must be filled before rendering below
            processData: false, // send the FormData object as is
            contentType: false, // do not let jQuery set the Content-Type header
            success: function (arg) {
                console.log(arg);
                if (arg && Array.isArray(arg.result)) {
                    out = arg.result;
                }
            },
            error: function () {
                console.log("后台处理错误");
            }
        });

        var container = document.getElementById("out");
        container.innerHTML = "";
        out.forEach(function (line) {
            var row = document.createElement("div");
            row.style.cssText = "border-bottom: 1px solid #CCCCCC;line-height: 60px;font-size:16px;";
            // textContent, not innerHTML: the server text (which may be an
            // exception message) is shown literally, never parsed as markup.
            row.textContent = line;
            container.appendChild(row);
        });
    }

    // Return an object URL for `file` using whichever API the browser
    // exposes, or null when none is available.
    function getObjectURL(file) {
        if (window.createObjectURL != undefined) {
            return window.createObjectURL(file);
        }
        if (window.URL != undefined) { // mozilla(firefox)
            return window.URL.createObjectURL(file);
        }
        if (window.webkitURL != undefined) { // webkit or chrome
            return window.webkitURL.createObjectURL(file);
        }
        return null;
    }
</script>
<style>
    .file {
        position: relative;
        /*display: inline-block;*/
        background: #CCC ;
        border: 1px solid #CCC;
        padding: 4px 4px;
        overflow: hidden;
        text-decoration: none;
        text-indent: 0;
        width:100px;
        height:30px;
        line-height: 30px;
        border-radius: 5px;
        color: #333;
        font-size: 13px;

    }
    .file input {
        position: absolute;
        font-size: 13px;
        right: 0;
        top: 0;
        opacity: 0;
        border: 1px solid #333;
        padding: 4px 4px;
        overflow: hidden;
        text-indent: 0;
        width:100px;
        height:30px;
        line-height: 30px;
        border-radius: 5px;
        color: #FFFFFF;

    }
    #b0{
        background: #1899FF;
        border: 1px solid #CCC;
        padding: 4px 10px;
        overflow: hidden;
        text-indent: 0;
        width:60px;
        height:28px;
        line-height: 20px;
        border-radius: 5px;
        color: #FFFFFF;
        font-size: 13px;
    }

    /*.gradient{*/

        /*filter:alpha(opacity=100 finishopacity=50 style=1 startx=0,starty=0,finishx=0,finishy=150) progid:DXImageTransform.Microsoft.gradient(startcolorstr=#fff,endcolorstr=#ccc,gradientType=0);*/
        /*-ms-filter:alpha(opacity=100 finishopacity=50 style=1 startx=0,starty=0,finishx=0,finishy=150) progid:DXImageTransform.Microsoft.gradient(startcolorstr=#fff,endcolorstr=#ccc,gradientType=0);!*IE8*!*/
        /*background:#1899FF; !* 一些不支持背景渐变的浏览器 *!*/
        /*background:-moz-linear-gradient(top, #fff, #1899FF);*/
        /*background:-webkit-gradient(linear, 0 0, 0 bottom, from(#fff), to(#ccc));*/
        /*background:-o-linear-gradient(top, #fff, #ccc);*/
    /*}*/
</style>
</body>
</html>


================================================
FILE: deploying_service/pruning_model_pytorch/class_indices.json
================================================
{
    "0": "daisy",
    "1": "dandelion",
    "2": "roses",
    "3": "sunflowers",
    "4": "tulips"
}

================================================
FILE: deploying_service/pruning_model_pytorch/main.py
================================================
import os
import torch
from torchvision import transforms, datasets
import torch.nn.utils.prune as prune
import torch.nn.functional as F
from tqdm import tqdm
import time
from model import resnet34

# Use the first CUDA GPU when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Preprocessing applied to every validation image: resize, center-crop to 224,
# convert to a tensor, then normalize each channel with the mean/std below.
data_transform = transforms.Compose([transforms.Resize(256),
                                     transforms.CenterCrop(224),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

# The data root is resolved two levels above the current working directory,
# so the script has to be started from its own folder.
data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
image_path = data_root + "/data_set/flower_data/"  # flower data set path
batch_size = 16  # batch size of the validation DataLoader


def validate_model(model: torch.nn.Module):
    """Measure the accuracy of ``model`` on the ``val`` split of the data set.

    Args:
        model: Network to evaluate; switched to eval mode.

    Returns:
        Number of correct predictions divided by the number of samples.
    """
    validate_dataset = datasets.ImageFolder(root=image_path + "val",
                                            transform=data_transform)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=2)
    val_num = len(validate_dataset)

    model.eval()
    correct = 0.0  # running count of correct predictions
    with torch.no_grad():
        started = time.time()
        for val_images, val_labels in tqdm(validate_loader, desc="validate model accuracy."):
            outputs = model(val_images.to(device))  # eval model only have last output layer
            predict_y = torch.max(outputs, dim=1)[1]
            matches = torch.eq(predict_y, val_labels.to(device))
            correct += torch.sum(matches).item()
        val_accurate = correct / val_num
        print('test_accuracy: %.3f, time:%.3f' % (val_accurate, time.time() - started))

    return val_accurate


def count_sparsity(model: torch.nn.Module, p=True):
    """Report the share of exactly-zero weights in the Conv2d layers.

    Args:
        model: Network to inspect; only ``torch.nn.Conv2d`` modules count.
        p: When true, also print the sparsity of every layer.

    Returns:
        The global sparsity in percent (0.0 when the model has no Conv2d
        weights).
    """
    sum_zeros_num = 0
    sum_weights_num = 0
    for name, module in model.named_modules():
        if not isinstance(module, torch.nn.Conv2d):
            continue
        zeros_elements = torch.sum(torch.eq(module.weight, 0)).item()
        weights_elements = module.weight.numel()

        sum_zeros_num += zeros_elements
        sum_weights_num += weights_elements
        if p:
            print("Sparsity in {}.weights {:.2f}%".format(name, 100 * zeros_elements / weights_elements))

    # A model without Conv2d layers has nothing to count; report 0% instead of
    # failing with ZeroDivisionError.
    global_sparsity = 100 * sum_zeros_num / sum_weights_num if sum_weights_num else 0.0
    print("Global sparsity: {:.2f}%".format(global_sparsity))
    return global_sparsity


def main():
    """Load the 5-class ResNet-34 weights, prune half of all conv weights globally
    (L1 magnitude), report the resulting sparsity and re-validate the model."""
    weights_path = "./resNet34.pth"
    model = resnet34(num_classes=5)
    state_dict = torch.load(weights_path, map_location=device)
    model.load_state_dict(state_dict)
    model.to(device)

    # Optional single-layer experiment (kept for reference):
    # validate_model(model)
    # module = model.conv1
    # print(list(module.named_parameters()))
    # # print(list(module.named_buffers()))
    #
    # # prune 50% of the convolution kernels
    # prune.ln_structured(module, name="weight", amount=0.5, n=2, dim=0)
    # print(list(module.weight))
    # print(module.weight.shape)
    # # print(list(module.named_buffers()))
    #
    # prune.remove(module, "weight")
    # print(module.weight.shape)

    # Collect the weight tensor of every convolution layer.
    conv_weights = [(layer, "weight")
                    for _, layer in model.named_modules()
                    if isinstance(layer, torch.nn.Conv2d)]

    # Prune across all collected weights at once.
    prune.global_unstructured(conv_weights,
                              pruning_method=prune.L1Unstructured,
                              amount=0.5)

    # Report the achieved pruning ratio.
    count_sparsity(model, p=False)

    # Validate the pruned model.
    validate_model(model)
    # print(model)

    # Optional: make the pruning permanent and save the result.
    # for name, module in model.named_modules():
    #     if isinstance(module, torch.nn.Conv2d):
    #         prune.remove(module, "weight")
    # validate_model(model)

    # torch.save(model.state_dict(), "pruning_model.pth")


if __name__ == '__main__':
    main()


================================================
FILE: deploying_service/pruning_model_pytorch/model.py
================================================
import torch.nn as nn
import torch


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions, each followed by batch norm.

    ``downsample`` (optional module) is applied to the input before it is
    added back to the output of the second batch norm.
    """
    expansion = 1

    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        super().__init__()
        shared = dict(kernel_size=3, padding=1, bias=False)
        # Only the first convolution carries the block's stride.
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,
                               stride=stride, **shared)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,
                               stride=1, **shared)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample

    def forward(self, x):
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # residual connection, then the final activation
        out += shortcut
        return self.relu(out)


class Bottleneck(nn.Module):
    """Residual block with a 1x1 -> 3x3 -> 1x1 convolution stack.

    The last convolution outputs ``out_channel * expansion`` channels. The
    optional ``downsample`` module is applied to the input before the
    residual addition.
    """
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        super().__init__()
        expanded = out_channel * self.expansion

        # 1x1: squeeze channels
        self.conv1 = nn.Conv2d(in_channel, out_channel,
                               kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        # 3x3: carries the block's stride
        self.conv2 = nn.Conv2d(out_channel, out_channel,
                               kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        # 1x1: unsqueeze channels
        self.conv3 = nn.Conv2d(out_channel, expanded,
                               kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(expanded)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # residual connection, then the final activation
        out += shortcut
        return self.relu(out)


class ResNet(nn.Module):
    """ResNet backbone assembled from a configurable residual block type.

    Args:
        block: residual block class; must expose an ``expansion`` attribute and
            accept ``(in_channel, out_channel, stride=..., downsample=...)``.
        blocks_num: number of blocks in each of the four stages.
        num_classes: output size of the final linear layer.
        include_top: when ``False`` the average pool and linear head are
            omitted and the stage-4 feature map is returned.
    """

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True):
        super().__init__()
        self.include_top = include_top
        self.in_channel = 64  # channel count fed into the next stage

        # stem: 7x7 stride-2 convolution followed by a 3x3 stride-2 max pool
        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # four residual stages; every stage after the first halves the resolution
        self.layer1 = self._make_layer(block, 64, blocks_num[0])
        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)
        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)
        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)

        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Kaiming initialisation for every convolution in the network
        for module in self.modules():
            if not isinstance(module, nn.Conv2d):
                continue
            nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` blocks and update ``self.in_channel``."""
        out_channel = channel * block.expansion

        # The shortcut needs a projection whenever shape or channel count changes.
        needs_projection = stride != 1 or self.in_channel != out_channel
        downsample = None
        if needs_projection:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channel, out_channel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channel))

        first = block(self.in_channel, channel, downsample=downsample, stride=stride)
        self.in_channel = out_channel
        rest = [block(self.in_channel, channel) for _ in range(1, block_num)]

        return nn.Sequential(first, *rest)

    def forward(self, x):
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        if not self.include_top:
            return x

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        return self.fc(x)


def resnet34(num_classes=1000, include_top=True):
    """Build a ResNet from ``BasicBlock`` with stage depths 3, 4, 6 and 3."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths,
                  num_classes=num_classes, include_top=include_top)


def resnet101(num_classes=1000, include_top=True):
    """Build a ResNet from ``Bottleneck`` with stage depths 3, 4, 23 and 3."""
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths,
                  num_classes=num_classes, include_top=include_top)


================================================
FILE: deploying_service/pruning_model_pytorch/predict.py
================================================
import torch
from model import resnet34
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt
import json

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Resize, centre-crop, convert to tensor and normalise the input image.
data_transform = transforms.Compose(
    [transforms.Resize(256),
     transforms.CenterCrop(224),
     transforms.ToTensor(),
     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

# load image
img = Image.open("../tulip.jpg")
plt.imshow(img)
# [C, H, W] after the transform
img = data_transform(img)
# expand batch dimension -> [N, C, H, W]
img = torch.unsqueeze(img, dim=0)

# read class_indict (index -> class name); the file is closed as soon as it is parsed
try:
    with open('./class_indices.json', 'r') as json_file:
        class_indict = json.load(json_file)
except Exception as e:
    print(e)
    exit(-1)

# create model
model = resnet34(num_classes=5)
# load model weights
model_weight_path = "./resNet34.pth"
model.load_state_dict(torch.load(model_weight_path, map_location=device))
model.eval()
with torch.no_grad():
    # predict class: drop the batch dimension, then turn logits into probabilities
    output = torch.squeeze(model(img))
    predict = torch.softmax(output, dim=0)
    predict_cla = torch.argmax(predict).numpy()
# print the class name and its probability
print(class_indict[str(predict_cla)], predict[predict_cla].numpy())
plt.show()


================================================
FILE: deploying_service/pruning_model_pytorch/train.py
================================================
import torch
import torch.nn as nn
from torchvision import transforms, datasets
import json
import matplotlib.pyplot as plt
import os
import torch.optim as optim
from model import resnet34, resnet101


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# Random crop/flip augmentation for training; deterministic resize + centre crop for validation.
data_transform = {
    "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                 transforms.RandomHorizontalFlip(),
                                 transforms.ToTensor(),
                                 transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
    "val": transforms.Compose([transforms.Resize(256),
                               transforms.CenterCrop(224),
                               transforms.ToTensor(),
                               transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}


data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
image_path = data_root + "/data_set/flower_data/"  # flower data set path

train_dataset = datasets.ImageFolder(root=image_path+"train",
                                     transform=data_transform["train"])
train_num = len(train_dataset)

# {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
flower_list = train_dataset.class_to_idx
# invert the mapping: class index -> class name
cla_dict = dict((val, key) for key, val in flower_list.items())
# write dict into json file
json_str = json.dumps(cla_dict, indent=4)
with open('class_indices.json', 'w') as json_file:
    json_file.write(json_str)

batch_size = 16
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=batch_size, shuffle=True,
                                           num_workers=0)

validate_dataset = datasets.ImageFolder(root=image_path + "val",
                                        transform=data_transform["val"])
val_num = len(validate_dataset)
validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                              batch_size=batch_size, shuffle=False,
                                              num_workers=0)

net = resnet34()
# load pretrain weights
model_weight_path = "./resnet34-pre.pth"
# map_location lets the checkpoint load on whichever device is available here,
# matching how the other scripts of this project load their weights.
missing_keys, unexpected_keys = net.load_state_dict(
    torch.load(model_weight_path, map_location=device), strict=False)
# for param in net.parameters():
#     param.requires_grad = False
# change fc layer structure: replace the head with a 5-class classifier
inchannel = net.fc.in_features
net.fc = nn.Linear(inchannel, 5)
net.to(device)

loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0001)

best_acc = 0.0
save_path = './resNet34.pth'
for epoch in range(3):
    # train
    net.train()
    running_loss = 0.0
    train_steps = len(train_loader)
    for step, data in enumerate(train_loader, start=0):
        images, labels = data
        optimizer.zero_grad()
        logits = net(images.to(device))
        loss = loss_function(logits, labels.to(device))
        loss.backward()
        optimizer.step()

        # print statistics
        step_loss = loss.item()
        running_loss += step_loss
        # print train process as a 50-character progress bar
        rate = (step+1)/train_steps
        a = "*" * int(rate * 50)
        b = "." * int((1 - rate) * 50)
        print("\rtrain loss: {:^3.0f}%[{}->{}]{:.4f}".format(int(rate*100), a, b, step_loss), end="")
    print()

    # validate
    net.eval()
    acc = 0.0  # accumulate accurate number / epoch
    with torch.no_grad():
        for val_data in validate_loader:
            val_images, val_labels = val_data
            outputs = net(val_images.to(device))  # eval model only have last output layer
            # loss = loss_function(outputs, test_labels)
            predict_y = torch.max(outputs, dim=1)[1]
            acc += (predict_y == val_labels.to(device)).sum().item()
        val_accurate = acc / val_num
        # keep only the weights of the best epoch so far
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)
        # Average over the number of batches; dividing by the last step index
        # was off by one and failed with a single batch.
        print('[epoch %d] train_loss: %.3f  test_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

print('Finished Training')


================================================
FILE: others_project/draw_dilated_conv/main.py
================================================
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap


def dilated_conv_one_pixel(center: (int, int),
                           feature_map: np.ndarray,
                           k: int = 3,
                           r: int = 1,
                           v: int = 1):
    """
    Mark the pixels touched by a dilated convolution kernel centred at
    ``center`` by adding ``v`` to each of them in ``feature_map`` (in place).

    Args:
        center: (x, y) coordinate of the kernel centre
        feature_map: map recording how many times every pixel is used
        k: kernel size of the dilated convolution (must be odd)
        r: dilation rate of the dilated convolution
        v: increment added to every used pixel

    Raises:
        AssertionError: if ``k`` is even, because such a kernel has no centre pixel.
    """
    # The original check tested the constant 3 and therefore never fired.
    assert k % 2 == 1, "kernel size k must be odd, got {}".format(k)

    # left-top: (x, y)
    half = (k - 1) // 2
    left_top = (center[0] - half * r, center[1] - half * r)
    # NOTE: positions are not bounds-checked here; the caller is expected to
    # pass a feature map large enough to hold the whole dilated kernel.
    for i in range(k):
        for j in range(k):
            feature_map[left_top[1] + i * r][left_top[0] + j * r] += v


def dilated_conv_all_map(dilated_map: np.ndarray,
                         k: int = 3,
                         r: int = 1):
    """
    Propagate pixel usage counts from the output feature map of a dilated
    convolution back to its input feature map.

    Args:
        dilated_map: usage count of every pixel in the output feature map
        k: kernel size of the dilated convolution
        r: dilation rate of the dilated convolution

    Returns:
        A new map with the same shape and dtype as ``dilated_map`` holding the
        usage count of every input pixel.
    """
    new_map = np.zeros_like(dilated_map)
    # Only output pixels that are actually used contribute to the input map.
    used_rows, used_cols = np.nonzero(dilated_map > 0)
    for row, col in zip(used_rows, used_cols):
        dilated_conv_one_pixel((col, row), new_map, k=k, r=r, v=dilated_map[row][col])

    return new_map


def plot_map(matrix: np.ndarray):
    """Show ``matrix`` as a white-blue-red heat map with the integer value
    written into every cell and a grid drawn between the cells."""
    plt.figure()

    cmap = LinearSegmentedColormap.from_list('chaos', ['white', 'blue', 'red'])
    plt.imshow(matrix, cmap=cmap)

    n_rows, n_cols = matrix.shape[0], matrix.shape[1]
    axes = plt.gca()
    # minor ticks sit on the cell borders so the grid separates the cells
    axes.set_xticks(np.arange(-0.5, n_cols, 1), minor=True)
    axes.set_yticks(np.arange(-0.5, n_rows, 1), minor=True)

    # show the colour bar
    plt.colorbar()

    # annotate every cell with its count; larger counts get white text
    thresh = 5
    for x in range(n_cols):
        for y in range(n_rows):
            # note: the matrix is indexed [row, column], i.e. matrix[y, x]
            count = int(matrix[y, x])
            text_color = "black" if count <= thresh else "white"
            axes.text(x, y, count,
                      verticalalignment='center',
                      horizontalalignment='center',
                      color=text_color)
    axes.grid(which='minor', color='black', linestyle='-', linewidth=1.5)
    plt.show()
    plt.close()


def main():
    """Start from a single used pixel in the centre of a 31x31 map, apply the
    dilation rates from last to first, then print and plot the usage map."""
    # bottom to top
    dilated_rates = [1, 2, 3]
    # init feature map with one used pixel in the centre
    size = 31
    usage = np.zeros(shape=(size, size), dtype=np.int32)
    mid = size // 2
    usage[mid][mid] = 1
    # print(usage)
    # plot_map(usage)

    for rate in reversed(dilated_rates):
        usage = dilated_conv_all_map(usage, r=rate)
    print(usage)
    plot_map(usage)


if __name__ == '__main__':
    main()


================================================
FILE: others_project/kmeans_anchors/main.py
================================================
import random
import numpy as np
from tqdm import tqdm
from scipy.cluster.vq import kmeans

from read_voc import VOCDataSet
from yolo_kmeans import k_means, wh_iou


def anchor_fitness(k: np.ndarray, wh: np.ndarray, thr: float):  # mutation fitness
    """Score anchors ``k`` against box sizes ``wh`` with the width/height ratio metric.

    For every box/anchor pair the worse of the two side ratios (each folded
    into (0, 1]) is taken; every box then keeps its best anchor.

    Returns:
        (fitness, bpr): the mean best score with scores not above ``thr``
        counted as zero, and the fraction of boxes whose best score exceeds ``thr``.
    """
    ratio = wh[:, None] / k[None]
    # ratio metric: min over the two sides of min(r, 1 / r)
    score = np.minimum(ratio, 1. / ratio).min(2)
    # score = wh_iou(wh, k)  # iou metric
    best = score.max(1)
    matched = (best > thr).astype(np.float32)
    fitness = (best * matched).mean()
    best_possible_recall = matched.mean()
    return fitness, best_possible_recall


def main(img_size=512, n=9, thr=0.25, gen=1000):
    """Compute anchors for VOC boxes with k-means, then refine them by random mutation.

    Args:
        img_size: the longer image side is scaled to this many pixels.
        n: number of anchors to generate.
        thr: ratio-metric threshold passed to ``anchor_fitness``.
        gen: number of mutation iterations.
    """
    # read the wh of every image and the (relative) wh of its bboxes
    dataset = VOCDataSet(voc_root="/data", year="2012", txt_name="train.txt")
    im_wh, boxes_wh = dataset.get_info()

    # scale so that the longer image side equals img_size
    im_wh = np.array(im_wh, dtype=np.float32)
    shapes = img_size * im_wh / im_wh.max(1, keepdims=True)
    wh0 = np.concatenate([np.asarray(boxes) * shape for shape, boxes in zip(shapes, boxes_wh)])  # wh

    # Filter out tiny objects
    i = (wh0 < 3.0).any(1).sum()
    if i:
        print(f'WARNING: Extremely small objects found. {i} of {len(wh0)} labels are < 3 pixels in size.')
    # keep a box when at least one of its sides is >= 2 pixels
    wh = wh0[(wh0 >= 2.0).any(1)]

    # Kmeans calculation
    # print(f'Running kmeans for {n} anchors on {len(wh)} points...')
    # s = wh.std(0)  # sigmas for whitening
    # k, dist = kmeans(wh / s, n, iter=30)  # points, mean distance
    # assert len(k) == n, print(f'ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}')
    # k *= s
    k = k_means(wh, n)

    # sort by area
    k = k[np.argsort(k.prod(1))]  # sort small to large
    f, bpr = anchor_fitness(k, wh, thr)
    print("kmeans: " + " ".join([f"[{int(a[0])}, {int(a[1])}]" for a in k]))
    print(f"fitness: {f:.5f}, best possible recall: {bpr:.5f}")

    # Evolve
    # genetic algorithm: mutate the k-means result and keep improvements
    npr = np.random
    sh, mp, s = k.shape, 0.9, 0.1  # anchor array shape, mutation prob, sigma
    pbar = tqdm(range(gen), desc='Evolving anchors with Genetic Algorithm:')  # progress bar
    for _ in pbar:
        v = np.ones(sh)
        while (v == 1).all():  # mutate until a change occurs (prevent duplicates)
            v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)
        kg = (k.copy() * v).clip(min=2.0)
        fg, _ = anchor_fitness(kg, wh, thr)
        if fg > f:
            f, k = fg, kg.copy()
            pbar.desc = f'Evolving anchors with Genetic Algorithm: fitness = {f:.4f}'

    # sort by area
    k = k[np.argsort(k.prod(1))]  # sort small to large
    # Re-evaluate the selected anchors: the recall of the last mutation
    # candidate is not the recall of the anchors that were actually kept.
    f, bpr = anchor_fitness(k, wh, thr)
    print("genetic: " + " ".join([f"[{int(a[0])}, {int(a[1])}]" for a in k]))
    print(f"fitness: {f:.5f}, best possible recall: {bpr:.5f}")


if __name__ == "__main__":
    main()


================================================
FILE: others_project/kmeans_anchors/plot_kmeans.py
================================================
import numpy as np
from matplotlib import pyplot as plt
# Fix NumPy's global seed so the sampled data and the randomly chosen initial
# cluster centres are the same on every run.
np.random.seed(0)

# Point colours indexed by cluster id in plot_clusters (only two entries).
colors = np.array(['blue', 'black'])


def plot_clusters(data, cls, clusters, title=""):
    """Scatter-plot ``data`` coloured by cluster id and mark every centre with a gold star.

    Args:
        data: points to draw; columns 0 and 1 are used as x and y
        cls: cluster index of every point, or ``None`` to draw all points in one colour
        clusters: cluster centres
        title: figure title
    """
    point_colors = [colors[0]] * data.shape[0] if cls is None else colors[cls].tolist()

    plt.scatter(data[:, 0], data[:, 1], c=point_colors)
    for centre in clusters:
        plt.scatter(centre[0], centre[1], c='gold', marker='*', s=150)
    plt.title(title)
    plt.show()
    plt.close()


def distances(data, clusters):
    """Return the squared Euclidean distance between every point and every
    cluster centre; entry [i, j] belongs to data[i] and clusters[j]."""
    # broadcast data[:, None] against clusters[None] to form every pair
    offsets = clusters[None] - data[:, None]
    return np.sum(offsets ** 2, axis=-1)


def k_means(data, k, dist=np.mean):
    """
    k-means clustering that plots every step.

    Args:
        data: the data to cluster
        k: number of clusters
        dist: function used to update a cluster centre from its members

    Returns:
        Array with the ``k`` cluster centres.
    """
    data_number = data.shape[0]
    # -1 is never a valid cluster index, so the very first assignment can not
    # be mistaken for convergence (an all-zero start could be).
    last_nearest = np.full((data_number,), -1)

    # init k clusters
    clusters = data[np.random.choice(data_number, k, replace=False)]
    print(f"random cluster: \n {clusters}")
    # plot
    plot_clusters(data, None, clusters, "random clusters")

    step = 0
    while True:
        d = distances(data, clusters)
        current_nearest = np.argmin(d, axis=1)

        # plot
        plot_clusters(data, current_nearest, clusters, f"step {step}")

        if (last_nearest == current_nearest).all():
            break  # clusters won't change
        for cluster in range(k):
            members = data[current_nearest == cluster]
            # An empty cluster keeps its previous centre; reducing an empty
            # selection would produce NaN.
            if len(members) > 0:
                clusters[cluster] = dist(members, axis=0)
        last_nearest = current_nearest
        step += 1

    return clusters


def main():
    """Sample two 2-D Gaussian blobs (centred at 1 and 5), plot them and cluster them with k=2."""
    x1, y1 = [np.random.normal(loc=1., size=150) for _ in range(2)]
    x2, y2 = [np.random.normal(loc=5., size=150) for _ in range(2)]

    x = np.concatenate([x1, x2])
    y = np.concatenate([y1, y2])

    plt.scatter(x, y, c='blue')
    plt.title("initial data")
    plt.show()
    plt.close()

    # stack the coordinates into an [N, 2] array of points
    points = np.concatenate([x[:, None], y[:, None]], axis=-1)
    clusters = k_means(points, k=2)
    # label typo fixed: "fluster" -> "cluster"
    print(f"k-means cluster: \n {clusters}")


if __name__ == '__main__':
    main()


================================================
FILE: others_project/kmeans_anchors/read_voc.py
================================================
import os
from tqdm import tqdm
from lxml import etree


class VOCDataSet(object):
    """Reads image sizes and bounding-box sizes from Pascal VOC annotation files."""

    def __init__(self, voc_root, year="2012", txt_name: str = "train.txt"):
        """
        Args:
            voc_root: directory that contains the ``VOCdevkit`` folder
            year: data set year, "2007" or "2012"
            txt_name: split file inside ``ImageSets/Main`` listing the image ids
        """
        assert year in ["2007", "2012"], "year must be in ['2007', '2012']"
        self.root = os.path.join(voc_root, "VOCdevkit", f"VOC{year}")
        self.annotations_root = os.path.join(self.root, "Annotations")

        # read train.txt or val.txt file
        txt_path = os.path.join(self.root, "ImageSets", "Main", txt_name)
        assert os.path.exists(txt_path), "not found {} file.".format(txt_name)

        # one annotation path per non-empty line of the split file
        with open(txt_path) as read:
            self.xml_list = [os.path.join(self.annotations_root, line.strip() + ".xml")
                             for line in read if len(line.strip()) > 0]

        # check file
        assert len(self.xml_list) > 0, "in '{}' file does not find any information.".format(txt_path)
        for xml_path in self.xml_list:
            assert os.path.exists(xml_path), "not found '{}' file.".format(xml_path)

    def __len__(self):
        return len(self.xml_list)

    def parse_xml_to_dict(self, xml):
        """
        Parse an xml tree into a dict (modelled on tensorflow's
        recursive_parse_xml_to_dict).

        Args:
            xml: xml tree obtained by parsing XML file contents using lxml.etree

        Returns:
            Python dictionary holding XML contents.
        """

        if len(xml) == 0:  # leaf node: return the text of this tag
            return {xml.tag: xml.text}

        result = {}
        for child in xml:
            child_result = self.parse_xml_to_dict(child)  # recurse into the child tag
            if child.tag != 'object':
                result[child.tag] = child_result[child.tag]
            else:
                if child.tag not in result:  # there may be several objects, so collect them in a list
                    result[child.tag] = []
                result[child.tag].append(child_result[child.tag])
        return {xml.tag: result}

    def get_info(self):
        """
        Collect the size of every image and the sizes of its boxes.

        Returns:
            (im_wh_list, boxes_wh_list): per image ``[width, height]`` in pixels
            and the list of box ``[w, h]`` values divided by the image width
            and height. Images without any object are skipped in both lists.
        """
        im_wh_list = []
        boxes_wh_list = []
        for xml_path in tqdm(self.xml_list, desc="read data info."):
            # read xml
            with open(xml_path) as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = self.parse_xml_to_dict(xml)["annotation"]
            im_height = int(data["size"]["height"])
            im_width = int(data["size"]["width"])

            wh = []
            # The "object" key is absent when an annotation holds no object;
            # fall back to an empty list so the skip below is reached.
            for obj in data.get("object", []):
                xmin = float(obj["bndbox"]["xmin"])
                xmax = float(obj["bndbox"]["xmax"])
                ymin = float(obj["bndbox"]["ymin"])
                ymax = float(obj["bndbox"]["ymax"])
                wh.append([(xmax - xmin) / im_width, (ymax - ymin) / im_height])

            if len(wh) == 0:
                continue

            im_wh_list.append([im_width, im_height])
            boxes_wh_list.append(wh)

        return im_wh_list, boxes_wh_list


================================================
FILE: others_project/kmeans_anchors/yolo_kmeans.py
================================================
import numpy as np


def wh_iou(wh1, wh2):
    """Return the n x m IoU matrix of boxes given only by (w, h).

    wh1 is n x 2 and wh2 is m x 2; the overlap of a pair is the product of
    the element-wise minimum of their widths and heights.
    """
    boxes_a = wh1[:, None]  # [N,1,2]
    boxes_b = wh2[None]  # [1,M,2]
    inter = np.minimum(boxes_a, boxes_b).prod(2)  # [N,M]
    union = boxes_a.prod(2) + boxes_b.prod(2) - inter  # area1 + area2 - inter
    return inter / union


def k_means(boxes, k, dist=np.median):
    """
    yolo k-means methods
    refer: https://github.com/qqwweee/keras-yolo3/blob/master/kmeans.py

    Clusters boxes given by (w, h) using ``1 - IoU`` as the distance.

    Args:
        boxes: the bboxes to cluster
        k: number of clusters
        dist: function used to update a cluster from its members
            (median by default, slightly better than the mean)

    Returns:
        Array with the ``k`` cluster (w, h) values.
    """
    box_number = boxes.shape[0]
    # -1 is never a valid cluster index, so the very first assignment can not
    # be mistaken for convergence (an all-zero start could be).
    last_nearest = np.full((box_number,), -1)
    # np.random.seed(0)  # fix the random seed

    # init k clusters
    clusters = boxes[np.random.choice(box_number, k, replace=False)]

    while True:
        distances = 1 - wh_iou(boxes, clusters)
        current_nearest = np.argmin(distances, axis=1)
        if (last_nearest == current_nearest).all():
            break  # clusters won't change
        for cluster in range(k):
            members = boxes[current_nearest == cluster]
            # An empty cluster keeps its previous value; reducing an empty
            # selection would produce NaN.
            if len(members) > 0:
                clusters[cluster] = dist(members, axis=0)

        last_nearest = current_nearest

    return clusters


def wh_iou(wh1, wh2):
    # Returns the nxm IoU matrix. wh1 is nx2, wh2 is mx2
    wh1 = wh1[:, None]  # [N,1,2]
    wh2 = wh2[None]  # [1,M,2]
    inter = np.minimum(wh1, wh2).prod(2)  # [N,M]
    return inter / (wh1.prod(2) + wh2.prod(2) - inter)  # iou = inter / (area1 + area2 - inter)


================================================
FILE: others_project/openvinotest/openvino_cls_test/class_indices.json
================================================
{
    "0": "daisy",
    "1": "dandelion",
    "2": "roses",
    "3": "sunflowers",
    "4": "tulips"
}

================================================
FILE: others_project/openvinotest/openvino_cls_test/create_imagenet_annotation.py
================================================
import os
import glob

image_dir = "/home/w180662/my_project/my_github/data_set/flower_data/train"
assert os.path.exists(image_dir), "image dir does not exist..."

# every .jpg that sits one folder below image_dir (one folder per class)
img_list = glob.glob(os.path.join(image_dir, "*", "*.jpg"))
assert len(img_list) > 0, "No images(.jpg) were found in image dir..."

classes_info = os.listdir(image_dir)
classes_info.sort()
classes_dict = {}

# create label file: one "index:class" line per class
with open("my_labels.txt", "w") as lw:
    # note: without a background class the index has to start from 0
    for index, c in enumerate(classes_info, start=0):
        # Every record ends with a newline. The old end-of-list test could
        # never be false, so this is the output it always produced.
        lw.write("{}:{}\n".format(index, c))
        classes_dict.update({c: str(index)})
print("create my_labels.txt successful...")

# create annotation file: one "image_path class_index" line per image
with open("my_annotation.txt", "w") as aw:
    for img in img_list:
        # the class name is the folder that holds the image (separator independent)
        img_classes = classes_dict[os.path.basename(os.path.dirname(img))]
        # Always terminate the record; the old test reused the stale class
        # index of the loop above and could drop every line break.
        aw.write("{} {}\n".format(img, img_classes))
print("create my_annotation.txt successful...")


================================================
FILE: others_project/openvinotest/openvino_cls_test/float32vsint8.py
================================================
import os
import time
import torch
from torchvision import transforms, datasets
from tqdm import tqdm
import numpy as np
from openvino.inference_engine import IECore

# Module-level torch device.
# NOTE(review): none of the functions in this file appear to read it;
# openvino_model_speed defines its own local ``device`` string. Confirm before removing.
device = torch.device("cpu")


def check_path_exist(path):
    """Assert that ``path`` exists on disk; fails with an AssertionError naming the path."""
    found = os.path.exists(path)
    assert found, f"{path} does not exist..."


def to_numpy(tensor):
    """Convert a torch tensor to a NumPy array on the CPU, detaching it from
    the autograd graph first when it requires gradients."""
    if tensor.requires_grad:
        tensor = tensor.detach()
    return tensor.cpu().numpy()


def openvino_model_speed(data_loader, val_num, xml_path, bin_path):
    """Run an OpenVINO IR model over ``data_loader`` on the CPU and print its
    throughput and accuracy.

    Args:
        data_loader: iterable yielding ``(images, labels)`` torch tensor batches
        val_num: total number of samples, used for the accuracy and fps figures
        xml_path: path of the IR ``.xml`` file
        bin_path: path of the IR ``.bin`` file

    Raises:
        AssertionError: if one of the model files does not exist.
        ValueError: if the network contains layers the CPU device does not support.
    """
    device = "CPU"
    model_xml_path = xml_path
    model_bin_path = bin_path
    check_path_exist(model_xml_path)
    check_path_exist(model_bin_path)

    # inference engine
    ie = IECore()

    # read IR
    net = ie.read_network(model=model_xml_path, weights=model_bin_path)

    # check supported layers for device
    if device == "CPU":
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
        if len(not_supported_layers) > 0:
            print("Please try to specify cpu extensions library path in sample's command line parameters using -l "
                  "or --cpu_extension command line argument")
            raise ValueError("device {} not support layers:\n {}".format(device,
                                                                         ",".join(not_supported_layers)))

    # get input and output name
    input_blob = next(iter(net.input_info))
    output_blob = next(iter(net.outputs))

    # Set the batch size on the network BEFORE it is loaded; changing it after
    # load_network does not affect the already created executable network.
    batch_size = 1
    net.batch_size = batch_size

    # load model
    exec_net = ie.load_network(network=net, device_name=device)

    # read and pre-process input images
    # n, c, h, w = net.input_info[input_blob].input_data.shape
    forward_time = 0  # only the time spent inside infer() is accumulated
    acc = 0.0  # accumulate accurate number / epoch
    for val_data in tqdm(data_loader, desc="Running openvino model..."):
        val_images, val_labels = val_data
        input_dict = {input_blob: to_numpy(val_images)}
        # start sync inference
        t1 = time.time()
        res = exec_net.infer(inputs=input_dict)
        t2 = time.time()
        forward_time += (t2 - t1)
        outputs = res[output_blob]
        predict_y = np.argmax(outputs, axis=1)
        acc += (predict_y == to_numpy(val_labels)).sum()
    val_accurate = acc / val_num
    fps = round(val_num / forward_time, 1)
    print("openvino info:\nfps: {}/s  accuracy: {}\n".format(fps,
                                                             val_accurate))


def main():
    """Benchmark two OpenVINO IR models (resnet34 and resnet34a) on the flower validation set."""
    preprocess = transforms.Compose([transforms.Resize([224, 224]),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    data_root = "/home/w180662/my_project/my_github"  # get data root path
    image_path = os.path.join(data_root, "data_set/flower_data/")  # flower data set path
    check_path_exist(image_path)

    val_set = datasets.ImageFolder(root=image_path + "val",
                                   transform=preprocess)
    sample_count = len(val_set)
    # images are fed one at a time, in a fixed order
    loader = torch.utils.data.DataLoader(val_set,
                                         batch_size=1,
                                         shuffle=False,
                                         num_workers=4)

    for stem in ("./resnet34", "./resnet34a"):
        openvino_model_speed(loader, sample_count, stem + ".xml", stem + ".bin")


if __name__ == '__main__':
    main()


================================================
FILE: others_project/openvinotest/openvino_cls_test/main.py
================================================
import sys
import cv2
import os
import glob
import json
import numpy as np
import logging as log
from openvino.inference_engine import IECore


def main():
    """Classify every ``*.jpg`` in the current directory with the OpenVINO
    ``resnet34`` IR model and print the predicted class and its probability."""
    device = "CPU"
    model_xml_path = "./resnet34.xml"
    model_bin_path = "./resnet34.bin"
    image_path = "./"
    class_json_path = './class_indices.json'

    # set log format
    log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)

    assert os.path.exists(model_xml_path), ".xml file does not exist..."
    assert os.path.exists(model_bin_path), ".bin file does not exist..."

    # search *.jpg files
    image_list = glob.glob(os.path.join(image_path, "*.jpg"))
    assert len(image_list) > 0, "no image(.jpg) be found..."

    # load class label (index -> class name); the file is closed right after parsing
    assert os.path.exists(class_json_path), "class_json_path does not exist..."
    with open(class_json_path, 'r') as json_file:
        class_indict = json.load(json_file)

    # inference engine
    ie = IECore()

    # read IR
    net = ie.read_network(model=model_xml_path, weights=model_bin_path)

    # check supported layers for device
    if device == "CPU":
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
        if len(not_supported_layers) > 0:
            log.error("device {} not support layers:\n {}".format(device,
                                                                  ",".join(not_supported_layers)))
            log.error("Please try to specify cpu extensions library path in sample's command line parameters using -l "
                      "or --cpu_extension command line argument")
            sys.exit(1)

    # get input and output name
    input_blob = next(iter(net.input_info))
    output_blob = next(iter(net.outputs))

    # Set the batch size on the network BEFORE it is loaded; changing it after
    # load_network does not affect the already created executable network.
    batch_size = 1
    net.batch_size = batch_size

    # load model
    exec_net = ie.load_network(network=net, device_name=device)

    # read and pre-process input images
    n, c, h, w = net.input_info[input_blob].input_data.shape
    # images = np.ndarray(shape=(n, c, h, w))
    # inference every image
    for image_file in image_list:
        image = cv2.imread(image_file)
        if image is None:
            # cv2.imread returns None for unreadable files; skip instead of crashing
            log.warning("failed to read image '{}', skipped.".format(image_file))
            continue
        if image.shape[:-1] != (h, w):
            image = cv2.resize(image, (w, h))
        # bgr(opencv default format) -> rgb
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # pre-process: scale to [0, 1], then normalise per channel
        image = (image / 255.).astype(np.float32)
        image = (image - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]
        # change data from HWC to CHW
        image = image.transpose((2, 0, 1))
        # add batch dimension
        image = np.expand_dims(image, axis=0)

        # start sync inference
        res = exec_net.infer(inputs={input_blob: image})
        prediction = np.squeeze(res[output_blob])
        # print(prediction)

        # np softmax process
        prediction -= np.max(prediction, keepdims=True)  # subtract the max for a numerically stable softmax
        prediction = np.exp(prediction) / np.sum(np.exp(prediction), keepdims=True)
        class_index = np.argmax(prediction, axis=0)
        # Round to two decimals: the "2" used to sit outside np.around, so the
        # probability was rounded to an integer.
        print("prediction: '{}'\nclass:{}  probability:{}\n".format(image_file,
                                                                    class_indict[str(class_index)],
                                                                    np.around(prediction[class_index], 2)))


if __name__ == '__main__':
    main()


================================================
FILE: others_project/openvinotest/openvino_cls_test/model.py
================================================
import torch.nn as nn
import torch


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions, each followed by batch norm.

    Args:
        in_channel: number of channels of the incoming feature map.
        out_channel: number of channels produced by both convolutions.
        stride: stride of the first convolution (the second always uses 1).
        downsample: optional module applied to the input to build the shortcut;
            when ``None`` the input itself is used as the shortcut.
    """
    # Output channels are ``out_channel * expansion``.
    expansion = 1

    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        super().__init__()
        # Sub-module names and creation order are kept so checkpoints still match.
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        shortcut = x if self.downsample is None else self.downsample(x)

        features = self.relu(self.bn1(self.conv1(x)))
        features = self.bn2(self.conv2(features))

        # Residual connection, added in place as before.
        features += shortcut
        return self.relu(features)


class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions with batch norm.

    Args:
        in_channel: number of channels of the incoming feature map.
        out_channel: width of the two inner convolutions; the block outputs
            ``out_channel * expansion`` channels.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input to build the shortcut;
            when ``None`` the input itself is used as the shortcut.
    """
    # The last 1x1 convolution widens the output by this factor.
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        super().__init__()
        wide_channel = out_channel * self.expansion

        # 1x1 convolution: squeeze channels.
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=1,
                               stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        # 3x3 convolution: the only layer that uses the requested stride.
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3,
                               stride=stride, bias=False, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channel)
        # 1x1 convolution: expand channels again.
        self.conv3 = nn.Conv2d(out_channel, wide_channel, kernel_size=1,
                               stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(wide_channel)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        shortcut = x if self.downsample is None else self.downsample(x)

        features = self.relu(self.bn1(self.conv1(x)))
        features = self.relu(self.bn2(self.conv2(features)))
        features = self.bn3(self.conv3(features))

        # Residual connection, added in place as before.
        features += shortcut
        return self.relu(features)


class ResNet(nn.Module):
    """ResNet backbone assembled from a residual block class.

    Args:
        block: residual block class; it must expose an ``expansion`` attribute
            and accept ``(in_channel, out_channel, stride=..., downsample=...)``.
        blocks_num: sequence of four ints, the number of blocks in each stage.
        num_classes: output size of the final linear layer.
        include_top: when ``False`` the average pool and linear layer are not
            created and ``forward`` returns the last stage's feature map.
    """

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True):
        super().__init__()
        self.include_top = include_top
        # Running channel count; updated by ``_make_layer`` after every stage.
        self.in_channel = 64

        # Stem: 7x7 stride-2 convolution followed by a 3x3 stride-2 max pool.
        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four stages registered as layer1..layer4: (base width, stride) per stage.
        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
        for index, (width, stride) in enumerate(stage_specs):
            stage = self._make_layer(block, width, blocks_num[index], stride=stride)
            setattr(self, "layer{}".format(index + 1), stage)

        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Re-initialise every convolution weight with Kaiming-normal (fan_out, relu).
        conv_layers = (m for m in self.modules() if isinstance(m, nn.Conv2d))
        for conv in conv_layers:
            nn.init.kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` blocks and advance ``self.in_channel``.

        Only the first block receives ``stride`` and, when the stride or the
        channel count changes, a 1x1 conv + batch-norm projection shortcut.
        """
        out_channel = channel * block.expansion

        projection = None
        if stride != 1 or self.in_channel != out_channel:
            projection = nn.Sequential(
                nn.Conv2d(self.in_channel, out_channel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channel))

        stage_blocks = [block(self.in_channel, channel, downsample=projection, stride=stride)]
        self.in_channel = out_channel
        stage_blocks.extend(block(self.in_channel, channel) for _ in range(1, block_num))

        return nn.Sequential(*stage_blocks)

    def forward(self, x):
        """Run the stem and the four stages, then the classifier head if present."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        if not self.include_top:
            return x

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        return self.fc(x)


def resnet34(num_classes=1000, include_top=True):
    """Build a ResNet from ``BasicBlock`` with stage depths (3, 4, 6, 3)."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths, num_classes=num_classes, include_top=include_top)


def resnet101(num_classes=1000, include_top=True):
    """Build a ResNet from ``Bottleneck`` with stage depths (3, 4, 23, 3)."""
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths, num_classes=num_classes, include_top=include_top)


================================================
FILE: others_project/openvinotest/openvino_cls_test/speed_test.py
================================================
import os
import time
import torch
from torchvision import transforms, datasets
from tqdm import tqdm
import onnx
import onnxruntime
import numpy as np
from openvino.inference_engine import IECore
from model import resnet34

# Torch device used by pytorch_model_speed for loading weights and running inference.
device = torch.device("cpu")


def check_path_exist(path):
    """Assert that ``path`` exists on disk; raises AssertionError otherwise."""
    is_present = os.path.exists(path)
    assert is_present, "{} does not exist...".format(path)


def to_numpy(tensor):
    """Return ``tensor`` as a NumPy array on the CPU.

    Tensors that track gradients are detached first, since ``numpy()`` cannot
    be called on them directly.
    """
    if tensor.requires_grad:
        tensor = tensor.detach()
    return tensor.cpu().numpy()


def pytorch_model_speed(data_loader, val_num):
    """Measure forward-pass throughput and accuracy of the PyTorch ResNet-34.

    Args:
        data_loader: iterable yielding ``(images, labels)`` batches.
        val_num: total number of samples, used to normalise fps and accuracy.

    Returns:
        Tuple ``(fps, accuracy, "Pytorch(not opt)")``.
    """
    model = resnet34(num_classes=5)
    # load weights
    weights_file = "./resNet34.pth"
    check_path_exist(weights_file)
    state_dict = torch.load(weights_file, map_location=device)
    model.load_state_dict(state_dict, strict=False)
    model.eval()
    # One forward pass on random data before timing starts.
    warmup_input = torch.rand((1, 3, 224, 224))
    model(warmup_input.to(device))

    elapsed = 0
    correct = 0.0  # number of correctly classified samples
    with torch.no_grad():
        for images, labels in tqdm(data_loader, desc="Running pytorch model..."):
            # Only the forward pass is timed, not the data loading.
            start = time.time()
            logits = model(images.to(device))
            stop = time.time()
            elapsed += (stop - start)
            _, predicted = torch.max(logits, dim=1)
            correct += (predicted == labels.to(device)).sum().item()
        accuracy = correct / val_num
    fps = round(val_num / elapsed, 1)
    print("pytorch info:\nfps: {}/s  accuracy: {}\n".format(fps, accuracy))
    return fps, accuracy, "Pytorch(not opt)"


def onnx_model_speed(data_loader, val_num):
    """Measure inference throughput and accuracy of the exported ONNX model.

    Args:
        data_loader: iterable yielding ``(images, labels)`` tensor batches.
        val_num: total number of samples, used to normalise fps and accuracy.

    Returns:
        Tuple ``(fps, accuracy, "ONNX")``.
    """
    # Validate the ONNX file before creating the runtime session.
    model_file = "./resnet34.onnx"
    check_path_exist(model_file)
    onnx.checker.check_model(onnx.load(model_file))

    session = onnxruntime.InferenceSession(model_file)
    input_name = session.get_inputs()[0].name

    elapsed = 0
    correct = 0.0  # number of correctly classified samples
    for images, labels in tqdm(data_loader, desc="Running onnx model..."):
        feed = {input_name: to_numpy(images)}
        # Only the session run is timed.
        start = time.time()
        results = session.run(None, feed)
        stop = time.time()
        elapsed += (stop - start)
        predicted = np.argmax(results[0], axis=1)
        correct += (predicted == to_numpy(labels)).sum()
    accuracy = correct / val_num
    fps = round(val_num / elapsed, 1)
    print("onnx info:\nfps: {}/s  accuracy: {}\n".format(fps, accuracy))
    return fps, accuracy, "ONNX"


def openvino_model_speed(data_loader, val_num):
    """Measure inference throughput and accuracy of the OpenVINO IR model.

    Args:
        data_loader: iterable yielding ``(images, labels)`` tensor batches.
        val_num: total number of samples, used to normalise fps and accuracy.

    Returns:
        Tuple ``(fps, accuracy, "OpenVINO")``, the same shape as the values
        returned by ``pytorch_model_speed`` and ``onnx_model_speed``.

    Raises:
        ValueError: if the CPU plugin reports layers it cannot run.
    """
    # Named differently from the module-level torch ``device`` to avoid shadowing it.
    device_name = "CPU"
    model_xml_path = "./resnet34r.xml"
    model_bin_path = "./resnet34r.bin"
    check_path_exist(model_xml_path)
    check_path_exist(model_bin_path)

    # inference engine
    ie = IECore()

    # read IR
    net = ie.read_network(model=model_xml_path, weights=model_bin_path)

    # Check supported layers before compiling, so an unsupported model is
    # reported with the explicit message below.
    if device_name == "CPU":
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [layer for layer in net.layers.keys() if layer not in supported_layers]
        if len(not_supported_layers) > 0:
            print("Please try to specify cpu extensions library path in sample's command line parameters using -l "
                  "or --cpu_extension command line argument")
            raise ValueError("device {} not support layers:\n {}".format(device_name,
                                                                         ",".join(not_supported_layers)))

    # get input and output name
    input_blob = next(iter(net.input_info))
    output_blob = next(iter(net.outputs))

    # Set the batch size on the network before it is loaded; the executable
    # network is created from the network's state at load time.
    batch_size = 1
    net.batch_size = batch_size

    # load model
    exec_net = ie.load_network(network=net, device_name=device_name)

    forward_time = 0
    acc = 0.0  # number of correctly classified samples
    for val_data in tqdm(data_loader, desc="Running openvino model..."):
        val_images, val_labels = val_data
        input_dict = {input_blob: to_numpy(val_images)}
        # start sync inference; only this call is timed
        t1 = time.time()
        res = exec_net.infer(inputs=input_dict)
        t2 = time.time()
        forward_time += (t2 - t1)
        outputs = res[output_blob]
        predict_y = np.argmax(outputs, axis=1)
        acc += (predict_y == to_numpy(val_labels)).sum()
    val_accurate = acc / val_num
    fps = round(val_num / forward_time, 1)
    print("openvino info:\nfps: {}/s  accuracy: {}\n".format(fps,
                                                             val_accurate))
    return fps, val_accurate, "OpenVINO"


def main(data_root="/home/w180662/my_project/my_github"):
    """Run the PyTorch, ONNX and OpenVINO benchmarks on the flower validation set.

    Args:
        data_root: directory that contains ``data_set/flower_data/val``. The
            default is the path that was previously hard-coded here.
    """
    data_transform = transforms.Compose([transforms.Resize([224, 224]),
                                         transforms.ToTensor(),
                                         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    image_path = os.path.join(data_root, "data_set/flower_data/")  # flower data set path
    check_path_exist(image_path)

    # The benchmarks feed one image at a time.
    batch_size = 1

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform)
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=4)

    pytorch_model_speed(validate_loader, val_num)
    onnx_model_speed(validate_loader, val_num)
    openvino_model_speed(validate_loader, val_num)


# Run the benchmarks only when this file is executed directly.
if __name__ == '__main__':
    main()


================================================
FILE: others_project/readPbFile/README.md
================================================
该项目用于读取冻结后的pb文件并进行预测  
使用步骤：   
（1）准备好需要使用的pb冻结文件，pbtxt标签文件，测试用的图片  
（2）修改info.config文件中的相关信息  

![Example image](https://tensorflowob/raw/master/object_detection/readPbFile/example1.jpg)     
![Example image](https://tensorflowob/raw/master/object_detection/readPbFile/example2.jpg)

================================================
FILE: others_project/readPbFile/pascal_label_map.pbtxt
================================================
item {
  id: 1
  name: 'aeroplane'
}

item {
  id: 2
  name: 'bicycle'
}

item {
  id: 3
  name: 'bird'
}

item {
  id: 4
  name: 'boat'
}

item {
  id: 5
  name: 'bottle'
}

item {
  id: 6
  name: 'bus'
}

item {
  id: 7
  name: 'car'
}

item {
  id: 8
  name: 'cat'
}

item {
  id: 9
  name: 'chair'
}

item {
  id: 10
  name: 'cow'
}

item {
  id: 11
  name: 'diningtable'
}

item {
  id: 12
  name: 'dog'
}

item {
  id: 13
  name: 'horse'
}

item {
  id: 14
  name: 'motorbike'
}

item {
  id: 15
  name: 'person'
}

item {
  id: 16
  name: 'pottedplant'
}

item {
  id: 17
  name: 'sheep'
}

item {
  id: 18
  name: 'sofa'
}

item {
  id: 19
  name: 'train'
}

item {
  id: 20
  name: 'tvmonitor'
}


================================================
FILE: others_project/readPbFile/readPb.py
================================================
import tensorflow as tf
import configparser
from distutils.version import StrictVersion
import cv2
import glob
from using_function import draw_box, read_pbtxt, get_inAndout_tensor, convert_type, read_image

# Refuse to run on TensorFlow versions older than 1.12.0.
if StrictVersion(tf.__version__) < StrictVersion('1.12.0'):
    raise ImportError('Please upgrade your TensorFlow installation to v1.12.*.')

# Read the parameter configuration file ('info.config', section [tensorflow]).
conf = configparser.ConfigParser()
conf.read('info.config')
path_to_frozen_graph = conf.get('tensorflow', 'path_to_frozen_graph')
path_to_labels = conf.get('tensorflow', 'path_to_labels')
# Used below as a glob pattern, so it may match several image files.
path_to_images = conf.get('tensorflow', 'path_to_images')
probability_thresh = float(conf.get('tensorflow', 'probability_thresh'))

# Read the label information from the pbtxt file.
category_index = read_pbtxt(path_to_labels)

# Load the serialized frozen graph into a dedicated tf.Graph.
# NOTE(review): tf.GraphDef / tf.gfile / tf.Session look like TF 1.x-style names —
# confirm which TensorFlow versions this script is expected to run on.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(path_to_frozen_graph, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        # name='' imports the nodes without a name prefix, so tensors can be
        # looked up by their original names (e.g. 'image_tensor:0').
        tf.import_graph_def(od_graph_def, name='')

with detection_graph.as_default():
    with tf.Session() as sess:
        # Get handles to input and output tensors
        tensor_dict, image_tensor = get_inAndout_tensor()
        test_image_paths = glob.glob(path_to_images)
        for image_path in test_image_paths:
            image_BGR, image_np_expanded = read_image(image_path)

            # Run inference
            output_dict = sess.run(tensor_dict,
                                   feed_dict={image_tensor: image_np_expanded})
            # all outputs are float32 numpy arrays, so convert types as appropriate
            convert_type(output_dict)

            # draw_box modifies image_BGR in place.
            draw_box(image_BGR,
                     output_dict['detection_boxes'],
                     output_dict['detection_classes'],
                     output_dict['detection_scores'],
                     category_index,
                     thresh=probability_thresh,
                     line_thickness=5)
            cv2.namedWindow("prediction", cv2.WINDOW_AUTOSIZE)
            cv2.imshow("prediction", image_BGR)
            # Block until a key is pressed before moving on to the next image.
            cv2.waitKey(0)


================================================
FILE: others_project/readPbFile/test_images/image_info.txt
================================================

Image provenance:
image1.jpg: https://commons.wikimedia.org/wiki/File:Baegle_dwa.jpg
image2.jpg: Michael Miley,
  https://www.flickr.com/photos/mike_miley/4678754542/in/photolist-88rQHL-88oBVp-88oC2B-88rS6J-88rSqm-88oBLv-88oBC4


================================================
FILE: others_project/readPbFile/using_function.py
================================================
import collections
import six
import PIL.Image as Image
import PIL.ImageDraw as ImageDraw
import PIL.ImageFont as ImageFont
import numpy as np
import tensorflow as tf
import cv2

# Color names used when drawing detections. filter_low_thresh picks an entry
# with ``class_id % len(STANDARD_COLORS)``, so the order of this list decides
# which color each class gets.
STANDARD_COLORS = [
    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',
    'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',
    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',
    'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
    'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',
    'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
    'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
    'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
    'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
    'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
    'WhiteSmoke', 'Yellow', 'YellowGreen'
]


def filter_low_thresh(boxes, scores, classes, category_index, thresh, box_to_display_str_map, box_to_color_map):
    """Collect a label string and a color for every detection scoring above ``thresh``.

    Results are written into the two mapping arguments, keyed by the box
    coordinates as a tuple; nothing is returned. Iteration stops at the first
    score that is not above ``thresh``.

    Args:
        boxes: array whose first axis indexes detections.
        scores: per-detection scores, expected in descending order.
        classes: per-detection integer class ids.
        category_index: dict mapping class id to a dict with a ``'name'`` entry.
        thresh: minimum score (exclusive) for a detection to be kept.
        box_to_display_str_map: mapping box -> list of label strings (appended to).
        box_to_color_map: mapping box -> color name (overwritten).
    """
    for idx in range(boxes.shape[0]):
        score = scores[idx]
        if not score > thresh:
            # The network output is already sorted by score, so once one entry
            # fails the threshold every later one fails as well.
            break

        box_key = tuple(boxes[idx].tolist())
        class_id = classes[idx]
        if class_id in six.viewkeys(category_index):
            class_name = category_index[class_id]['name']
        else:
            class_name = 'N/A'
        label = '{}: {}%'.format(str(class_name), int(100 * score))
        box_to_display_str_map[box_key].append(label)
        box_to_color_map[box_key] = STANDARD_COLORS[class_id % len(STANDARD_COLORS)]


def draw_text(draw, box_to_display_str_map, box, left, right, top, bottom, color):
    """Draw the label strings of ``box`` as filled tags next to the box.

    Args:
        draw: ``ImageDraw`` object to draw on.
        box_to_display_str_map: mapping box -> list of label strings.
        box: key of the box whose labels are drawn.
        left, right, top, bottom: box edges in pixels (``right`` is unused and
            only kept for signature compatibility).
        color: fill color of the label background.
    """
    try:
        font = ImageFont.truetype('arial.ttf', 24)
    except IOError:
        font = ImageFont.load_default()

    def text_size(text):
        """Return (width, height) of ``text`` for the selected font."""
        if hasattr(font, 'getsize'):
            return font.getsize(text)
        # Pillow 10 removed ``getsize``; the right/bottom edges of ``getbbox``
        # are the values ``getsize`` used to report.
        bbox = font.getbbox(text)
        return bbox[2], bbox[3]

    display_strs = box_to_display_str_map[box]

    # If the total height of the display strings added to the top of the bounding
    # box exceeds the top of the image, stack the strings below the bounding box
    # instead of above.
    display_str_heights = [text_size(ds)[1] for ds in display_strs]
    # Each display_str has a top and bottom margin of 0.05x.
    total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)

    if top > total_display_str_height:
        text_bottom = top
    else:
        text_bottom = bottom + total_display_str_height
    # Reverse list and print from bottom to top.
    for display_str in display_strs[::-1]:
        text_width, text_height = text_size(display_str)
        margin = np.ceil(0.05 * text_height)
        draw.rectangle([(left, text_bottom - text_height - 2 * margin),
                        (left + text_width, text_bottom)], fill=color)
        draw.text((left + margin, text_bottom - text_height - margin),
                  display_str,
                  fill='black',
                  font=font)
        # Each tag is text_height + 2 * margin tall, so move up by that amount.
        # The previous ``text_height - 2 * margin`` made stacked tags overlap.
        text_bottom -= text_height + 2 * margin


def draw_box(image, boxes, classes, scores, category_index, thresh=0.5, line_thickness=8):
    """Draw every detection scoring above ``thresh`` onto ``image`` in place.

    Args:
        image: numpy image array; it is modified in place and also returned.
        boxes: per-detection boxes as normalized (ymin, xmin, ymax, xmax).
        classes: per-detection integer class ids.
        scores: per-detection scores, expected in descending order.
        category_index: dict mapping class id to a dict with a ``'name'`` entry.
        thresh: minimum score (exclusive) for a detection to be drawn.
        line_thickness: width of the box outline in pixels.

    Returns:
        The same ``image`` object that was passed in.
    """
    box_to_display_str_map = collections.defaultdict(list)
    box_to_color_map = collections.defaultdict(str)

    filter_low_thresh(boxes, scores, classes, category_index, thresh, box_to_display_str_map, box_to_color_map)

    # Nothing passed the threshold: leave the image untouched.
    if not box_to_color_map:
        return image

    # Convert to PIL once, draw all boxes, then copy back once, instead of
    # repeating the numpy -> PIL -> numpy round trip for every box.
    image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
    draw = ImageDraw.Draw(image_pil)
    im_width, im_height = image_pil.size

    # Draw all boxes onto image.
    for box, color in box_to_color_map.items():
        ymin, xmin, ymax, xmax = box
        # Scale the normalized coordinates to pixels.
        (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                      ymin * im_height, ymax * im_height)
        draw.line([(left, top), (left, bottom), (right, bottom),
                   (right, top), (left, top)], width=line_thickness, fill=color)
        draw_text(draw, box_to_display_str_map, box, left, right, top, bottom, color)

    np.copyto(image, np.array(image_pil))
    return image


def read_pbtxt(filename):
    """Parse a label-map ``.pbtxt`` file into a category index.

    Each ``item { ... }`` entry must contain an integer ``id`` and a quoted
    ``name``; the two fields may appear in any order and other fields (for
    example ``display_name``) are ignored. Spaces inside names are preserved.

    Args:
        filename: path of the pbtxt file.

    Returns:
        Dict mapping each item's id to ``{'id': <int>, 'name': <str>}``. For a
        file whose ids are 1, 2, 3, ... in order this equals the previous
        position-based result.

    Raises:
        ValueError: if an item lacks an ``id`` or a ``name`` field.
    """
    import re

    with open(filename, 'r') as reader:
        txt = reader.read()

    category_index = {}
    for body in re.findall(r'item\s*\{(.*?)\}', txt, flags=re.DOTALL):
        id_match = re.search(r'\bid\s*:\s*(\d+)', body)
        # ``\b`` keeps this from matching the tail of ``display_name``.
        name_match = re.search(r"""\bname\s*:\s*(["'])(.*?)\1""", body)
        if id_match is None or name_match is None:
            raise ValueError("malformed item in {}: {!r}".format(filename, body.strip()))
        class_id = int(id_match.group(1))
        # Key by the declared id, since detections are looked up by class id.
        category_index[class_id] = {'id': class_id, 'name': name_match.group(2)}
    return category_index


def get_inAndout_tensor():
    """Look up the detection output tensors and the image input tensor.

    Returns:
        Tuple ``(tensor_dict, image_tensor)``. ``tensor_dict`` maps each of
        'num_detections', 'detection_boxes', 'detection_scores' and
        'detection_classes' to its tensor, skipping names that are absent
        from the default graph. ``image_tensor`` is the tensor named
        'image_tensor:0'.
    """
    graph = tf.get_default_graph()
    available_names = {out.name for op in graph.get_operations() for out in op.outputs}

    wanted_keys = ('num_detections', 'detection_boxes', 'detection_scores', 'detection_classes')
    tensor_dict = {
        key: graph.get_tensor_by_name(key + ':0')
        for key in wanted_keys
        if key + ':0' in available_names
    }
    image_tensor = graph.get_tensor_by_name('image_tensor:0')
    return tensor_dict, image_tensor


def convert_type(output_dict):
    """Strip the leading axis from the detection outputs, in place.

    Each of the four entries is replaced by its first element along axis 0;
    'num_detections' additionally becomes a Python int and
    'detection_classes' is cast to int64. Nothing is returned.
    """
    detection_count = output_dict['num_detections'][0]
    output_dict['num_detections'] = int(detection_count)

    class_ids = output_dict['detection_classes'][0]
    output_dict['detection_classes'] = class_ids.astype(np.int64)

    for key in ('detection_boxes', 'detection_scores'):
        output_dict[key] = output_dict[key][0]


def read_image(image_path):
    """Load an image and prepare the batched RGB array that is fed to the model.

    Args:
        image_path: path of the image file.

    Returns:
        Tuple ``(image_BGR, image_np_expanded)``: the image as loaded by
        OpenCV (BGR) and an RGB copy with a leading batch axis of size 1.

    Raises:
        IOError: if OpenCV cannot read the file.
    """
    image_BGR = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if image_BGR is None:
        raise IOError("cannot read image: {}".format(image_path))
    image_RGB = cv2.cvtColor(image_BGR, cv2.COLOR_BGR2RGB)
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_RGB, axis=0)
    return image_BGR, image_np_expanded


================================================
FILE: others_project/textcnnKeras/dataGenerator.py
================================================
from tensorflow import keras
from sklearn.preprocessing import LabelEncoder
import random


def content2idList(content, word2id_dict):
    """Convert a text into the list of ids of its characters.

    content: the input text, iterated character by character.
    word2id_dict: lookup table from character to id.

    Characters that are missing from ``word2id_dict`` are dropped.
    """
    return [word2id_dict[char] for char in content if char in word2id_dict]


def generatorInfo(batch_size, seq_length, num_classes, file_name,
                  vocab_file='./cnews/cnews.vocab.txt'):
    """Yield random ``(batch_X, batch_Y)`` training batches forever.

    batch_size: number of samples per generated batch
    seq_length: length every id sequence is padded or truncated to
    num_classes: number of text categories (width of the one-hot labels)
    file_name: path of the data file; each line is ``<label> <text>``
    vocab_file: path of the vocabulary file, one entry per line; the line
        index is used as the id. Defaults to the previously hard-coded path.

    Lines of ``file_name`` that are blank or contain only a label are skipped.
    """
    # Read the vocabulary file and map every entry to its line index.
    with open(vocab_file, encoding='utf-8') as file:
        vocabulary_list = [k.strip() for k in file]
    word2id_dict = {word: idx for idx, word in enumerate(vocabulary_list)}

    # Read the text file and split each line into label and content.
    data_label_list = []  # labels
    data_content_list = []  # texts
    with open(file_name, encoding='utf-8') as file:
        for line in file:
            parts = line.strip().split(maxsplit=1)
            if len(parts) != 2:
                # Blank or label-only line: previously an IndexError.
                continue
            data_label_list.append(parts[0])
            data_content_list.append(parts[1])

    # Convert the texts into id sequences.
    data_id_list = [content2idList(content, word2id_dict) for content in data_content_list]
    # Convert the lists to one ndarray with a uniform length of seq_length:
    # longer sequences are truncated keeping their tail, shorter ones are
    # padded with 0 at the front.
    data_X = keras.preprocessing.sequence.pad_sequences(data_id_list, seq_length, truncating='pre')
    labelEncoder = LabelEncoder()
    data_y = labelEncoder.fit_transform(data_label_list)  # text labels -> integer labels
    data_Y = keras.utils.to_categorical(data_y, num_classes)  # integer labels -> one-hot labels

    sample_count = len(data_y)
    while True:
        # Draw batch_size distinct sample indices at random.
        selected_index = random.sample(range(sample_count), k=batch_size)
        batch_X = data_X[selected_index]  # id sequences of the drawn samples
        batch_Y = data_Y[selected_index]  # one-hot labels of the drawn samples
        yield (batch_X, batch_Y)


================================================
FILE: others_project/textcnnKeras/data_link.txt
================================================
baidupan_url = "https://pan.baidu.com/s/1w452Z5eXbQSDQfgEBNUdlg"
extract_code = "8cwv"

================================================
FILE: others_project/textcnnKeras/main.py
================================================
from models import text_cnn, simpleNet, text_cnn_V2
from dataGenerator import generatorInfo
from tensorflow import keras

# Hyper-parameters and file locations for the text-CNN training script.
vocab_size = 5000
seq_length = 600
embedding_dim = 64
num_classes = 10
trainBatchSize = 64
evalBatchSize = 200
# NOTE(review): 50000 is presumably the number of training samples — confirm.
steps_per_epoch = 50000 // trainBatchSize
epoch = 2
# NOTE(review): not referenced by the fit_generator call below, which passes workers=1.
workers = 4
logdir = './log/'
trainFileName = './cnews/cnews.train.txt'
evalFileName = './cnews/cnews.test.txt'

# Build and compile the model (text_cnn compiles it before returning).
model = text_cnn(seq_length=seq_length,
                 vocab_size=vocab_size,
                 embedding_dim=embedding_dim,
                 num_cla=num_classes,
                 kernelNum=64)

# Endless random-batch generators for training and evaluation data.
trainGenerator = generatorInfo(trainBatchSize, seq_length, num_classes, trainFileName)
evalGenerator = generatorInfo(evalBatchSize, seq_length, num_classes, evalFileName)


def lrSchedule(epoch):
    """Return the learning rate for ``epoch``: half the optimizer's current
    rate, except at epoch 0 where the current rate is returned unchanged."""
    current_lr = keras.backend.get_value(model.optimizer.lr)
    should_halve = epoch % 1 == 0 and epoch != 0
    return current_lr * 0.5 if should_halve else current_lr


# Callbacks: TensorBoard logging into logdir and the lrSchedule learning-rate schedule.
log = keras.callbacks.TensorBoard(log_dir=logdir, update_freq=500)
reduceLr = keras.callbacks.LearningRateScheduler(lrSchedule, verbose=1)

# Train from the endless generator; validation draws 10 batches from evalGenerator.
model.fit_generator(generator=trainGenerator,
                    steps_per_epoch=steps_per_epoch,
                    epochs=epoch,
                    validation_data=evalGenerator,
                    validation_steps=10,
                    workers=1,
                    callbacks=[log, reduceLr])
# Save only the weights (not the full model) next to the TensorBoard logs.
model.save_weights(logdir + 'train_weight.h5')


================================================
FILE: others_project/textcnnKeras/models.py
================================================
from tensorflow import keras


def text_cnn(seq_length, vocab_size, embedding_dim, num_cla, kernelNum):
    """
    Build and compile a text CNN with three parallel convolution branches.

    :param seq_length:  length of the input id sequence
    :param vocab_size:  size of the vocabulary
    :param embedding_dim:  feature dimension of the learned embeddings
    :param num_cla: number of output classes
    :param kernelNum: number of filters in each convolution branch
    :return: keras model
    """
    inputX = keras.layers.Input(shape=(seq_length,), dtype='int32')
    embOut = keras.layers.Embedding(vocab_size, embedding_dim, input_length=seq_length)(inputX)

    # Convolve with window sizes 3, 4 and 5; each branch is max-pooled over
    # its whole length.
    branch_outputs = []
    for window in (3, 4, 5):
        conv = keras.layers.Conv1D(kernelNum, window, padding='valid', strides=1, activation='relu')(embOut)
        pooled = keras.layers.MaxPool1D(pool_size=int(conv.shape[1]))(conv)
        branch_outputs.append(pooled)

    # Merge the outputs of the three branches.
    merged = keras.layers.Concatenate(axis=-1)(branch_outputs)
    flat = keras.layers.Flatten()(merged)
    hidden = keras.layers.Dense(128)(flat)
    hidden = keras.layers.Dropout(0.25)(hidden)
    hidden = keras.layers.ReLU()(hidden)
    predictY = keras.layers.Dense(num_cla, activation='softmax')(hidden)

    # Create the model, then set the loss and optimizer and compile it.
    model = keras.models.Model(inputs=inputX, outputs=predictY)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model


def text_cnn_V2(seq_length, vocab_size, embedding_dim, num_cla, kernelNum=128):
    """
    Build and compile a text CNN variant that uses full-length separable
    convolutions in place of max pooling and a convolution as the classifier.

    :param seq_length:  length of the input id sequence
    :param vocab_size:  size of the vocabulary
    :param embedding_dim:  feature dimension of the learned embeddings
    :param num_cla: number of output classes
    :param kernelNum: number of filters in each convolution branch
    :return: keras model
    """
    inputX = keras.layers.Input(shape=(seq_length,), dtype='int32')
    embOut = keras.layers.Embedding(vocab_size, embedding_dim, input_length=seq_length)(inputX)

    # Convolve with window sizes 3, 4 and 5; each branch is then reduced by a
    # SeparableConv1D whose kernel spans the branch's whole length.
    branch_outputs = []
    for window in (3, 4, 5):
        conv = keras.layers.Conv1D(kernelNum, window, padding='valid', strides=1, activation='relu')(embOut)
        reduced = keras.layers.SeparableConv1D(filters=int(conv.shape[2]),
                                               kernel_size=int(conv.shape[1]))(conv)
        branch_outputs.append(reduced)

    # Merge the outputs of the three branches.
    merged = keras.layers.Concatenate(axis=2)(branch_outputs)
    normed = keras.layers.BatchNormalization()(merged)
    class_scores = keras.layers.Conv1D(num_cla, kernel_size=int(merged.shape[1]), activation='softmax')(normed)
    predictY = keras.layers.Flatten()(class_scores)

    # Create the model, then set the loss and optimizer and compile it.
    model = keras.models.Model(inputs=inputX, outputs=predictY)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model


def simpleNet(seq_length, vocab_size, embedding_dim, num_cla, kernelNum=128):
    """
    Build and compile a single-branch text CNN.

    :param seq_length:  length of the input id sequence
    :param vocab_size:  size of the vocabulary
    :param embedding_dim:  feature dimension of the learned embeddings
    :param num_cla: number of output classes
    :param kernelNum: number of convolution filters
    :return: keras model
    """
    inputX = keras.layers.Input(shape=(seq_length,), dtype='int32')
    embedded = keras.layers.Embedding(vocab_size, embedding_dim, input_length=seq_length)(inputX)

    # One convolution with window size 5 (no activation), max-pooled over the
    # whole sequence.
    conv = keras.layers.Conv1D(kernelNum, 5, padding='same', strides=1)(embedded)
    pooled = keras.layers.MaxPool1D(pool_size=int(conv.shape[1]))(conv)

    hidden = keras.layers.Flatten()(pooled)
    hidden = keras.layers.Dense(128)(hidden)
    hidden = keras.layers.Dropout(0.25)(hidden)
    hidden = keras.layers.ReLU()(hidden)
    predictY = keras.layers.Dense(num_cla, activation='softmax')(hidden)

    # Create the model, then set the loss and optimizer and compile it.
    model = keras.models.Model(inputs=inputX, outputs=predictY)
    optimizer = keras.optimizers.Adam(lr=1e-3)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model


================================================
FILE: others_project/trans_widerface_to_xml/create_xml.py
================================================
import copy
import os
from xml.dom import minidom as dom


class XMLGenerator(object):
    """Build an XML document node by node with minidom and write it to a file."""

    def __init__(self, xml_name: str):
        """
        Args:
            xml_name: path of the file that ``save_xml`` writes.
        """
        self.doc = dom.Document()
        self.xml_name = xml_name

    def create_append_node(self, node_name, root_node=None):
        """Create a new element and append it under ``root_node``.

        When ``root_node`` is ``None`` the element is appended to the document
        itself. Returns the new element.
        """
        new_node = self.doc.createElement(node_name)
        if root_node is not None:
            root_node.appendChild(new_node)
        else:
            self.doc.appendChild(new_node)
        return new_node

    def create_text_node(self, node_name, node_value, root_node):
        """
        Create a new element containing ``node_value`` as a text node and
        append the element under ``root_node``.
        """
        new_node = self.doc.createElement(node_name)
        node_data = self.doc.createTextNode(node_value)
        new_node.appendChild(node_data)
        root_node.appendChild(new_node)

    def create_object_node(self, info_dict: dict = None, root_node=None):
        """Append an ``object`` element describing one annotated box.

        ``info_dict`` must contain 'xmin', 'ymin', 'xmax' and 'ymax'; they are
        written under a ``bndbox`` child. Every other entry becomes a text
        child of ``object``. ``info_dict`` is not modified, so the same dict
        can be reused. Does nothing when either argument is ``None``.

        Raises:
            KeyError: if one of the four box keys is missing.
        """
        if (info_dict is None) or (root_node is None):
            return

        object_node = self.create_append_node('object', root_node)
        box_node = self.create_append_node('bndbox', object_node)

        box_keys = ("xmin", "ymin", "xmax", "ymax")
        for key in box_keys:
            self.create_text_node(key, info_dict[key], box_node)

        # Read instead of pop, so the caller's dict is left untouched.
        for k, v in info_dict.items():
            if k not in box_keys:
                self.create_text_node(k, v, object_node)

    def save_xml(self):
        """Write the document to ``self.xml_name`` with tab indentation."""
        # The context manager closes the file even if writexml raises.
        with open(self.xml_name, "w") as f:
            self.doc.writexml(f, addindent="\t", newl="\n")


def create_pascal_voc_xml(filename: str = None,
                          years: str = 'VOC2012',
                          source_dict: dict = None,
                          objects_list: list = None,
                          im_shape: tuple = None,
                          save_root: str = None,
                          cover: bool = False):
    """Write one Pascal VOC style annotation XML file.

    The function returns without writing anything when ``filename``,
    ``source_dict``, ``objects_list`` or ``im_shape`` is missing or empty, or
    when the target file already exists and ``cover`` is False.

    Args:
        filename: Image path; it is stored verbatim in the <filename> element,
            and its base name (up to the first ".") names the XML file.
        years: Text of the <folder> element.
        source_dict: Mapping with the keys ``database``, ``annotation`` and
            ``image`` for the <source> element.
        objects_list: One dict per object, as accepted by
            ``XMLGenerator.create_object_node``. The dicts are not modified.
        im_shape: Image shape ordered as (height, width[, depth]). A missing
            depth is written as 1.
        save_root: Directory the XML file is written to. Defaults to the
            working directory at call time.
        cover: Overwrite an existing XML file when True.
    """
    if not (filename and source_dict and objects_list and im_shape):
        return

    # Resolve the default here rather than in the signature, where it would be
    # frozen to the working directory at import time.
    if save_root is None:
        save_root = os.getcwd()

    # 0--Parade/0_Parade_marchingband_1_849.jpg -> 0_Parade_marchingband_1_849.xml
    # Annotation paths use "/" on every platform, so split on it as well as on
    # the native separator.
    base_name = filename.replace(os.sep, "/").split("/")[-1]
    xml_name = base_name.split(".")[0] + '.xml'
    xml_full_path = os.path.join(save_root, xml_name)
    if os.path.exists(xml_full_path) and (cover is False):
        print(f"{xml_full_path} already exist, skip.")
        return

    xml_generator = XMLGenerator(xml_full_path)

    # xml root node
    node_root = xml_generator.create_append_node('annotation')
    xml_generator.create_text_node(node_name='folder', node_value=years, root_node=node_root)
    xml_generator.create_text_node(node_name='filename', node_value=filename, root_node=node_root)

    # source
    node_source = xml_generator.create_append_node('source', root_node=node_root)
    xml_generator.create_text_node(node_name='database', node_value=source_dict['database'], root_node=node_source)
    xml_generator.create_text_node(node_name='annotation', node_value=source_dict['annotation'], root_node=node_source)
    xml_generator.create_text_node(node_name='image', node_value=source_dict['image'], root_node=node_source)

    # size
    depth = im_shape[2] if len(im_shape) > 2 else 1
    node_size = xml_generator.create_append_node('size', root_node=node_root)
    xml_generator.create_text_node(node_name='height', node_value=str(im_shape[0]), root_node=node_size)
    xml_generator.create_text_node(node_name='width', node_value=str(im_shape[1]), root_node=node_size)
    xml_generator.create_text_node(node_name='depth', node_value=str(depth), root_node=node_size)

    # segmented
    xml_generator.create_text_node(node_name='segmented', node_value='0', root_node=node_root)

    # object: hand over a shallow copy so the caller's dicts stay intact even
    # if the generator consumes entries while writing them.
    for ob in objects_list:
        xml_generator.create_object_node(info_dict=dict(ob), root_node=node_root)

    # XML write
    xml_generator.save_xml()


def create_xml_test():
    """Smoke test: write ``test.xml`` with two identical 'person' objects.

    The file is created in the default output directory of
    ``create_pascal_voc_xml``.
    """
    objects = []
    ob = {'name': 'person', 'pose': 'Unspecified', 'truncated': '0', 'difficult': '0',
          'xmin': '174', 'ymin': '101', 'xmax': '349', 'ymax': '351'}
    objects.append(ob)
    objects.append(copy.deepcopy(ob))

    years = 'VOC2012'
    filename = 'test.jpg'
    source_dict = {'database': 'The VOC2007 Database', 'annotation': 'PASCAL VOC2007', 'image': 'flickr'}
    im_width = '500'
    im_height = '700'
    im_depth = '3'
    # create_pascal_voc_xml reads the shape as (height, width, depth).
    im_shape = (im_height, im_width, im_depth)
    create_pascal_voc_xml(filename=filename, years=years,
                          source_dict=source_dict, objects_list=objects,
                          im_shape=im_shape)


================================================
FILE: others_project/trans_widerface_to_xml/main.py
================================================
import os

from tqdm import tqdm
import cv2
from create_xml import create_pascal_voc_xml


def create_xml(labels: list, img_root: str, img_path: str, save_root: str) -> bool:
    """Convert the WIDER FACE boxes of one image into a Pascal VOC style XML file.

    Boxes flagged as invalid and boxes whose width or height is not positive
    are dropped.

    Args:
        labels: One list of string fields per box, read by position as
            x, y, w, h, blur, expression, illumination, invalid, occlusion, pose.
        img_root: Directory that ``img_path`` is relative to.
        img_path: Image path as written in the annotation file.
        save_root: Directory passed on to ``create_pascal_voc_xml``.

    Returns:
        False when no box is left after filtering, True otherwise.
    """
    source_dict = {'database': 'The WIDERFACE2017 Database',
                   'annotation': 'WIDERFACE 2017',
                   'image': 'WIDERFACE'}

    img_full_path = os.path.join(img_root, img_path)
    if os.path.exists(img_full_path):
        im = cv2.imread(img_full_path)
        if im is None:
            # imread gives back None for a file it cannot decode; fall back to
            # the same placeholder shape as for a missing file.
            print(f"Warning: {img_path} can't be decoded, can't read image shape.")
            im_shape = (0, 0, 0)
        else:
            im_shape = im.shape
    else:
        print(f"Warning: {img_path} does not exist, can't read image shape.")
        im_shape = (0, 0, 0)

    ob_list = []
    for ob in labels:
        if ob[7] == '1':
            # invalid face image, skip
            continue

        if int(ob[2]) <= 0 or int(ob[3]) <= 0:
            print(f"Warning: find bbox w or h <= 0, in {img_path}, skip.")
            continue

        ob_dict = {'name': 'face',
                   'truncated': '0' if ob[8] == '0' else '1',
                   'difficult': '1' if ob[4] == '2' or ob[8] == '2' else '0',
                   'xmin': ob[0], 'ymin': ob[1],
                   'xmax': str(int(ob[0]) + int(ob[2])),
                   'ymax': str(int(ob[1]) + int(ob[3])),
                   'blur': ob[4], 'expression': ob[5],
                   'illumination': ob[6], 'invalid': ob[7],
                   'occlusion': ob[8], 'pose': ob[9]}

        ob_list.append(ob_dict)

    if len(ob_list) == 0:
        print(f"in {img_path}, no object, skip.")
        return False

    create_pascal_voc_xml(filename=img_path,
                          years="WIDERFACE2017",
                          source_dict=source_dict,
                          objects_list=ob_list,
                          im_shape=im_shape,
                          save_root=save_root)

    return True


def parse_wider_txt(data_root: str, split: str, save_root: str):
    """
    Walk a WIDER FACE ground-truth txt file and emit one XML file per image.

    refer to: torchvision.dataset.widerface.py

    The annotation file is read as repeating records: an image path line, a
    box-count line, then box lines. A record with a count of 0 still consumes
    one box line. The base names of all images for which an XML file was
    requested are written, one per line, to ``<split>.txt`` in the working
    directory.

    :param data_root: directory holding ``wider_face_split`` and ``WIDER_<split>``
    :param split: either 'train' or 'val'
    :param save_root: output directory for the XML files, created if missing
    :return: None
    """
    assert split in ['train', 'val'], f"split must be in ['train', 'val'], got {split}"

    if not os.path.exists(save_root):
        os.makedirs(save_root)

    txt_path = os.path.join(data_root, 'wider_face_split', f'wider_face_{split}_bbx_gt.txt')
    img_root = os.path.join(data_root, f'WIDER_{split}', 'images')

    # Which kind of line the parser expects next.
    want_name, want_count, want_box = "name", "count", "box"
    state = want_name

    num_boxes, box_counter, idx = 0, 0, 0
    labels = []
    xml_list = []

    with open(txt_path, "r") as f:
        lines = f.readlines()

    progress_bar = tqdm(lines)
    for raw_line in progress_bar:
        line = raw_line.rstrip()

        if state == want_name:
            img_path = line
            state = want_count
            continue

        if state == want_count:
            num_boxes = int(line)
            state = want_box
            continue

        if state != want_box:
            raise RuntimeError("Error parsing annotation file {}".format(txt_path))

        box_counter += 1
        labels.append(line.split(" "))
        if box_counter < num_boxes:
            # more boxes of the same image follow
            continue

        # Record complete: the next line is an image path again.
        state = want_name
        if num_boxes == 0:
            print(f"in {img_path}, no object, skip.")
        elif create_xml(labels, img_root, img_path, save_root):
            # only images that ended up with at least one object are listed
            xml_list.append(img_path.split("/")[-1].split(".")[0])

        box_counter = 0
        labels.clear()
        idx += 1
        progress_bar.set_description(f"{idx} images")

    with open(split+'.txt', 'w') as w:
        w.write("\n".join(xml_list))


# Convert the "val" split found under /data/wider_face/ and put the XML files in ./annotation/.
parse_wider_txt(data_root="/data/wider_face/",
                split="val",
                save_root="./annotation/")


================================================
FILE: pytorch_classification/ConfusionMatrix/class_indices.json
================================================
{
    "0": "daisy",
    "1": "dandelion",
    "2": "roses",
    "3": "sunflowers",
    "4": "tulips"
}

================================================
FILE: pytorch_classification/ConfusionMatrix/main.py
================================================
import os
import json

import torch
from torchvision import transforms, datasets
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from prettytable import PrettyTable

from model import MobileNetV2


class ConfusionMatrix(object):
    """
    Accumulates a confusion matrix and reports metrics derived from it.

    Rows are indexed by the predicted class, columns by the true class.
    Note: if the plotted figure is cut off, the matplotlib version is the
    cause; matplotlib-3.2.1 (windows and ubuntu) draws it correctly.
    The prettytable package has to be installed separately.
    """
    def __init__(self, num_classes: int, labels: list):
        self.num_classes = num_classes
        self.labels = labels
        self.matrix = np.zeros((num_classes, num_classes))

    def update(self, preds, labels):
        """Add one count per (predicted class, true class) pair."""
        for pred, truth in zip(preds, labels):
            self.matrix[pred, truth] += 1

    def summary(self):
        """Print the overall accuracy and a per-class precision/recall/specificity table."""
        def ratio(numerator, denominator):
            # rounded quotient, or 0. when the denominator is zero
            return round(numerator / denominator, 3) if denominator != 0 else 0.

        total = np.sum(self.matrix)

        # calculate accuracy: diagonal entries are the correct predictions
        correct = sum(self.matrix[i, i] for i in range(self.num_classes))
        acc = correct / total
        print("the model accuracy is ", acc)

        # precision, recall, specificity
        table = PrettyTable()
        table.field_names = ["", "Precision", "Recall", "Specificity"]
        for i in range(self.num_classes):
            tp = self.matrix[i, i]
            fp = self.matrix[i, :].sum() - tp
            fn = self.matrix[:, i].sum() - tp
            tn = total - tp - fp - fn
            table.add_row([self.labels[i],
                           ratio(tp, tp + fp),
                           ratio(tp, tp + fn),
                           ratio(tn, tn + fp)])
        print(table)

    def plot(self):
        """Print the raw matrix and show it as an annotated heat map."""
        matrix = self.matrix
        print(matrix)
        plt.imshow(matrix, cmap=plt.cm.Blues)

        ticks = range(self.num_classes)
        # x axis tick labels
        plt.xticks(ticks, self.labels, rotation=45)
        # y axis tick labels
        plt.yticks(ticks, self.labels)
        # show the colorbar
        plt.colorbar()
        plt.xlabel('True Labels')
        plt.ylabel('Predicted Labels')
        plt.title('Confusion matrix')

        # write the count into every cell
        thresh = matrix.max() / 2
        for x, y in np.ndindex(self.num_classes, self.num_classes):
            # note: the cell drawn at (x, y) is matrix[y, x], not matrix[x, y]
            info = int(matrix[y, x])
            text_color = "white" if info > thresh else "black"
            plt.text(x, y, info,
                     verticalalignment='center',
                     horizontalalignment='center',
                     color=text_color)
        plt.tight_layout()
        plt.show()


# Evaluate a trained MobileNetV2 on the flower validation set and report its
# confusion matrix together with per-class metrics.
if __name__ == '__main__':
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)

    data_transform = transforms.Compose([transforms.Resize(256),
                                         transforms.CenterCrop(224),
                                         transforms.ToTensor(),
                                         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "data path {} does not exist.".format(image_path)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform)

    batch_size = 16
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=2)
    net = MobileNetV2(num_classes=5)
    # load pretrain weights
    model_weight_path = "./MobileNetV2.pth"
    assert os.path.exists(model_weight_path), "cannot find {} file".format(model_weight_path)
    net.load_state_dict(torch.load(model_weight_path, map_location=device))
    net.to(device)

    # read class_indict; the context manager closes the file handle again
    json_label_path = './class_indices.json'
    assert os.path.exists(json_label_path), "cannot find {} file".format(json_label_path)
    with open(json_label_path, 'r') as json_file:
        class_indict = json.load(json_file)

    # class names in the order they appear in the json file
    labels = [label for _, label in class_indict.items()]
    confusion = ConfusionMatrix(num_classes=5, labels=labels)
    net.eval()
    with torch.no_grad():
        for val_data in tqdm(validate_loader):
            val_images, val_labels = val_data
            outputs = net(val_images.to(device))
            outputs = torch.softmax(outputs, dim=1)
            outputs = torch.argmax(outputs, dim=1)
            confusion.update(outputs.to("cpu").numpy(), val_labels.to("cpu").numpy())
    confusion.plot()
    confusion.summary()


================================================
FILE: pytorch_classification/ConfusionMatrix/model.py
================================================
from torch import nn
import torch


def _make_divisible(ch, divisor=8, min_ch=None):
    """
    Round a channel count to a multiple of ``divisor``.

    Taken from the original tf repo, where it makes sure every layer has a
    channel number divisible by 8:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py

    The result is never smaller than ``min_ch`` (``divisor`` when not given),
    and one extra ``divisor`` is added when rounding would otherwise land
    below 90% of ``ch``.
    """
    lower_bound = divisor if min_ch is None else min_ch
    # round to the nearest multiple of divisor
    nearest = int(ch + divisor / 2) // divisor * divisor
    result = max(lower_bound, nearest)
    # Make sure that round down does not go down by more than 10%.
    if result < 0.9 * ch:
        result += divisor
    return result


class ConvBNReLU(nn.Sequential):
    """Bias-free Conv2d followed by BatchNorm2d and an in-place ReLU6.

    The padding is ``(kernel_size - 1) // 2``.
    """
    def __init__(self, in_channel, out_channel, kernel_size=3, stride=1, groups=1):
        pad = (kernel_size - 1) // 2
        conv = nn.Conv2d(in_channel, out_channel, kernel_size, stride, pad, groups=groups, bias=False)
        norm = nn.BatchNorm2d(out_channel)
        act = nn.ReLU6(inplace=True)
        super(ConvBNReLU, self).__init__(conv, norm, act)


class InvertedResidual(nn.Module):
    """MobileNetV2 block: optional 1x1 expansion, 3x3 depthwise conv, linear 1x1 projection.

    The input is added back to the output only when ``stride`` is 1 and the
    input and output channel counts match.
    """
    def __init__(self, in_channel, out_channel, stride, expand_ratio):
        super().__init__()
        hidden_channel = in_channel * expand_ratio
        self.use_shortcut = (stride == 1) and (in_channel == out_channel)

        # 1x1 pointwise conv, left out when there is nothing to expand
        expand = [] if expand_ratio == 1 else [ConvBNReLU(in_channel, hidden_channel, kernel_size=1)]
        # 3x3 depthwise conv
        depthwise = ConvBNReLU(hidden_channel, hidden_channel, stride=stride, groups=hidden_channel)
        # 1x1 pointwise conv (linear: no activation after the batch norm)
        project = [nn.Conv2d(hidden_channel, out_channel, kernel_size=1, bias=False),
                   nn.BatchNorm2d(out_channel)]

        self.conv = nn.Sequential(*expand, depthwise, *project)

    def forward(self, x):
        out = self.conv(x)
        if self.use_shortcut:
            return x + out
        return out


class MobileNetV2(nn.Module):
    """MobileNetV2 classifier.

    Args:
        num_classes: Size of the final linear layer's output.
        alpha: Multiplier applied to every channel count.
        round_nearest: Channel counts are rounded to a multiple of this value.
    """
    def __init__(self, num_classes=1000, alpha=1.0, round_nearest=8):
        super().__init__()
        block = InvertedResidual
        input_channel = _make_divisible(32 * alpha, round_nearest)
        last_channel = _make_divisible(1280 * alpha, round_nearest)

        # one row per stage:
        # t = expand ratio, c = output channels, n = number of blocks, s = stride of the first block
        inverted_residual_setting = [
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1],
        ]

        # conv1 layer
        layers = [ConvBNReLU(3, input_channel, stride=2)]
        # inverted residual blocks
        for expand_ratio, channels, repeats, first_stride in inverted_residual_setting:
            output_channel = _make_divisible(channels * alpha, round_nearest)
            for block_idx in range(repeats):
                # only the first block of a stage may downsample
                block_stride = first_stride if block_idx == 0 else 1
                layers.append(block(input_channel, output_channel, block_stride, expand_ratio=expand_ratio))
                input_channel = output_channel
        # last 1x1 conv layer
        layers.append(ConvBNReLU(input_channel, last_channel, 1))
        # combine feature layers
        self.features = nn.Sequential(*layers)

        # classifier
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(last_channel, num_classes)
        )

        # weight initialization
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out')
                if module.bias is not None:
                    nn.init.zeros_(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.zeros_(module.bias)

    def forward(self, x):
        feats = self.features(x)
        pooled = self.avgpool(feats)
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)


================================================
FILE: pytorch_classification/ConvNeXt/README.md
================================================
## 代码使用简介

1. 下载好数据集，代码中默认使用的是花分类数据集，下载地址: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz),
如果下载不了的话可以通过百度云链接下载: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg 提取码:58p0
2. 在`train.py`脚本中将`--data-path`设置成解压后的`flower_photos`文件夹绝对路径
3. 下载预训练权重，在`model.py`文件中每个模型都有提供预训练权重的下载地址，根据自己使用的模型下载对应预训练权重
4. 在`train.py`脚本中将`--weights`参数设成下载好的预训练权重路径
5. 设置好数据集的路径`--data-path`以及预训练权重的路径`--weights`就能使用`train.py`脚本开始训练了(训练过程中会自动生成`class_indices.json`文件)
6. 在`predict.py`脚本中导入和训练脚本中同样的模型，并将`model_weight_path`设置成训练好的模型权重路径(默认保存在weights文件夹下)
7. 在`predict.py`脚本中将`img_path`设置成你自己需要预测的图片绝对路径
8. 设置好权重路径`model_weight_path`和预测的图片路径`img_path`就能使用`predict.py`脚本进行预测了
9. 如果要使用自己的数据集，请按照花分类数据集的文件结构进行摆放(即一个类别对应一个文件夹)，并且将训练以及预测脚本中的`num_classes`设置成你自己数据的类别数


================================================
FILE: pytorch_classification/ConvNeXt/model.py
================================================
"""
original code from facebook research:
https://github.com/facebookresearch/ConvNeXt
"""

import torch
import torch.nn as nn
import torch.nn.functional as F


def drop_path(x, drop_prob: float = 0., training: bool = False):
    """Stochastic depth: zero whole samples at random and rescale the survivors.

    One random keep/drop decision is drawn per entry of the first dimension
    and broadcast over all remaining dimensions; kept samples are divided by
    ``1 - drop_prob``. ``x`` itself is returned when ``drop_prob`` is 0 or
    ``training`` is False.

    This is the layer otherwise known as 'DropConnect' in some EfficientNet
    style code bases; 'drop path' is used here because 'Drop Connect' names a
    different technique. See the discussion at
    https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956

    """
    if not training or drop_prob == 0.:
        return x

    keep_prob = 1 - drop_prob
    # one entry per sample, broadcastable against tensors of any rank
    mask_shape = (x.shape[0],) + (1,) * (x.ndim - 1)
    mask = torch.rand(mask_shape, dtype=x.dtype, device=x.device)
    mask = torch.floor(keep_prob + mask)  # 1 with probability keep_prob, else 0
    return x.div(keep_prob) * mask


class DropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Args:
        drop_prob: Probability of dropping a sample. ``None`` (the default)
            turns the layer into a pass-through.
    """
    def __init__(self, drop_prob=None):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        # Without this guard a default-constructed layer fails in training
        # mode, because the drop computation would evaluate ``1 - None``.
        if self.drop_prob is None:
            return x
        return drop_path(x, self.drop_prob, self.training)


class LayerNorm(nn.Module):
    r""" Layer normalisation over the channel axis for two memory layouts.

    ``channels_last`` (default) normalises the last dimension of an input such
    as (batch_size, height, width, channels); ``channels_first`` normalises
    dimension 1 of an input shaped (batch_size, channels, height, width).
    Any other ``data_format`` raises ``ValueError``.
    """

    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
        super().__init__()
        if data_format not in ("channels_last", "channels_first"):
            raise ValueError(f"not support data format '{data_format}'")

        self.weight = nn.Parameter(torch.ones(normalized_shape), requires_grad=True)
        self.bias = nn.Parameter(torch.zeros(normalized_shape), requires_grad=True)
        self.eps = eps
        self.data_format = data_format
        self.normalized_shape = (normalized_shape,)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if self.data_format == "channels_last":
            return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)

        if self.data_format == "channels_first":
            # [batch_size, channels, height, width]: statistics over dim 1
            centered = x - x.mean(1, keepdim=True)
            variance = centered.pow(2).mean(1, keepdim=True)
            normed = centered / torch.sqrt(variance + self.eps)
            # reshape the per-channel affine parameters to broadcast over H and W
            return self.weight[:, None, None] * normed + self.bias[:, None, None]


class Block(nn.Module):
    r""" ConvNeXt Block.

    Two equivalent formulations exist:
    (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
    (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
    This class implements (2), which the original authors found slightly faster in PyTorch.

    Args:
        dim (int): Number of input channels.
        drop_rate (float): Stochastic depth rate. Default: 0.0
        layer_scale_init_value (float): Init value for Layer Scale; a value
            that is not positive disables the layer scale. Default: 1e-6.
    """
    def __init__(self, dim, drop_rate=0., layer_scale_init_value=1e-6):
        super().__init__()
        hidden_dim = 4 * dim
        self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim)  # depthwise conv
        self.norm = LayerNorm(dim, eps=1e-6, data_format="channels_last")
        # pointwise/1x1 convs, implemented with linear layers
        self.pwconv1 = nn.Linear(dim, hidden_dim)
        self.act = nn.GELU()
        self.pwconv2 = nn.Linear(hidden_dim, dim)

        if layer_scale_init_value > 0:
            self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim,)),
                                      requires_grad=True)
        else:
            self.gamma = None

        if drop_rate > 0.:
            self.drop_path = DropPath(drop_rate)
        else:
            self.drop_path = nn.Identity()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        residual = x
        out = self.dwconv(x)
        out = out.permute(0, 2, 3, 1)  # [N, C, H, W] -> [N, H, W, C]
        out = self.pwconv2(self.act(self.pwconv1(self.norm(out))))
        if self.gamma is not None:
            out = self.gamma * out
        out = out.permute(0, 3, 1, 2)  # [N, H, W, C] -> [N, C, H, W]

        return residual + self.drop_path(out)


class ConvNeXt(nn.Module):
    r""" ConvNeXt
        A PyTorch impl of : `A ConvNet for the 2020s`  -
          https://arxiv.org/pdf/2201.03545.pdf
    Args:
        in_chans (int): Number of input image channels. Default: 3
        num_classes (int): Number of classes for classification head. Default: 1000
        depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3]
        dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768]
        drop_path_rate (float): Stochastic depth rate. Default: 0.
        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
        head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1.
    """
    def __init__(self, in_chans: int = 3, num_classes: int = 1000, depths: list = None,
                 dims: list = None, drop_path_rate: float = 0., layer_scale_init_value: float = 1e-6,
                 head_init_scale: float = 1.):
        super().__init__()
        # Apply the documented defaults; indexing None would fail below.
        if depths is None:
            depths = [3, 3, 9, 3]
        if dims is None:
            dims = [96, 192, 384, 768]

        self.downsample_layers = nn.ModuleList()  # stem and 3 intermediate downsampling conv layers
        stem = nn.Sequential(nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4),
                             LayerNorm(dims[0], eps=1e-6, data_format="channels_first"))
        self.downsample_layers.append(stem)

        # the 3 downsample layers placed in front of stage2-stage4
        for i in range(3):
            downsample_layer = nn.Sequential(LayerNorm(dims[i], eps=1e-6, data_format="channels_first"),
                                             nn.Conv2d(dims[i], dims[i+1], kernel_size=2, stride=2))
            self.downsample_layers.append(downsample_layer)

        self.stages = nn.ModuleList()  # 4 feature resolution stages, each consisting of multiple blocks
        # drop rate grows linearly from 0 to drop_path_rate over all blocks
        dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]
        cur = 0
        # build the stacked blocks of every stage
        for i in range(4):
            stage = nn.Sequential(
                *[Block(dim=dims[i], drop_rate=dp_rates[cur + j], layer_scale_init_value=layer_scale_init_value)
                  for j in range(depths[i])]
            )
            self.stages.append(stage)
            cur += depths[i]

        self.norm = nn.LayerNorm(dims[-1], eps=1e-6)  # final norm layer
        self.head = nn.Linear(dims[-1], num_classes)
        self.apply(self._init_weights)
        self.head.weight.data.mul_(head_init_scale)
        self.head.bias.data.mul_(head_init_scale)

    def _init_weights(self, m):
        """Initialise conv/linear weights from a truncated normal and zero their biases."""
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            # std of 0.02 as in the reference ConvNeXt implementation
            nn.init.trunc_normal_(m.weight, std=0.02)
            nn.init.constant_(m.bias, 0)

    def forward_features(self, x: torch.Tensor) -> torch.Tensor:
        """Run the four downsample/stage pairs, then pool and normalise the result."""
        for i in range(4):
            x = self.downsample_layers[i](x)
            x = self.stages[i](x)

        return self.norm(x.mean([-2, -1]))  # global average pooling, (N, C, H, W) -> (N, C)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.forward_features(x)
        x = self.head(x)
        return x


def convnext_tiny(num_classes: int):
    """Build ConvNeXt with depths [3, 3, 9, 3] and dims [96, 192, 384, 768].

    Pretrained weights:
    https://dl.fbaipublicfiles.com/convnext/convnext_tiny_1k_224_ema.pth
    """
    return ConvNeXt(num_classes=num_classes,
                    depths=[3, 3, 9, 3],
                    dims=[96, 192, 384, 768])


def convnext_small(num_classes: int):
    """Build ConvNeXt with depths [3, 3, 27, 3] and dims [96, 192, 384, 768].

    Pretrained weights:
    https://dl.fbaipublicfiles.com/convnext/convnext_small_1k_224_ema.pth
    """
    return ConvNeXt(num_classes=num_classes,
                    depths=[3, 3, 27, 3],
                    dims=[96, 192, 384, 768])


def convnext_base(num_classes: int):
    """Build ConvNeXt with depths [3, 3, 27, 3] and dims [128, 256, 512, 1024].

    Pretrained weights:
    https://dl.fbaipublicfiles.com/convnext/convnext_base_1k_224_ema.pth
    https://dl.fbaipublicfiles.com/convnext/convnext_base_22k_224.pth
    """
    return ConvNeXt(num_classes=num_classes,
                    depths=[3, 3, 27, 3],
                    dims=[128, 256, 512, 1024])


def convnext_large(num_classes: int):
    """Build ConvNeXt with depths [3, 3, 27, 3] and dims [192, 384, 768, 1536].

    Pretrained weights:
    https://dl.fbaipublicfiles.com/convnext/convnext_large_1k_224_ema.pth
    https://dl.fbaipublicfiles.com/convnext/convnext_large_22k_224.pth
    """
    return ConvNeXt(num_classes=num_classes,
                    depths=[3, 3, 27, 3],
                    dims=[192, 384, 768, 1536])


def convnext_xlarge(num_classes: int):
    """Build ConvNeXt with depths [3, 3, 27, 3] and dims [256, 512, 1024, 2048].

    Pretrained weights:
    https://dl.fbaipublicfiles.com/convnext/convnext_xlarge_22k_224.pth
    """
    return ConvNeXt(num_classes=num_classes,
                    depths=[3, 3, 27, 3],
                    dims=[256, 512, 1024, 2048])


================================================
FILE: pytorch_classification/ConvNeXt/my_dataset.py
================================================
from PIL import Image
import torch
from torch.utils.data import Dataset


class MyDataSet(Dataset):
    """Custom dataset pairing image file paths with class labels."""

    def __init__(self, images_path: list, images_class: list, transform=None):
        self.transform = transform
        self.images_path = images_path
        self.images_class = images_class

    def __len__(self):
        return len(self.images_path)

    def __getitem__(self, item):
        """Load image ``item``, apply the transform if one is set, and return it with its label.

        Raises ValueError for any image whose mode is not 'RGB'.
        """
        path = self.images_path[item]
        img = Image.open(path)
        # 'RGB' is a colour image, 'L' a grayscale one
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(path))

        if self.transform is not None:
            img = self.transform(img)

        return img, self.images_class[item]

    @staticmethod
    def collate_fn(batch):
        """Stack the images of a batch along a new dim 0 and turn the labels into a tensor."""
        # for the official default_collate implementation see
        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        images, labels = zip(*batch)
        return torch.stack(images, dim=0), torch.as_tensor(labels)


================================================
FILE: pytorch_classification/ConvNeXt/predict.py
================================================
import os
import json

import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

from model import convnext_tiny as create_model


def main():
    """Classify one image with a trained ConvNeXt and show it with the top prediction.

    The image path, class-index json path and weight path are fixed inside
    the function. Every class probability is also printed.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"using {device} device.")

    num_classes = 5
    img_size = 224
    preprocess = transforms.Compose([
        transforms.Resize(int(img_size * 1.14)),
        transforms.CenterCrop(img_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    pil_img = Image.open(img_path)
    plt.imshow(pil_img)
    # transform to a [C, H, W] tensor, then add the batch dimension -> [N, C, H, W]
    batch = torch.unsqueeze(preprocess(pil_img), dim=0)

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = create_model(num_classes=num_classes).to(device)
    # load model weights
    model_weight_path = "./weights/best_model.pth"
    state_dict = torch.load(model_weight_path, map_location=device)
    model.load_state_dict(state_dict)
    model.eval()
    with torch.no_grad():
        # predict class
        logits = torch.squeeze(model(batch.to(device))).cpu()
        predict = torch.softmax(logits, dim=0)
        predict_cla = torch.argmax(predict).numpy()

    best_name = class_indict[str(predict_cla)]
    best_prob = predict[predict_cla].numpy()
    print_res = "class: {}   prob: {:.3}".format(best_name, best_prob)
    plt.title(print_res)
    for cls_idx, cls_prob in enumerate(predict):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(cls_idx)],
                                                  cls_prob.numpy()))
    plt.show()


# Run the prediction only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/ConvNeXt/train.py
================================================
import os
import argparse

import torch
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

from my_dataset import MyDataSet
from model import convnext_tiny as create_model
from utils import read_split_data, create_lr_scheduler, get_params_groups, train_one_epoch, evaluate


def main(args):
    """Fine-tune ConvNeXt-Tiny on an image-folder dataset and keep the best checkpoint.

    Args:
        args: Parsed command-line namespace. The fields read here are
            ``device``, ``data_path``, ``batch_size``, ``num_classes``,
            ``weights``, ``freeze_layers``, ``wd``, ``lr`` and ``epochs``.

    Side effects:
        Creates ``./weights`` when missing, logs per-epoch scalars through a
        TensorBoard ``SummaryWriter`` and overwrites
        ``./weights/best_model.pth`` whenever validation accuracy improves.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    print(f"using {device} device.")

    # exist_ok makes this a no-op when the directory is already there
    os.makedirs("./weights", exist_ok=True)

    tb_writer = SummaryWriter()

    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)

    img_size = 224
    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(img_size),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(int(img_size * 1.143)),
                                   transforms.CenterCrop(img_size),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # instantiate the training dataset
    train_dataset = MyDataSet(images_path=train_images_path,
                              images_class=train_images_label,
                              transform=data_transform["train"])

    # instantiate the validation dataset
    val_dataset = MyDataSet(images_path=val_images_path,
                            images_class=val_images_label,
                            transform=data_transform["val"])

    batch_size = args.batch_size
    # os.cpu_count() may return None; fall back to one so min() does not fail
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_dataset.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_dataset.collate_fn)

    model = create_model(num_classes=args.num_classes).to(device)

    if args.weights != "":
        assert os.path.exists(args.weights), "weights file: '{}' not exist.".format(args.weights)
        weights_dict = torch.load(args.weights, map_location=device)["model"]
        # drop the classifier-head weights; they depend on the number of classes
        for k in list(weights_dict.keys()):
            if "head" in k:
                del weights_dict[k]
        print(model.load_state_dict(weights_dict, strict=False))

    if args.freeze_layers:
        for name, para in model.named_parameters():
            # freeze every weight except those of the head
            if "head" not in name:
                para.requires_grad_(False)
            else:
                print("training {}".format(name))

    # pg = [p for p in model.parameters() if p.requires_grad]
    pg = get_params_groups(model, weight_decay=args.wd)
    optimizer = optim.AdamW(pg, lr=args.lr, weight_decay=args.wd)
    lr_scheduler = create_lr_scheduler(optimizer, len(train_loader), args.epochs,
                                       warmup=True, warmup_epochs=1)

    best_acc = 0.
    try:
        for epoch in range(args.epochs):
            # train
            train_loss, train_acc = train_one_epoch(model=model,
                                                    optimizer=optimizer,
                                                    data_loader=train_loader,
                                                    device=device,
                                                    epoch=epoch,
                                                    lr_scheduler=lr_scheduler)

            # validate
            val_loss, val_acc = evaluate(model=model,
                                         data_loader=val_loader,
                                         device=device,
                                         epoch=epoch)

            tags = ["train_loss", "train_acc", "val_loss", "val_acc", "learning_rate"]
            tb_writer.add_scalar(tags[0], train_loss, epoch)
            tb_writer.add_scalar(tags[1], train_acc, epoch)
            tb_writer.add_scalar(tags[2], val_loss, epoch)
            tb_writer.add_scalar(tags[3], val_acc, epoch)
            tb_writer.add_scalar(tags[4], optimizer.param_groups[0]["lr"], epoch)

            if best_acc < val_acc:
                torch.save(model.state_dict(), "./weights/best_model.pth")
                best_acc = val_acc
    finally:
        # flush pending events even when training aborts (e.g. sys.exit on a
        # non-finite loss) so the logged scalars are not lost
        tb_writer.close()


def str2bool(value):
    """Convert a command-line token such as ``True``/``false``/``1``/``no`` to bool.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so
    every non-empty value (including ``"False"``) becomes ``True``. This
    converter interprets the text instead.

    Args:
        value: The raw argument string (a ``bool`` is returned unchanged).

    Returns:
        The parsed boolean.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("true", "t", "yes", "y", "on", "1"):
        return True
    # the empty string stays False, as it was with type=bool
    if text in ("false", "f", "no", "n", "off", "0", ""):
        return False
    raise argparse.ArgumentTypeError("boolean value expected, got '{}'".format(value))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--batch-size', type=int, default=8)
    parser.add_argument('--lr', type=float, default=5e-4)
    parser.add_argument('--wd', type=float, default=5e-2)

    # root directory of the dataset
    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
    parser.add_argument('--data-path', type=str,
                        default="/data/flower_photos")

    # path of the pretrained weights; set to an empty string to skip loading
    # link: https://pan.baidu.com/s/1aNqQW4n_RrUlWUBNlaJRHA  password: i83t
    parser.add_argument('--weights', type=str, default='./convnext_tiny_1k_224_ema.pth',
                        help='initial weights path')
    # whether to freeze every weight except the head
    parser.add_argument('--freeze-layers', type=str2bool, default=False)
    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')

    opt = parser.parse_args()

    main(opt)


================================================
FILE: pytorch_classification/ConvNeXt/utils.py
================================================
import os
import sys
import json
import pickle
import random
import math

import torch
from tqdm import tqdm

import matplotlib.pyplot as plt


def read_split_data(root: str, val_rate: float = 0.2):
    """Split an image-folder dataset into training and validation lists.

    Every sub-directory of ``root`` is treated as one class. Class names are
    sorted and numbered from 0, and the index -> name mapping is written to
    ``class_indices.json`` in the current working directory.

    Args:
        root: Dataset root directory holding one folder per class.
        val_rate: Fraction of each class's images sampled for validation.

    Returns:
        ``(train_images_path, train_images_label, val_images_path,
        val_images_label)``: parallel lists of image paths and class indices.

    Note:
        Reseeds the global ``random`` module with 0 so the split is
        reproducible across runs.
    """
    random.seed(0)  # make the random split reproducible
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # walk the root folder: one sub-folder per class
    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]
    # sort so the order is the same on every platform
    flower_class.sort()
    # map class name -> numeric index, and dump the inverse mapping to JSON
    class_indices = dict((k, v) for v, k in enumerate(flower_class))
    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of each training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of each validation image
    every_class_num = []  # number of samples per class
    supported = [".jpg", ".JPG", ".png", ".PNG"]  # accepted file extensions
    # walk the files of every class folder
    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # collect the paths of all files with a supported extension
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        # sort so the order is the same on every platform
        images.sort()
        # numeric index of this class
        image_class = class_indices[cla]
        # record the number of samples in this class
        every_class_num.append(len(images))
        # sample the validation images; a set keeps the membership test
        # below O(1) instead of scanning a list for every image
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:  # sampled for validation
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:  # everything else goes to the training set
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.".format(sum(every_class_num)))
    print("{} images for training.".format(len(train_images_path)))
    print("{} images for validation.".format(len(val_images_path)))
    assert len(train_images_path) > 0, "number of training images must greater than 0."
    assert len(val_images_path) > 0, "number of validation images must greater than 0."

    plot_image = False
    if plot_image:
        # bar chart of the number of images per class
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # replace the x ticks 0,1,2,3,4 with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # write the count above each bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        # x-axis label
        plt.xlabel('image class')
        # y-axis label
        plt.ylabel('number of images')
        # chart title
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label


def plot_data_loader_image(data_loader):
    """Show up to four images of every batch with their class names.

    Class names are looked up in ``./class_indices.json``. Each image is
    de-normalized with the mean/std constants used below before display.

    Args:
        data_loader: Iterable yielding ``(images, labels)`` batches and
            exposing a ``batch_size`` attribute.
            # assumes images are normalized [C, H, W] tensors on the CPU - TODO confirm
    """
    batch_size = data_loader.batch_size
    plot_num = min(batch_size, 4)

    json_path = './class_indices.json'
    assert os.path.exists(json_path), json_path + " does not exist."
    # the context manager closes the file handle once the JSON is parsed
    with open(json_path, 'r') as json_file:
        class_indices = json.load(json_file)

    for data in data_loader:
        images, labels = data
        # the final batch may hold fewer samples than plot_num
        shown = min(plot_num, len(images))
        for i in range(shown):
            # [C, H, W] -> [H, W, C]
            img = images[i].numpy().transpose(1, 2, 0)
            # undo the Normalize transform
            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255
            label = labels[i].item()
            plt.subplot(1, shown, i+1)
            plt.xlabel(class_indices[str(label)])
            plt.xticks([])  # hide the x-axis ticks
            plt.yticks([])  # hide the y-axis ticks
            plt.imshow(img.astype('uint8'))
        plt.show()


def write_pickle(list_info: list, file_name: str):
    """Pickle ``list_info`` into the file ``file_name``, replacing any existing content."""
    with open(file_name, mode='wb') as sink:
        pickle.dump(list_info, sink)


def read_pickle(file_name: str) -> list:
    """Load and return the object pickled in ``file_name``.

    NOTE: unpickling can execute arbitrary code; only read files from a
    trusted source.
    """
    with open(file_name, mode='rb') as source:
        return pickle.load(source)


def train_one_epoch(model, optimizer, data_loader, device, epoch, lr_scheduler):
    """Run one training pass over ``data_loader``.

    The optimizer and the learning-rate scheduler are both stepped once per
    batch. The process exits with status 1 when a batch loss is not finite.

    Returns:
        ``(mean_loss, accuracy)``: the summed batch losses divided by the
        number of batches, and the number of correct predictions divided by
        the number of samples seen.
    """
    model.train()
    criterion = torch.nn.CrossEntropyLoss()
    loss_sum = torch.zeros(1).to(device)  # running sum of batch losses
    correct = torch.zeros(1).to(device)   # running count of correct predictions
    optimizer.zero_grad()

    seen = 0
    progress = tqdm(data_loader, file=sys.stdout)
    for step, (images, labels) in enumerate(progress):
        seen += images.shape[0]

        pred = model(images.to(device))
        labels = labels.to(device)
        _, pred_classes = torch.max(pred, dim=1)
        correct += torch.eq(pred_classes, labels).sum()

        loss = criterion(pred, labels)
        loss.backward()
        loss_sum += loss.detach()

        mean_loss = loss_sum.item() / (step + 1)
        accuracy = correct.item() / seen
        current_lr = optimizer.param_groups[0]["lr"]
        progress.desc = "[train epoch {}] loss: {:.3f}, acc: {:.3f}, lr: {:.5f}".format(
            epoch, mean_loss, accuracy, current_lr
        )

        if not torch.isfinite(loss):
            print('WARNING: non-finite loss, ending training ', loss)
            sys.exit(1)

        optimizer.step()
        optimizer.zero_grad()
        # the schedule is defined per step, so advance it after every batch
        lr_scheduler.step()

    return loss_sum.item() / (step + 1), correct.item() / seen


@torch.no_grad()
def evaluate(model, data_loader, device, epoch):
    """Evaluate ``model`` on ``data_loader`` with gradient tracking disabled.

    Returns:
        ``(mean_loss, accuracy)``: the summed batch losses divided by the
        number of batches, and the number of correct predictions divided by
        the number of samples seen.
    """
    criterion = torch.nn.CrossEntropyLoss()

    model.eval()

    correct = torch.zeros(1).to(device)   # running count of correct predictions
    loss_sum = torch.zeros(1).to(device)  # running sum of batch losses

    seen = 0
    progress = tqdm(data_loader, file=sys.stdout)
    for step, (images, labels) in enumerate(progress):
        seen += images.shape[0]

        pred = model(images.to(device))
        labels = labels.to(device)
        _, pred_classes = torch.max(pred, dim=1)
        correct += torch.eq(pred_classes, labels).sum()

        loss_sum += criterion(pred, labels)

        mean_loss = loss_sum.item() / (step + 1)
        accuracy = correct.item() / seen
        progress.desc = "[valid epoch {}] loss: {:.3f}, acc: {:.3f}".format(
            epoch, mean_loss, accuracy
        )

    return loss_sum.item() / (step + 1), correct.item() / seen


def create_lr_scheduler(optimizer,
                        num_step: int,
                        epochs: int,
                        warmup=True,
                        warmup_epochs=1,
                        warmup_factor=1e-3,
                        end_factor=1e-6):
    """Build a per-step ``LambdaLR`` with linear warmup followed by cosine decay.

    Args:
        optimizer: Optimizer whose learning rate is scheduled.
        num_step: Number of optimizer steps per epoch (must be > 0).
        epochs: Total number of training epochs (must be > 0).
        warmup: Whether to run a warmup phase.
        warmup_epochs: Length of the warmup phase in epochs. A value of 0
            disables the warmup even when ``warmup`` is true.
        warmup_factor: Learning-rate multiplier at step 0 of the warmup.
        end_factor: Learning-rate multiplier at the end of the cosine phase.

    Returns:
        A ``torch.optim.lr_scheduler.LambdaLR`` meant to be stepped once per
        optimizer step.
    """
    assert num_step > 0 and epochs > 0
    if not warmup:
        warmup_epochs = 0

    warmup_steps = warmup_epochs * num_step
    cosine_steps = (epochs - warmup_epochs) * num_step

    def f(x):
        """
        Return the learning-rate multiplier for step ``x``.
        Note that PyTorch calls lr_scheduler.step() once before training starts.
        """
        # warmup_steps > 0 guards the division when warmup_epochs is 0
        if warmup_steps > 0 and x <= warmup_steps:
            alpha = float(x) / warmup_steps
            # during warmup the multiplier goes from warmup_factor -> 1
            return warmup_factor * (1 - alpha) + alpha

        if cosine_steps <= 0:
            # no cosine phase to interpolate over (warmup spans all epochs)
            return end_factor

        current_step = (x - warmup_steps)
        # after warmup the multiplier goes from 1 -> end_factor
        return ((1 + math.cos(current_step * math.pi / cosine_steps)) / 2) * (1 - end_factor) + end_factor

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)


def get_params_groups(model: torch.nn.Module, weight_decay: float = 1e-5):
    """Split the trainable parameters of ``model`` into decay / no-decay groups.

    One-dimensional parameters and parameters whose name ends with ``.bias``
    get a weight decay of 0; all other trainable parameters use
    ``weight_decay``. Parameters with ``requires_grad == False`` are left out.
    The resulting grouping is printed by parameter name.

    Returns:
        A two-element list of optimizer parameter-group dicts, ordered
        ``[decay, no_decay]``.
    """
    # parameter tensors that will be handed to the optimizer
    groups = {"decay": {"params": [], "weight_decay": weight_decay},
              "no_decay": {"params": [], "weight_decay": 0.}}

    # matching parameter names, kept only for the printed summary
    group_names = {"decay": {"params": [], "weight_decay": weight_decay},
                   "no_decay": {"params": [], "weight_decay": 0.}}

    for name, param in model.named_parameters():
        if param.requires_grad:
            skip_decay = param.dim() == 1 or name.endswith(".bias")
            key = "no_decay" if skip_decay else "decay"
            groups[key]["params"].append(param)
            group_names[key]["params"].append(name)

    print("Param groups = %s" % json.dumps(group_names, indent=2))
    return [groups["decay"], groups["no_decay"]]


================================================
FILE: pytorch_classification/MobileViT/README.md
================================================
## 代码使用简介

1. 下载好数据集，代码中默认使用的是花分类数据集，下载地址: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz),
如果下载不了的话可以通过百度云链接下载: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg 提取码:58p0
2. 在`train.py`脚本中将`--data-path`设置成解压后的`flower_photos`文件夹绝对路径
3. 下载预训练权重，在`model.py`文件中每个模型都有提供预训练权重的下载地址，根据自己使用的模型下载对应预训练权重
4. 在`train.py`脚本中将`--weights`参数设成下载好的预训练权重路径
5. 设置好数据集的路径`--data-path`以及预训练权重的路径`--weights`就能使用`train.py`脚本开始训练了(训练过程中会自动生成`class_indices.json`文件)
6. 在`predict.py`脚本中导入和训练脚本中同样的模型，并将`model_weight_path`设置成训练好的模型权重路径(默认保存在weights文件夹下)
7. 在`predict.py`脚本中将`img_path`设置成你自己需要预测的图片绝对路径
8. 设置好权重路径`model_weight_path`和预测的图片路径`img_path`就能使用`predict.py`脚本进行预测了
9. 如果要使用自己的数据集，请按照花分类数据集的文件结构进行摆放(即一个类别对应一个文件夹)，并且将训练以及预测脚本中的`num_classes`设置成你自己数据的类别数


================================================
FILE: pytorch_classification/MobileViT/model.py
================================================
"""
original code from apple:
https://github.com/apple/ml-cvnets/blob/main/cvnets/models/classification/mobilevit.py
"""

from typing import Optional, Tuple, Union, Dict
import math
import torch
import torch.nn as nn
from torch import Tensor
from torch.nn import functional as F

from transformer import TransformerEncoder
from model_config import get_config


def make_divisible(
    v: Union[float, int],
    divisor: Optional[int] = 8,
    min_value: Optional[Union[float, int]] = None,
) -> Union[float, int]:
    """
    Round ``v`` to the nearest multiple of ``divisor``, never below ``min_value``.

    Taken from the original TensorFlow MobileNet code, where it keeps every
    layer's channel count divisible by 8:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py

    :param v: value to round (typically a channel count)
    :param divisor: the result is a multiple of this value
    :param min_value: lower bound of the result; defaults to ``divisor``
    :return: the adjusted value
    """
    lower_bound = divisor if min_value is None else min_value
    nearest_multiple = int(v + divisor / 2) // divisor * divisor
    result = max(lower_bound, nearest_multiple)
    # Rounding down must not shrink the value by more than 10%.
    if result < 0.9 * v:
        result += divisor
    return result


class ConvLayer(nn.Module):
    """
    2D convolution optionally followed by batch normalization and a SiLU activation.

    The padding is derived from the kernel size as ``(k - 1) / 2`` per spatial
    dimension, truncated to an integer.

    Args:
        in_channels (int): :math:`C_{in}` from an expected input of size :math:`(N, C_{in}, H_{in}, W_{in})`
        out_channels (int): :math:`C_{out}` from an expected output of size :math:`(N, C_{out}, H_{out}, W_{out})`
        kernel_size (Union[int, Tuple[int, int]]): Kernel size for convolution.
        stride (Union[int, Tuple[int, int]]): Stride for convolution. Default: 1
        groups (Optional[int]): Number of groups in convolution. Default: 1
        bias (Optional[bool]): Use bias. Default: ``False``
        use_norm (Optional[bool]): Append ``BatchNorm2d`` after the convolution. Default: ``True``
        use_act (Optional[bool]): Append ``SiLU`` after the convolution (and normalization, if any).
                                Default: ``True``

    Shape:
        - Input: :math:`(N, C_{in}, H_{in}, W_{in})`
        - Output: :math:`(N, C_{out}, H_{out}, W_{out})`

    .. note::
        For depth-wise convolution, `groups=C_{in}=C_{out}`.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[int, Tuple[int, int]],
        stride: Optional[Union[int, Tuple[int, int]]] = 1,
        groups: Optional[int] = 1,
        bias: Optional[bool] = False,
        use_norm: Optional[bool] = True,
        use_act: Optional[bool] = True,
    ) -> None:
        super().__init__()

        # promote scalar arguments to (height, width) pairs
        kernel_size = (kernel_size, kernel_size) if isinstance(kernel_size, int) else kernel_size
        stride = (stride, stride) if isinstance(stride, int) else stride

        assert isinstance(kernel_size, Tuple)
        assert isinstance(stride, Tuple)

        padding = tuple(int((kernel_size[axis] - 1) / 2) for axis in (0, 1))

        layers = nn.Sequential()
        # sub-module names ("conv", "norm", "act") become part of the state_dict keys
        layers.add_module(
            name="conv",
            module=nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=stride,
                groups=groups,
                padding=padding,
                bias=bias
            ),
        )

        if use_norm:
            layers.add_module(name="norm", module=nn.BatchNorm2d(num_features=out_channels, momentum=0.1))

        if use_act:
            layers.add_module(name="act", module=nn.SiLU())

        self.block = layers

    def forward(self, x: Tensor) -> Tensor:
        """Apply the conv (+ norm) (+ activation) stack to ``x``."""
        return self.block(x)


class InvertedResidual(nn.Module):
    """
    Inverted residual block, as described in the `MobileNetv2 <https://arxiv.org/abs/1801.04381>`_ paper:
    an optional 1x1 expansion, a 3x3 depth-wise convolution and a 1x1 linear projection.

    Args:
        in_channels (int): :math:`C_{in}` from an expected input of size :math:`(N, C_{in}, H_{in}, W_{in})`
        out_channels (int): :math:`C_{out}` from an expected output of size :math:`(N, C_{out}, H_{out}, W_{out})`
        stride (int): Stride of the depth-wise convolution; must be 1 or 2
        expand_ratio (Union[int, float]): Expand the input channels by this factor in depth-wise conv
        skip_connection (Optional[bool]): Use skip-connection. Default: True

    Shape:
        - Input: :math:`(N, C_{in}, H_{in}, W_{in})`
        - Output: :math:`(N, C_{out}, H_{out}, W_{out})`

    .. note::
        The skip connection is only applied when `stride == 1`, `in_channels == out_channels`
        and `skip_connection` is set.

    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        stride: int,
        expand_ratio: Union[int, float],
        skip_connection: Optional[bool] = True,
    ) -> None:
        assert stride in [1, 2]
        hidden_dim = make_divisible(int(round(in_channels * expand_ratio)), 8)

        super().__init__()

        # (name, module) pairs in execution order; the names end up in the state_dict keys
        stages = []
        if expand_ratio != 1:
            # 1x1 point-wise expansion to hidden_dim channels
            stages.append((
                "exp_1x1",
                ConvLayer(
                    in_channels=in_channels,
                    out_channels=hidden_dim,
                    kernel_size=1
                ),
            ))

        # 3x3 depth-wise convolution (groups == channels)
        stages.append((
            "conv_3x3",
            ConvLayer(
                in_channels=hidden_dim,
                out_channels=hidden_dim,
                stride=stride,
                kernel_size=3,
                groups=hidden_dim
            ),
        ))

        # 1x1 projection without activation
        stages.append((
            "red_1x1",
            ConvLayer(
                in_channels=hidden_dim,
                out_channels=out_channels,
                kernel_size=1,
                use_act=False,
                use_norm=True,
            ),
        ))

        block = nn.Sequential()
        for stage_name, stage in stages:
            block.add_module(name=stage_name, module=stage)

        self.block = block
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.exp = expand_ratio
        self.stride = stride
        self.use_res_connect = (
            self.stride == 1 and in_channels == out_channels and skip_connection
        )

    def forward(self, x: Tensor, *args, **kwargs) -> Tensor:
        """Run the block, adding the input back when the skip connection is enabled."""
        if not self.use_res_connect:
            return self.block(x)
        return x + self.block(x)


class MobileViTBlock(nn.Module):
    """
    This class defines the `MobileViT block <https://arxiv.org/abs/2110.02178?context=cs.LG>`_

    The block computes a local representation with convolutions, unfolds the
    feature map into patches, runs a stack of ``TransformerEncoder`` layers
    (followed by a ``LayerNorm``) over them, folds the result back into a
    feature map, projects it to ``in_channels`` and fuses it with the block
    input through a final convolution.

    Args:
        in_channels (int): :math:`C_{in}` from an expected input of size :math:`(N, C_{in}, H, W)`
        transformer_dim (int): Input dimension to the transformer unit
        ffn_dim (int): Dimension of the FFN block
        n_transformer_blocks (int): Number of transformer blocks. Default: 2
        head_dim (int): Head dimension in the multi-head attention; must divide
            ``transformer_dim``. Default: 32
        attn_dropout (float): Dropout in multi-head attention. Default: 0.0
        dropout (float): Dropout rate. Default: 0.0
        ffn_dropout (float): Dropout between FFN layers in transformer. Default: 0.0
        patch_h (int): Patch height for unfolding operation. Default: 8
        patch_w (int): Patch width for unfolding operation. Default: 8
        conv_ksize (int): Kernel size to learn local representations in MobileViT block. Default: 3

    Extra positional and keyword arguments are accepted and ignored.
    """

    def __init__(
        self,
        in_channels: int,
        transformer_dim: int,
        ffn_dim: int,
        n_transformer_blocks: int = 2,
        head_dim: int = 32,
        attn_dropout: float = 0.0,
        dropout: float = 0.0,
        ffn_dropout: float = 0.0,
        patch_h: int = 8,
        patch_w: int = 8,
        conv_ksize: Optional[int] = 3,
        *args,
        **kwargs
    ) -> None:
        super().__init__()

        # local representation: k x k conv, then a plain 1x1 conv (no norm,
        # no activation) that maps to the transformer dimension
        conv_3x3_in = ConvLayer(
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=conv_ksize,
            stride=1
        )
        conv_1x1_in = ConvLayer(
            in_channels=in_channels,
            out_channels=transformer_dim,
            kernel_size=1,
            stride=1,
            use_norm=False,
            use_act=False
        )

        # projection from the transformer dimension back to in_channels
        conv_1x1_out = ConvLayer(
            in_channels=transformer_dim,
            out_channels=in_channels,
            kernel_size=1,
            stride=1
        )
        # fusion conv: takes the block input concatenated with the projected
        # global features, hence 2 * in_channels input channels
        conv_3x3_out = ConvLayer(
            in_channels=2 * in_channels,
            out_channels=in_channels,
            kernel_size=conv_ksize,
            stride=1
        )

        self.local_rep = nn.Sequential()
        self.local_rep.add_module(name="conv_3x3", module=conv_3x3_in)
        self.local_rep.add_module(name="conv_1x1", module=conv_1x1_in)

        assert transformer_dim % head_dim == 0
        num_heads = transformer_dim // head_dim

        # global representation: transformer encoders followed by one LayerNorm
        global_rep = [
            TransformerEncoder(
                embed_dim=transformer_dim,
                ffn_latent_dim=ffn_dim,
                num_heads=num_heads,
                attn_dropout=attn_dropout,
                dropout=dropout,
                ffn_dropout=ffn_dropout
            )
            for _ in range(n_transformer_blocks)
        ]
        global_rep.append(nn.LayerNorm(transformer_dim))
        self.global_rep = nn.Sequential(*global_rep)

        self.conv_proj = conv_1x1_out
        self.fusion = conv_3x3_out

        self.patch_h = patch_h
        self.patch_w = patch_w
        self.patch_area = self.patch_w * self.patch_h

        # constructor arguments kept as attributes
        self.cnn_in_dim = in_channels
        self.cnn_out_dim = transformer_dim
        self.n_heads = num_heads
        self.ffn_dim = ffn_dim
        self.dropout = dropout
        self.attn_dropout = attn_dropout
        self.ffn_dropout = ffn_dropout
        self.n_blocks = n_transformer_blocks
        self.conv_ksize = conv_ksize

    def unfolding(self, x: Tensor) -> Tuple[Tensor, Dict]:
        """Rearrange a ``[B, C, H, W]`` feature map into ``[B * P, N, C]`` patches.

        ``P = patch_h * patch_w`` is the number of pixels per patch and ``N``
        the number of patches. When ``H`` or ``W`` is not a multiple of the
        patch size, the map is first bilinearly resized up to the next multiple.

        Returns:
            The patch tensor and an ``info_dict`` holding what ``folding``
            needs to undo the rearrangement.
        """
        patch_w, patch_h = self.patch_w, self.patch_h
        patch_area = patch_w * patch_h
        batch_size, in_channels, orig_h, orig_w = x.shape

        # round the spatial size up to the next multiple of the patch size
        new_h = int(math.ceil(orig_h / self.patch_h) * self.patch_h)
        new_w = int(math.ceil(orig_w / self.patch_w) * self.patch_w)

        interpolate = False
        if new_w != orig_w or new_h != orig_h:
            # Note: Padding can be done, but then it needs to be handled in attention function.
            x = F.interpolate(x, size=(new_h, new_w), mode="bilinear", align_corners=False)
            interpolate = True

        # number of patches along width and height
        num_patch_w = new_w // patch_w  # n_w
        num_patch_h = new_h // patch_h  # n_h
        num_patches = num_patch_h * num_patch_w  # N

        # [B, C, H, W] -> [B * C * n_h, p_h, n_w, p_w]
        x = x.reshape(batch_size * in_channels * num_patch_h, patch_h, num_patch_w, patch_w)
        # [B * C * n_h, p_h, n_w, p_w] -> [B * C * n_h, n_w, p_h, p_w]
        x = x.transpose(1, 2)
        # [B * C * n_h, n_w, p_h, p_w] -> [B, C, N, P] where P = p_h * p_w and N = n_h * n_w
        x = x.reshape(batch_size, in_channels, num_patches, patch_area)
        # [B, C, N, P] -> [B, P, N, C]
        x = x.transpose(1, 3)
        # [B, P, N, C] -> [BP, N, C]
        x = x.reshape(batch_size * patch_area, num_patches, -1)

        info_dict = {
            "orig_size": (orig_h, orig_w),
            "batch_size": batch_size,
            "interpolate": interpolate,
            "total_patches": num_patches,
            "num_patches_w": num_patch_w,
            "num_patches_h": num_patch_h,
        }

        return x, info_dict

    def folding(self, x: Tensor, info_dict: Dict) -> Tensor:
        """Inverse of ``unfolding``: turn ``[B * P, N, C]`` patches back into ``[B, C, H, W]``.

        ``info_dict`` must be the dictionary returned by ``unfolding``. If the
        map was resized there, it is resized back to its original size here.
        """
        n_dim = x.dim()
        assert n_dim == 3, "Tensor should be of shape BPxNxC. Got: {}".format(
            x.shape
        )
        # [BP, N, C] --> [B, P, N, C]
        x = x.contiguous().view(
            info_dict["batch_size"], self.patch_area, info_dict["total_patches"], -1
        )

        batch_size, pixels, num_patches, channels = x.size()
        num_patch_h = info_dict["num_patches_h"]
        num_patch_w = info_dict["num_patches_w"]

        # [B, P, N, C] -> [B, C, N, P]
        x = x.transpose(1, 3)
        # [B, C, N, P] -> [B*C*n_h, n_w, p_h, p_w]
        x = x.reshape(batch_size * channels * num_patch_h, num_patch_w, self.patch_h, self.patch_w)
        # [B*C*n_h, n_w, p_h, p_w] -> [B*C*n_h, p_h, n_w, p_w]
        x = x.transpose(1, 2)
        # [B*C*n_h, p_h, n_w, p_w] -> [B, C, H, W]
        x = x.reshape(batch_size, channels, num_patch_h * self.patch_h, num_patch_w * self.patch_w)
        if info_dict["interpolate"]:
            # undo the resize applied in unfolding
            x = F.interpolate(
                x,
                size=info_dict["orig_size"],
                mode="bilinear",
                align_corners=False,
            )
        return x

    def forward(self, x: Tensor) -> Tensor:
        """Apply the MobileViT block; the output has the same shape as ``x``."""
        # keep the input for the fusion step
        res = x

        fm = self.local_rep(x)

        # convert feature map to patches
        patches, info_dict = self.unfolding(fm)

        # learn global representations
        for transformer_layer in self.global_rep:
            patches = transformer_layer(patches)

        # fold the patches back into a feature map: [BP, N, C] -> [B, C, H, W]
        fm = self.folding(x=patches, info_dict=info_dict)

        fm = self.conv_proj(fm)

        # concatenate input and global features along channels, then fuse
        fm = self.fusion(torch.cat((res, fm), dim=1))
        return fm


class MobileViT(nn.Module):
    """
    This class implements the `MobileViT architecture <https://arxiv.org/abs/2110.02178?context=cs.LG>`_

    The network is a stride-2 stem convolution, five configurable stages
    (MobileNetV2 or MobileViT blocks), a 1x1 expansion convolution and a
    classifier made of global average pooling, optional dropout and a linear layer.

    Args:
        model_cfg (Dict): Model configuration. The keys read here are
            ``"layer1"`` .. ``"layer5"`` (one dict per stage),
            ``"last_layer_exp_factor"`` and ``"cls_dropout"``.
        num_classes (int): Number of output classes. Default: 1000
    """
    def __init__(self, model_cfg: Dict, num_classes: int = 1000):
        super().__init__()

        image_channels = 3
        out_channels = 16

        self.conv_1 = ConvLayer(
            in_channels=image_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=2
        )

        # each stage returns its module and its output channel count, which
        # becomes the input channel count of the next stage
        self.layer_1, out_channels = self._make_layer(input_channel=out_channels, cfg=model_cfg["layer1"])
        self.layer_2, out_channels = self._make_layer(input_channel=out_channels, cfg=model_cfg["layer2"])
        self.layer_3, out_channels = self._make_layer(input_channel=out_channels, cfg=model_cfg["layer3"])
        self.layer_4, out_channels = self._make_layer(input_channel=out_channels, cfg=model_cfg["layer4"])
        self.layer_5, out_channels = self._make_layer(input_channel=out_channels, cfg=model_cfg["layer5"])

        # final expansion, capped at 960 channels
        exp_channels = min(model_cfg["last_layer_exp_factor"] * out_channels, 960)
        self.conv_1x1_exp = ConvLayer(
            in_channels=out_channels,
            out_channels=exp_channels,
            kernel_size=1
        )

        self.classifier = nn.Sequential()
        self.classifier.add_module(name="global_pool", module=nn.AdaptiveAvgPool2d(1))
        self.classifier.add_module(name="flatten", module=nn.Flatten())
        # dropout is only inserted for a rate strictly between 0 and 1
        if 0.0 < model_cfg["cls_dropout"] < 1.0:
            self.classifier.add_module(name="dropout", module=nn.Dropout(p=model_cfg["cls_dropout"]))
        self.classifier.add_module(name="fc", module=nn.Linear(in_features=exp_channels, out_features=num_classes))

        # weight init
        self.apply(self.init_parameters)

    def _make_layer(self, input_channel, cfg: Dict) -> Tuple[nn.Sequential, int]:
        """Build one stage; ``block_type`` "mobilevit" (the default) selects a
        MobileViT stage, anything else a MobileNetV2 stage."""
        block_type = cfg.get("block_type", "mobilevit")
        if block_type.lower() == "mobilevit":
            return self._make_mit_layer(input_channel=input_channel, cfg=cfg)
        else:
            return self._make_mobilenet_layer(input_channel=input_channel, cfg=cfg)

    @staticmethod
    def _make_mobilenet_layer(input_channel: int, cfg: Dict) -> Tuple[nn.Sequential, int]:
        """Stack ``num_blocks`` inverted residual blocks.

        Returns the stage and its output channel count.
        """
        output_channels = cfg.get("out_channels")
        num_blocks = cfg.get("num_blocks", 2)
        expand_ratio = cfg.get("expand_ratio", 4)
        block = []

        for i in range(num_blocks):
            # only the first block of the stage applies the configured stride
            stride = cfg.get("stride", 1) if i == 0 else 1

            layer = InvertedResidual(
                in_channels=input_channel,
                out_channels=output_channels,
                stride=stride,
                expand_ratio=expand_ratio
            )
            block.append(layer)
            input_channel = output_channels

        return nn.Sequential(*block), input_channel

    @staticmethod
    def _make_mit_layer(input_channel: int, cfg: Dict) -> Tuple[nn.Sequential, int]:
        """Build a MobileViT stage: an optional stride-2 inverted residual
        block followed by one ``MobileViTBlock``.

        Returns the stage and its output channel count.

        Raises:
            ValueError: If the head dimension derived from ``num_heads`` is
                zero or does not divide ``transformer_channels``.
        """
        stride = cfg.get("stride", 1)
        block = []

        if stride == 2:
            # down-sample with an inverted residual block first
            layer = InvertedResidual(
                in_channels=input_channel,
                out_channels=cfg.get("out_channels"),
                stride=stride,
                expand_ratio=cfg.get("mv_expand_ratio", 4)
            )

            block.append(layer)
            input_channel = cfg.get("out_channels")

        transformer_dim = cfg["transformer_channels"]
        ffn_dim = cfg.get("ffn_dim")
        num_heads = cfg.get("num_heads", 4)
        head_dim = transformer_dim // num_heads

        # head_dim is 0 when num_heads > transformer_dim; test it first so the
        # modulo below cannot raise ZeroDivisionError instead of the ValueError
        if head_dim == 0 or transformer_dim % head_dim != 0:
            raise ValueError("Transformer input dimension should be divisible by head dimension. "
                             "Got {} and {}.".format(transformer_dim, head_dim))

        block.append(MobileViTBlock(
            in_channels=input_channel,
            transformer_dim=transformer_dim,
            ffn_dim=ffn_dim,
            n_transformer_blocks=cfg.get("transformer_blocks", 1),
            patch_h=cfg.get("patch_h", 2),
            patch_w=cfg.get("patch_w", 2),
            dropout=cfg.get("dropout", 0.1),
            ffn_dropout=cfg.get("ffn_dropout", 0.0),
            attn_dropout=cfg.get("attn_dropout", 0.1),
            head_dim=head_dim,
            conv_ksize=3
        ))

        return nn.Sequential(*block), input_channel

    @staticmethod
    def init_parameters(m):
        """Initialize one module; intended to be passed to ``nn.Module.apply``.

        Conv2d weights use Kaiming-normal (fan_out), normalization layers are
        set to weight 1 / bias 0, Linear weights use a truncated normal with
        std 0.02, and all biases are zeroed. Other modules are left untouched.
        """
        if isinstance(m, nn.Conv2d):
            if m.weight is not None:
                nn.init.kaiming_normal_(m.weight, mode="fan_out")
            if m.bias is not None:
                nn.init.zeros_(m.bias)
        elif isinstance(m, (nn.LayerNorm, nn.BatchNorm2d)):
            if m.weight is not None:
                nn.init.ones_(m.weight)
            if m.bias is not None:
                nn.init.zeros_(m.bias)
        elif isinstance(m, (nn.Linear,)):
            if m.weight is not None:
                nn.init.trunc_normal_(m.weight, mean=0.0, std=0.02)
            if m.bias is not None:
                nn.init.zeros_(m.bias)
        else:
            pass

    def forward(self, x: Tensor) -> Tensor:
        """Run the stem, the five stages, the expansion conv and the classifier."""
        x = self.conv_1(x)
        x = self.layer_1(x)
        x = self.layer_2(x)

        x = self.layer_3(x)
        x = self.layer_4(x)
        x = self.layer_5(x)
        x = self.conv_1x1_exp(x)
        x = self.classifier(x)
        return x


def mobile_vit_xx_small(num_classes: int = 1000):
    """Build a MobileViT model from the ``"xx_small"`` configuration.

    Pretrained weights:
    https://docs-assets.developer.apple.com/ml-research/models/cvnets/classification/mobilevit_xxs.pt

    Args:
        num_classes: Forwarded to ``MobileViT`` as ``num_classes``.

    Returns:
        The constructed ``MobileViT`` instance.
    """
    return MobileViT(get_config("xx_small"), num_classes=num_classes)


def mobile_vit_x_small(num_classes: int = 1000):
    """Build a MobileViT model from the ``"x_small"`` configuration.

    Pretrained weights:
    https://docs-assets.developer.apple.com/ml-research/models/cvnets/classification/mobilevit_xs.pt

    Args:
        num_classes: Forwarded to ``MobileViT`` as ``num_classes``.

    Returns:
        The constructed ``MobileViT`` instance.
    """
    return MobileViT(get_config("x_small"), num_classes=num_classes)


def mobile_vit_small(num_classes: int = 1000):
    """Build a MobileViT model from the ``"small"`` configuration.

    Pretrained weights:
    https://docs-assets.developer.apple.com/ml-research/models/cvnets/classification/mobilevit_s.pt

    Args:
        num_classes: Forwarded to ``MobileViT`` as ``num_classes``.

    Returns:
        The constructed ``MobileViT`` instance.
    """
    return MobileViT(get_config("small"), num_classes=num_classes)


================================================
FILE: pytorch_classification/MobileViT/model_config.py
================================================
def get_config(mode: str = "xxs") -> dict:
    """Return the architecture configuration of one MobileViT variant.

    Args:
        mode: Variant name. ``"xx_small"``, ``"x_small"`` and ``"small"`` are
            the canonical names. The short forms ``"xxs"``, ``"xs"`` and
            ``"s"`` are accepted as aliases, so the default value resolves to
            the ``"xx_small"`` configuration instead of raising.

    Returns:
        A newly built dict with the keys ``"layer1"`` .. ``"layer5"`` (one
        settings dict per stage), ``"last_layer_exp_factor"`` and
        ``"cls_dropout"``. ``layer1``/``layer2`` have ``block_type`` ``"mv2"``
        and ``layer3``..``layer5`` have ``block_type`` ``"mobilevit"``. Every
        stage dict also carries ``dropout``, ``ffn_dropout`` and
        ``attn_dropout``.

    Raises:
        NotImplementedError: If ``mode`` is not a known variant name.
    """
    aliases = {"xxs": "xx_small", "xs": "x_small", "s": "small"}

    # Per variant: MV2 expand ratio, out_channels of layer1/layer2, and
    # (out_channels, transformer_channels, ffn_dim) of layer3/layer4/layer5.
    variants = {
        "xx_small": (2, (16, 24), ((48, 64, 128), (64, 80, 160), (80, 96, 192))),
        "x_small": (4, (32, 48), ((64, 96, 192), (80, 120, 240), (96, 144, 288))),
        "small": (4, (32, 64), ((96, 144, 288), (128, 192, 384), (160, 240, 480))),
    }

    name = aliases.get(mode, mode) if isinstance(mode, str) else None
    if name not in variants:
        raise NotImplementedError(
            "unknown MobileViT mode: {!r}, expected one of {}".format(
                mode, sorted(set(variants) | set(aliases))))

    mv2_exp_mult, mv2_channels, vit_dims = variants[name]

    # Settings shared by all variants.
    mv2_shapes = ((1, 1), (3, 2))  # (num_blocks, stride) of layer1 / layer2
    vit_depths = (2, 4, 3)  # transformer_blocks of layer3 / layer4 / layer5
    dropouts = {"dropout": 0.1, "ffn_dropout": 0.0, "attn_dropout": 0.0}

    config = {}
    for idx, (out_channels, (num_blocks, stride)) in enumerate(
            zip(mv2_channels, mv2_shapes), start=1):
        layer = {
            "out_channels": out_channels,
            "expand_ratio": mv2_exp_mult,
            "num_blocks": num_blocks,
            "stride": stride,
            "block_type": "mv2",
        }
        layer.update(dropouts)
        config["layer{}".format(idx)] = layer

    # The original literals annotated layer3/4/5 with "28x28", "14x14" and
    # "7x7" (presumably the feature-map sizes for a 224x224 input).
    for idx, ((out_channels, transformer_channels, ffn_dim), depth) in enumerate(
            zip(vit_dims, vit_depths), start=3):
        layer = {
            "out_channels": out_channels,
            "transformer_channels": transformer_channels,
            "ffn_dim": ffn_dim,
            "transformer_blocks": depth,
            "patch_h": 2,
            "patch_w": 2,
            "stride": 2,
            "mv_expand_ratio": mv2_exp_mult,
            "num_heads": 4,
            "block_type": "mobilevit",
        }
        layer.update(dropouts)
        config["layer{}".format(idx)] = layer

    config["last_layer_exp_factor"] = 4
    config["cls_dropout"] = 0.1

    return config


================================================
FILE: pytorch_classification/MobileViT/my_dataset.py
================================================
from PIL import Image
import torch
from torch.utils.data import Dataset


class MyDataSet(Dataset):
    """Custom dataset built from parallel lists of image paths and labels."""

    def __init__(self, images_path: list, images_class: list, transform=None):
        """Store the sample lists and the optional per-image transform.

        Args:
            images_path: Paths of the image files, one per sample.
            images_class: Label of each sample, aligned with ``images_path``.
            transform: Optional callable applied to the opened PIL image.
        """
        self.transform = transform
        self.images_class = images_class
        self.images_path = images_path

    def __len__(self):
        """Return the number of samples (length of ``images_path``)."""
        return len(self.images_path)

    def __getitem__(self, item):
        """Open sample ``item`` and return ``(image, label)``.

        Raises:
            ValueError: If the opened image is not in ``'RGB'`` mode.
        """
        path = self.images_path[item]
        img = Image.open(path)
        # 'RGB' is a colour image, 'L' a greyscale one; only RGB is accepted
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(path))

        label = self.images_class[item]
        if self.transform is None:
            return img, label
        return self.transform(img), label

    @staticmethod
    def collate_fn(batch):
        """Merge a list of ``(image, label)`` samples into two batch tensors.

        For the official ``default_collate`` implementation see
        https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py

        Returns:
            ``(images, labels)``: the images stacked along a new dimension 0
            and the labels converted with ``torch.as_tensor``.
        """
        images, labels = zip(*batch)
        return torch.stack(images, dim=0), torch.as_tensor(labels)


================================================
FILE: pytorch_classification/MobileViT/predict.py
================================================
import os
import json

import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

from model import mobile_vit_xx_small as create_model


def main():
    """Classify one image with trained MobileViT weights and plot the result.

    Reads the image ``../tulip.jpg``, the index-to-class-name mapping from
    ``./class_indices.json`` and the weights from ``./weights/best_model.pth``.
    Prints the probability of every class and shows the image with the most
    likely class as its title.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    img_size = 224
    # Use the same resize factor as the "val" transform in train.py (1.143, i.e.
    # 256 px before the 224 px centre crop). The previous factor of 1.14 gave
    # 255 px, so inference used slightly different preprocessing than validation.
    data_transform = transforms.Compose(
        [transforms.Resize(int(img_size * 1.143)),
         transforms.CenterCrop(img_size),
         transforms.ToTensor(),
         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    img = Image.open(img_path)
    plt.imshow(img)
    # PIL image -> normalised tensor [C, H, W]
    img = data_transform(img)
    # add the batch dimension: [C, H, W] -> [N, C, H, W]
    img = torch.unsqueeze(img, dim=0)

    # read the index -> class name mapping
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = create_model(num_classes=5).to(device)
    # load model weights
    model_weight_path = "./weights/best_model.pth"
    model.load_state_dict(torch.load(model_weight_path, map_location=device))
    model.eval()
    with torch.no_grad():
        # predict class
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).item()

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                                 predict[predict_cla].item())
    plt.title(print_res)
    for i, prob in enumerate(predict):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  prob.item()))
    plt.show()


# Run the prediction only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/MobileViT/train.py
================================================
import os
import argparse

import torch
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

from my_dataset import MyDataSet
from model import mobile_vit_xx_small as create_model
from utils import read_split_data, train_one_epoch, evaluate


def main(args):
    """Train (or fine-tune) the MobileViT classifier and log to TensorBoard.

    After every epoch the latest weights are written to
    ``./weights/latest_model.pth``; the weights with the best validation
    accuracy so far are written to ``./weights/best_model.pth``.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``device``, ``data_path``, ``batch_size``, ``num_classes``,
            ``weights``, ``freeze_layers``, ``lr`` and ``epochs``.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")

    # exist_ok replaces the separate exists() check
    os.makedirs("./weights", exist_ok=True)

    tb_writer = SummaryWriter()

    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)

    img_size = 224
    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(img_size),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(int(img_size * 1.143)),
                                   transforms.CenterCrop(img_size),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # training dataset
    train_dataset = MyDataSet(images_path=train_images_path,
                              images_class=train_images_label,
                              transform=data_transform["train"])

    # validation dataset
    val_dataset = MyDataSet(images_path=val_images_path,
                            images_class=val_images_label,
                            transform=data_transform["val"])

    batch_size = args.batch_size
    # os.cpu_count() may return None, which min() cannot compare with ints
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_dataset.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_dataset.collate_fn)

    model = create_model(num_classes=args.num_classes).to(device)

    if args.weights != "":
        assert os.path.exists(args.weights), "weights file: '{}' not exist.".format(args.weights)
        weights_dict = torch.load(args.weights, map_location=device)
        weights_dict = weights_dict["model"] if "model" in weights_dict else weights_dict
        # drop the weights of the classification head
        for k in list(weights_dict.keys()):
            if "classifier" in k:
                del weights_dict[k]
        print(model.load_state_dict(weights_dict, strict=False))

    if args.freeze_layers:
        for name, para in model.named_parameters():
            # freeze everything except the classification head
            if "classifier" not in name:
                para.requires_grad_(False)
            else:
                print("training {}".format(name))

    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.AdamW(pg, lr=args.lr, weight_decay=1E-2)

    tags = ["train_loss", "train_acc", "val_loss", "val_acc", "learning_rate"]
    best_acc = 0.
    try:
        for epoch in range(args.epochs):
            # train
            train_loss, train_acc = train_one_epoch(model=model,
                                                    optimizer=optimizer,
                                                    data_loader=train_loader,
                                                    device=device,
                                                    epoch=epoch)

            # validate
            val_loss, val_acc = evaluate(model=model,
                                         data_loader=val_loader,
                                         device=device,
                                         epoch=epoch)

            tb_writer.add_scalar(tags[0], train_loss, epoch)
            tb_writer.add_scalar(tags[1], train_acc, epoch)
            tb_writer.add_scalar(tags[2], val_loss, epoch)
            tb_writer.add_scalar(tags[3], val_acc, epoch)
            tb_writer.add_scalar(tags[4], optimizer.param_groups[0]["lr"], epoch)

            if val_acc > best_acc:
                best_acc = val_acc
                torch.save(model.state_dict(), "./weights/best_model.pth")

            torch.save(model.state_dict(), "./weights/latest_model.pth")
    finally:
        # flush pending events and release the event file, even when training
        # is interrupted; the writer was previously never closed
        tb_writer.close()


def str2bool(value):
    """Convert a command-line string to ``bool``.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so every
    non-empty value, including ``"False"``, becomes ``True``. This parser
    interprets the text instead.

    Args:
        value: The raw option value (or an actual ``bool``).

    Returns:
        ``True`` for ``true/t/yes/y/1`` and ``False`` for ``false/f/no/n/0`` or
        the empty string (case-insensitive).

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ("true", "t", "yes", "y", "1"):
        return True
    if text in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--batch-size', type=int, default=8)
    parser.add_argument('--lr', type=float, default=0.0002)

    # root directory of the dataset
    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
    parser.add_argument('--data-path', type=str,
                        default="/data/flower_photos")

    # path of the pretrained weights; pass an empty string to skip loading
    parser.add_argument('--weights', type=str, default='./mobilevit_xxs.pt',
                        help='initial weights path')
    # whether to freeze all weights except the classification head
    parser.add_argument('--freeze-layers', type=str2bool, default=False)
    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')

    opt = parser.parse_args()

    main(opt)


================================================
FILE: pytorch_classification/MobileViT/transformer.py
================================================
from typing import Optional

import torch
import torch.nn as nn
from torch import Tensor


class MultiHeadAttention(nn.Module):
    """
    Multi-head self-attention layer in the style of
    `Attention is all you need <https://arxiv.org/abs/1706.03762>`_.

    Args:
        embed_dim (int): :math:`C_{in}` from an expected input of size :math:`(N, P, C_{in})`
        num_heads (int): Number of attention heads; must divide ``embed_dim``
        attn_dropout (float): Dropout applied to the attention weights. Default: 0.0
        bias (bool): Whether the two linear projections use a bias. Default: ``True``

    Shape:
        - Input: :math:`(N, P, C_{in})` where :math:`N` is batch size, :math:`P` is number of patches,
        and :math:`C_{in}` is input embedding dim
        - Output: same shape as the input

    """

    def __init__(
        self,
        embed_dim: int,
        num_heads: int,
        attn_dropout: float = 0.0,
        bias: bool = True,
        *args,
        **kwargs
    ) -> None:
        super().__init__()
        if embed_dim % num_heads != 0:
            raise ValueError(
                "Embedding dim must be divisible by number of heads in {}. Got: embed_dim={} and num_heads={}".format(
                    self.__class__.__name__, embed_dim, num_heads
                )
            )

        # one fused projection produces query, key and value together
        self.qkv_proj = nn.Linear(embed_dim, 3 * embed_dim, bias=bias)

        self.attn_dropout = nn.Dropout(p=attn_dropout)
        self.out_proj = nn.Linear(embed_dim, embed_dim, bias=bias)

        self.head_dim = embed_dim // num_heads
        # 1 / sqrt(head_dim), applied to the queries before the dot product
        self.scaling = self.head_dim ** -0.5
        self.softmax = nn.Softmax(dim=-1)
        self.num_heads = num_heads
        self.embed_dim = embed_dim

    def forward(self, x_q: Tensor) -> Tensor:
        """Apply self-attention to ``x_q`` of shape ``[N, P, C]``."""
        batch, patches, _ = x_q.shape

        # [N, P, C] -> [N, P, 3C] -> [N, P, 3, h, c] with C = h * c
        packed = self.qkv_proj(x_q).reshape(batch, patches, 3, self.num_heads, -1)
        # swap the patch and head axes: [N, P, 3, h, c] -> [N, h, 3, P, c]
        packed = packed.transpose(1, 3).contiguous()

        # three tensors of shape [N, h, P, c]
        q = packed[:, :, 0]
        k = packed[:, :, 1]
        v = packed[:, :, 2]

        # scaled dot product: [N, h, P, c] x [N, h, c, P] -> [N, h, P, P]
        scores = torch.matmul(q * self.scaling, k.transpose(-1, -2))
        weights = self.attn_dropout(self.softmax(scores))

        # weighted sum of the values: [N, h, P, P] x [N, h, P, c] -> [N, h, P, c]
        context = torch.matmul(weights, v)

        # merge the heads again: [N, h, P, c] -> [N, P, h, c] -> [N, P, C]
        merged = context.transpose(1, 2).reshape(batch, patches, -1)
        return self.out_proj(merged)


class TransformerEncoder(nn.Module):
    """
    Pre-norm `Transformer encoder <https://arxiv.org/abs/1706.03762>`_ block:
    a multi-head attention branch and a feed-forward branch, each preceded by
    a LayerNorm and wrapped in a residual connection.

    Args:
        embed_dim (int): :math:`C_{in}` from an expected input of size :math:`(N, P, C_{in})`
        ffn_latent_dim (int): Inner dimension of the FFN
        num_heads (int) : Number of heads in multi-head attention. Default: 8
        attn_dropout (float): Dropout rate for attention in multi-head attention. Default: 0.0
        dropout (float): Dropout rate applied at the end of both branches. Default: 0.0
        ffn_dropout (float): Dropout between FFN layers. Default: 0.0

    Shape:
        - Input: :math:`(N, P, C_{in})` where :math:`N` is batch size, :math:`P` is number of patches,
        and :math:`C_{in}` is input embedding dim
        - Output: same shape as the input
    """

    def __init__(
        self,
        embed_dim: int,
        ffn_latent_dim: int,
        num_heads: Optional[int] = 8,
        attn_dropout: Optional[float] = 0.0,
        dropout: Optional[float] = 0.0,
        ffn_dropout: Optional[float] = 0.0,
        *args,
        **kwargs
    ) -> None:

        super().__init__()

        # built first so its parameters are created before those of the FFN
        attention = MultiHeadAttention(embed_dim, num_heads, attn_dropout=attn_dropout, bias=True)

        # attention branch: LayerNorm -> attention -> dropout
        mha_layers = [nn.LayerNorm(embed_dim), attention, nn.Dropout(p=dropout)]
        self.pre_norm_mha = nn.Sequential(*mha_layers)

        # feed-forward branch: LayerNorm -> Linear -> SiLU -> dropout -> Linear -> dropout
        ffn_layers = [
            nn.LayerNorm(embed_dim),
            nn.Linear(embed_dim, ffn_latent_dim, bias=True),
            nn.SiLU(),
            nn.Dropout(p=ffn_dropout),
            nn.Linear(ffn_latent_dim, embed_dim, bias=True),
            nn.Dropout(p=dropout),
        ]
        self.pre_norm_ffn = nn.Sequential(*ffn_layers)

        self.embed_dim = embed_dim
        self.ffn_dim = ffn_latent_dim
        self.ffn_dropout = ffn_dropout
        self.std_dropout = dropout

    def forward(self, x: Tensor) -> Tensor:
        """Apply the attention branch and then the FFN branch, both residual."""
        # multi-head attention with skip connection
        x = x + self.pre_norm_mha(x)
        # feed forward network with skip connection
        return x + self.pre_norm_ffn(x)


================================================
FILE: pytorch_classification/MobileViT/unfold_test.py
================================================
import time
import torch

# Problem size shared by the two unfolding implementations below.
batch_size = 8  # B
in_channels = 32  # C
patch_h = 2  # p_h: patch height
patch_w = 2  # p_w: patch width
num_patch_h = 16  # n_h: number of patches along the height
num_patch_w = 16  # n_w: number of patches along the width
num_patches = num_patch_h * num_patch_w  # N = n_h * n_w
patch_area = patch_h * patch_w  # P = p_h * p_w


def official(x: torch.Tensor):
    """Unfold ``[B, C, H, W]`` into ``[B * P, N, C]`` via a 4-D intermediate.

    Batch, channel and patch-row axes are merged for the first reshape; the
    sizes come from the module-level constants.
    """
    # [B, C, H, W] -> [B * C * n_h, p_h, n_w, p_w]
    rows = x.reshape(batch_size * in_channels * num_patch_h, patch_h, num_patch_w, patch_w)
    # -> [B * C * n_h, n_w, p_h, p_w] -> [B, C, N, P] where P = p_h * p_w and N = n_h * n_w
    patches = rows.transpose(1, 2).reshape(batch_size, in_channels, num_patches, patch_area)
    # [B, C, N, P] -> [B, P, N, C] -> [BP, N, C]
    return patches.transpose(1, 3).reshape(batch_size * patch_area, num_patches, -1)


def my_self(x: torch.Tensor):
    """Unfold ``[B, C, H, W]`` into ``[B * P, N, C]`` via a 6-D intermediate.

    Every axis is kept separate for the first reshape; the sizes come from the
    module-level constants.
    """
    # [B, C, H, W] -> [B, C, n_h, p_h, n_w, p_w]
    grid = x.reshape(batch_size, in_channels, num_patch_h, patch_h, num_patch_w, patch_w)
    # -> [B, C, n_h, n_w, p_h, p_w] -> [B, C, N, P] where P = p_h * p_w and N = n_h * n_w
    patches = grid.transpose(3, 4).reshape(batch_size, in_channels, num_patches, patch_area)
    # [B, C, N, P] -> [B, P, N, C] -> [BP, N, C]
    return patches.transpose(1, 3).reshape(batch_size * patch_area, num_patches, -1)


if __name__ == '__main__':
    t = torch.randn(batch_size, in_channels, num_patch_h * patch_h, num_patch_w * patch_w)
    # both implementations must produce exactly the same tensor
    print(torch.equal(official(t), my_self(t)))

    # time 1000 calls of each implementation, the official one first
    for label, unfold in (("official", official), ("self", my_self)):
        start = time.time()
        for _ in range(1000):
            unfold(t)
        print(f"{label} time: {time.time() - start}")


================================================
FILE: pytorch_classification/MobileViT/utils.py
================================================
import os
import sys
import json
import pickle
import random

import torch
from tqdm import tqdm

import matplotlib.pyplot as plt


def read_split_data(root: str, val_rate: float = 0.2, plot_image: bool = False):
    """Scan an image-folder dataset and split it into training and validation sets.

    Every sub-directory of ``root`` is one class. Class indices follow the
    sorted directory names, and the index -> name mapping is written to
    ``class_indices.json`` in the current working directory. Within each class
    ``int(len(images) * val_rate)`` images are sampled for validation; the rest
    go to the training set.

    Args:
        root: Dataset root directory.
        val_rate: Fraction of each class used for validation.
        plot_image: If ``True``, show a bar chart of the number of images per
            class (previously a hard-coded ``False`` inside the function).

    Returns:
        ``(train_images_path, train_images_label, val_images_path,
        val_images_label)`` as four lists.

    Note:
        Seeds the global ``random`` module with 0 so the split is reproducible.
    """
    random.seed(0)  # make the random split reproducible
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # one sub-directory per class; sorted so the order is the same on every platform
    flower_class = sorted(cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla)))
    # class name -> numeric index
    class_indices = {name: index for index, name in enumerate(flower_class)}
    json_str = json.dumps({index: name for name, index in class_indices.items()}, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of every training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of every validation image
    every_class_num = []  # number of samples per class
    supported = (".jpg", ".JPG", ".png", ".PNG")  # supported file extensions

    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # all files with a supported extension, sorted for a platform-independent order
        images = sorted(os.path.join(cla_path, i) for i in os.listdir(cla_path)
                        if os.path.splitext(i)[-1] in supported)
        image_class = class_indices[cla]
        every_class_num.append(len(images))
        # A set gives O(1) membership tests in the loop below; the sampled
        # paths are identical to the original list-based implementation.
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.".format(sum(every_class_num)))
    print("{} images for training.".format(len(train_images_path)))
    print("{} images for validation.".format(len(val_images_path)))
    assert len(train_images_path) > 0, "number of training images must greater than 0."
    assert len(val_images_path) > 0, "number of validation images must greater than 0."

    if plot_image:
        # bar chart of the number of images per class
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # replace the x ticks 0,1,2,... with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # write the count above every bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        plt.xlabel('image class')
        plt.ylabel('number of images')
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label


def plot_data_loader_image(data_loader):
    """Show up to four de-normalised images of every batch with their class names.

    The class names are read from ``./class_indices.json``.

    Args:
        data_loader: Loader yielding ``(images, labels)`` batches; its
            ``batch_size`` attribute limits how many images are plotted.
            Assumes the images were normalised with the mean/std undone
            below - confirm against the transform in use.
    """
    batch_size = data_loader.batch_size
    plot_num = min(batch_size, 4)

    json_path = './class_indices.json'
    assert os.path.exists(json_path), json_path + " does not exist."
    # the context manager closes the file; it was previously left open
    with open(json_path, 'r') as json_file:
        class_indices = json.load(json_file)

    for data in data_loader:
        images, labels = data
        # the last batch may hold fewer than plot_num images
        for i in range(min(plot_num, len(images))):
            # [C, H, W] -> [H, W, C]
            img = images[i].numpy().transpose(1, 2, 0)
            # undo the normalisation
            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255
            label = labels[i].item()
            plt.subplot(1, plot_num, i+1)
            plt.xlabel(class_indices[str(label)])
            plt.xticks([])  # hide the x-axis ticks
            plt.yticks([])  # hide the y-axis ticks
            plt.imshow(img.astype('uint8'))
        plt.show()


def write_pickle(list_info: list, file_name: str):
    """Serialise ``list_info`` with pickle into the file ``file_name``.

    An existing file is overwritten.
    """
    with open(file_name, 'wb') as sink:
        pickle.Pickler(sink).dump(list_info)


def read_pickle(file_name: str) -> list:
    """Load and return the object pickled in ``file_name``.

    NOTE: unpickling can execute arbitrary code; only read trusted files.
    """
    with open(file_name, 'rb') as source:
        return pickle.Unpickler(source).load()


def train_one_epoch(model, optimizer, data_loader, device, epoch):
    """Train ``model`` for one pass over ``data_loader``.

    Uses cross-entropy with label smoothing 0.1 and performs one optimizer
    step per batch. The process exits with status 1 when a loss is not finite.

    Args:
        model: Network to train; switched to training mode.
        optimizer: Optimizer stepped after every batch.
        data_loader: Iterable of ``(images, labels)`` batches.
        device: Device the batches and accumulators are moved to.
        epoch: Epoch number, only used in the progress-bar text.

    Returns:
        ``(mean loss per batch, accuracy over all seen samples)``.
    """
    model.train()
    criterion = torch.nn.CrossEntropyLoss(label_smoothing=0.1)
    running_loss = torch.zeros(1).to(device)  # accumulated loss
    running_correct = torch.zeros(1).to(device)  # accumulated number of correct predictions
    optimizer.zero_grad()

    seen = 0
    progress = tqdm(data_loader, file=sys.stdout)
    for step, (images, labels) in enumerate(progress):
        seen += images.shape[0]
        labels = labels.to(device)

        logits = model(images.to(device))
        predicted = torch.max(logits, dim=1)[1]
        running_correct += torch.eq(predicted, labels).sum()

        loss = criterion(logits, labels)
        loss.backward()
        running_loss += loss.detach()

        progress.desc = "[train epoch {}] loss: {:.3f}, acc: {:.3f}".format(epoch,
                                                                            running_loss.item() / (step + 1),
                                                                            running_correct.item() / seen)

        if not torch.isfinite(loss):
            print('WARNING: non-finite loss, ending training ', loss)
            sys.exit(1)

        optimizer.step()
        optimizer.zero_grad()

    return running_loss.item() / (step + 1), running_correct.item() / seen


@torch.no_grad()
def evaluate(model, data_loader, device, epoch):
    """Evaluate ``model`` on ``data_loader`` without tracking gradients.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        data_loader: Iterable of ``(images, labels)`` batches.
        device: Device the batches and accumulators are moved to.
        epoch: Epoch number, only used in the progress-bar text.

    Returns:
        ``(mean cross-entropy loss per batch, accuracy over all seen samples)``.
    """
    criterion = torch.nn.CrossEntropyLoss()

    model.eval()

    running_correct = torch.zeros(1).to(device)  # accumulated number of correct predictions
    running_loss = torch.zeros(1).to(device)  # accumulated loss

    seen = 0
    progress = tqdm(data_loader, file=sys.stdout)
    for step, (images, labels) in enumerate(progress):
        seen += images.shape[0]
        labels = labels.to(device)

        logits = model(images.to(device))
        predicted = torch.max(logits, dim=1)[1]
        running_correct += torch.eq(predicted, labels).sum()

        running_loss += criterion(logits, labels)

        progress.desc = "[valid epoch {}] loss: {:.3f}, acc: {:.3f}".format(epoch,
                                                                            running_loss.item() / (step + 1),
                                                                            running_correct.item() / seen)

    return running_loss.item() / (step + 1), running_correct.item() / seen


================================================
FILE: pytorch_classification/README.md
================================================
## 该文件夹存放使用pytorch实现的代码版本
**model.py**： 是模型文件  
**train.py**： 是调用模型训练的文件    
**predict.py**： 是调用模型进行预测的文件  
**class_indices.json**： 是训练数据集对应的标签文件   

------
若要使用该训练脚本需要下载对应的花分类数据集并将其划分为训练集和验证集。   
[点击这里](../data_set/README.md)会告诉你如何去下载数据集，以及提供了现成的划分数据集脚本  

================================================
FILE: pytorch_classification/Test10_regnet/README.md
================================================
## 代码使用简介

1. 下载好数据集，代码中默认使用的是花分类数据集，下载地址: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz),
如果下载不了的话可以通过百度云链接下载: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg 提取码:58p0
2. 在`train.py`脚本中将`--data-path`设置成解压后的`flower_photos`文件夹绝对路径
3. 下载预训练权重，根据自己使用的模型下载对应预训练权重: https://pan.baidu.com/s/1XTo3walj9ai7ZhWz7jh-YA  密码: 8lmu
4. 在`train.py`脚本中将`--weights`参数设成下载好的预训练权重路径
5. 设置好数据集的路径`--data-path`以及预训练权重的路径`--weights`就能使用`train.py`脚本开始训练了(训练过程中会自动生成`class_indices.json`文件)
6. 在`predict.py`脚本中导入和训练脚本中同样的模型，并将`model_weight_path`设置成训练好的模型权重路径(默认保存在weights文件夹下)
7. 在`predict.py`脚本中将`img_path`设置成你自己需要预测的图片绝对路径
8. 设置好权重路径`model_weight_path`和预测的图片路径`img_path`就能使用`predict.py`脚本进行预测了
9. 如果要使用自己的数据集，请按照花分类数据集的文件结构进行摆放(即一个类别对应一个文件夹)，并且将训练以及预测脚本中的`num_classes`设置成你自己数据的类别数


================================================
FILE: pytorch_classification/Test10_regnet/model.py
================================================
from typing import Optional

import numpy as np
import torch
import torch.nn as nn
from torch import Tensor


def _make_divisible(ch, divisor=8, min_ch=None):
    """
    Round a channel count to a nearby multiple of ``divisor``.

    Taken from the original TensorFlow repository, where it ensures that all
    layers have a channel number divisible by 8:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py

    The result is never smaller than ``min_ch`` (``divisor`` when not given)
    and never more than 10% below ``ch``.
    """
    lower_bound = divisor if min_ch is None else min_ch
    # nearest multiple of divisor (halves round up)
    nearest = int(ch + divisor / 2) // divisor * divisor
    rounded = max(lower_bound, nearest)
    # rounding down must not lose more than 10% of the requested channels
    if rounded < 0.9 * ch:
        rounded += divisor
    return rounded


def _mcfg(**kwargs):
    cfg = dict(se_ratio=0., bottle_ratio=1., stem_width=32)
    cfg.update(**kwargs)
    return cfg


# Per-variant RegNet configuration table, keyed by lower-case model name.
# Every entry is built with `_mcfg`, so it also carries the defaults
# se_ratio=0., bottle_ratio=1. and stem_width=32 unless overridden.
#   w0, wa, wm, depth -> passed to generate_width_depth() to derive block widths
#   group_w           -> group width used for every stage
#   se_ratio          -> > 0 enables Squeeze-and-Excitation (set on all regnety_* entries)
# `bottle_ratio` is stored in each config but is not read by the model code in this file.
model_cfgs = {
    "regnetx_200mf": _mcfg(w0=24, wa=36.44, wm=2.49, group_w=8, depth=13),
    "regnetx_400mf": _mcfg(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22),
    "regnetx_600mf": _mcfg(w0=48, wa=36.97, wm=2.24, group_w=24, depth=16),
    "regnetx_800mf": _mcfg(w0=56, wa=35.73, wm=2.28, group_w=16, depth=16),
    "regnetx_1.6gf": _mcfg(w0=80, wa=34.01, wm=2.25, group_w=24, depth=18),
    "regnetx_3.2gf": _mcfg(w0=88, wa=26.31, wm=2.25, group_w=48, depth=25),
    "regnetx_4.0gf": _mcfg(w0=96, wa=38.65, wm=2.43, group_w=40, depth=23),
    "regnetx_6.4gf": _mcfg(w0=184, wa=60.83, wm=2.07, group_w=56, depth=17),
    "regnetx_8.0gf": _mcfg(w0=80, wa=49.56, wm=2.88, group_w=120, depth=23),
    "regnetx_12gf": _mcfg(w0=168, wa=73.36, wm=2.37, group_w=112, depth=19),
    "regnetx_16gf": _mcfg(w0=216, wa=55.59, wm=2.1, group_w=128, depth=22),
    "regnetx_32gf": _mcfg(w0=320, wa=69.86, wm=2.0, group_w=168, depth=23),
    "regnety_200mf": _mcfg(w0=24, wa=36.44, wm=2.49, group_w=8, depth=13, se_ratio=0.25),
    "regnety_400mf": _mcfg(w0=48, wa=27.89, wm=2.09, group_w=8, depth=16, se_ratio=0.25),
    "regnety_600mf": _mcfg(w0=48, wa=32.54, wm=2.32, group_w=16, depth=15, se_ratio=0.25),
    "regnety_800mf": _mcfg(w0=56, wa=38.84, wm=2.4, group_w=16, depth=14, se_ratio=0.25),
    "regnety_1.6gf": _mcfg(w0=48, wa=20.71, wm=2.65, group_w=24, depth=27, se_ratio=0.25),
    "regnety_3.2gf": _mcfg(w0=80, wa=42.63, wm=2.66, group_w=24, depth=21, se_ratio=0.25),
    "regnety_4.0gf": _mcfg(w0=96, wa=31.41, wm=2.24, group_w=64, depth=22, se_ratio=0.25),
    "regnety_6.4gf": _mcfg(w0=112, wa=33.22, wm=2.27, group_w=72, depth=25, se_ratio=0.25),
    "regnety_8.0gf": _mcfg(w0=192, wa=76.82, wm=2.19, group_w=56, depth=17, se_ratio=0.25),
    "regnety_12gf": _mcfg(w0=168, wa=73.36, wm=2.37, group_w=112, depth=19, se_ratio=0.25),
    "regnety_16gf": _mcfg(w0=200, wa=106.23, wm=2.48, group_w=112, depth=18, se_ratio=0.25),
    "regnety_32gf": _mcfg(w0=232, wa=115.89, wm=2.53, group_w=232, depth=20, se_ratio=0.25)
}


def generate_width_depth(wa, w0, wm, depth, q=8):
    """Generate the per-block widths from the RegNet parameters.

    A linear width ramp ``w0 + wa * i`` (for block index ``i`` in
    ``range(depth)``) is snapped to the nearest power-of-``wm`` multiple of
    ``w0`` and then rounded to a multiple of ``q``.

    Returns:
        (widths, num_stages): ``widths`` is a list with one int per block and
        ``num_stages`` is the number of distinct widths. The function asserts
        that exactly 4 stages are produced.
    """
    assert wa > 0 and w0 > 0 and wm > 1 and w0 % q == 0
    block_index = np.arange(depth)
    linear_widths = block_index * wa + w0
    # Exponent of wm that best approximates each linear width.
    exponents = np.round(np.log(linear_widths / w0) / np.log(wm))
    quantized = np.round(np.divide(w0 * np.power(wm, exponents), q)) * q

    num_stages = len(np.unique(quantized))
    max_stage = exponents.max() + 1
    assert num_stages == int(max_stage)
    assert num_stages == 4
    return quantized.astype(int).tolist(), num_stages


def adjust_width_groups_comp(widths: list, groups: list):
    """Make stage widths and group widths compatible with each other.

    Each group width is capped at its stage width, then each stage width is
    rounded to the nearest integral multiple of its (capped) group width.

    Returns:
        (widths, groups): the adjusted lists, in the same order as the inputs.
    """
    fitted_widths = []
    fitted_groups = []
    for width, group in zip(widths, groups):
        group = min(group, width)
        fitted_groups.append(group)
        fitted_widths.append(int(round(width / group) * group))
    return fitted_widths, fitted_groups


class ConvBNAct(nn.Module):
    """Bias-free Conv2d followed by BatchNorm2d and an activation.

    Passing ``act=None`` replaces the activation with ``nn.Identity``.
    NOTE: the default ``act`` is a single ``nn.ReLU`` instance created once at
    definition time, so every layer relying on the default shares that object.
    """

    def __init__(self,
                 in_c: int,
                 out_c: int,
                 kernel_s: int = 1,
                 stride: int = 1,
                 padding: int = 0,
                 groups: int = 1,
                 act: Optional[nn.Module] = nn.ReLU(inplace=True)):
        super().__init__()

        self.conv = nn.Conv2d(in_c,
                              out_c,
                              kernel_s,
                              stride=stride,
                              padding=padding,
                              groups=groups,
                              bias=False)
        self.bn = nn.BatchNorm2d(out_c)
        self.act = nn.Identity() if act is None else act

    def forward(self, x: Tensor) -> Tensor:
        return self.act(self.bn(self.conv(x)))


class RegHead(nn.Module):
    """Classification head: adaptive average pool -> flatten -> dropout -> linear.

    Dropout is only instantiated when ``drop_ratio > 0``; otherwise an
    ``nn.Identity`` takes its place.
    """

    def __init__(self,
                 in_unit: int = 368,
                 out_unit: int = 1000,
                 output_size: tuple = (1, 1),
                 drop_ratio: float = 0.25):
        super().__init__()
        self.pool = nn.AdaptiveAvgPool2d(output_size)
        self.dropout = nn.Dropout(p=drop_ratio) if drop_ratio > 0 else nn.Identity()
        self.fc = nn.Linear(in_features=in_unit, out_features=out_unit)

    def forward(self, x: Tensor) -> Tensor:
        pooled = torch.flatten(self.pool(x), start_dim=1)
        return self.fc(self.dropout(pooled))


class SqueezeExcitation(nn.Module):
    """Squeeze-and-Excitation channel gating.

    The squeezed width is ``int(input_c * se_ratio)``, i.e. it is derived from
    ``input_c`` while the 1x1 convolutions operate on ``expand_c`` channels.
    """

    def __init__(self, input_c: int, expand_c: int, se_ratio: float = 0.25):
        super().__init__()
        hidden_c = int(input_c * se_ratio)
        self.fc1 = nn.Conv2d(expand_c, hidden_c, 1)
        self.ac1 = nn.ReLU(inplace=True)
        self.fc2 = nn.Conv2d(hidden_c, expand_c, 1)
        self.ac2 = nn.Sigmoid()

    def forward(self, x: Tensor) -> Tensor:
        # Global average over the spatial dims, kept as [N, C, 1, 1].
        gate = x.mean((2, 3), keepdim=True)
        gate = self.ac1(self.fc1(gate))
        gate = self.ac2(self.fc2(gate))
        return gate * x


class Bottleneck(nn.Module):
    """Residual block used inside every RegNet stage.

    Main path: 1x1 conv -> grouped 3x3 conv (``out_c // group_width`` groups,
    carries the stride) -> optional Squeeze-and-Excitation -> 1x1 conv without
    activation -> optional dropout. The shortcut is a 1x1 ConvBN projection
    whenever the channel count or the stride changes, otherwise an identity.
    The two paths are summed and passed through a final ReLU.
    """

    def __init__(self,
                 in_c: int,
                 out_c: int,
                 stride: int = 1,
                 group_width: int = 1,
                 se_ratio: float = 0.,
                 drop_ratio: float = 0.):
        super().__init__()

        # Sub-modules are registered in the same order as they are applied.
        self.conv1 = ConvBNAct(in_c=in_c, out_c=out_c, kernel_s=1)
        self.conv2 = ConvBNAct(in_c=out_c,
                               out_c=out_c,
                               kernel_s=3,
                               stride=stride,
                               padding=1,
                               groups=out_c // group_width)
        self.se = SqueezeExcitation(in_c, out_c, se_ratio) if se_ratio > 0 else nn.Identity()
        self.conv3 = ConvBNAct(in_c=out_c, out_c=out_c, kernel_s=1, act=None)
        self.ac3 = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(p=drop_ratio) if drop_ratio > 0 else nn.Identity()

        needs_projection = (in_c != out_c) or (stride != 1)
        if needs_projection:
            self.downsample = ConvBNAct(in_c=in_c, out_c=out_c, kernel_s=1, stride=stride, act=None)
        else:
            self.downsample = nn.Identity()

    def zero_init_last_bn(self):
        """Zero the scale of the last BatchNorm on the main path."""
        nn.init.zeros_(self.conv3.bn.weight)

    def forward(self, x: Tensor) -> Tensor:
        out = self.conv2(self.conv1(x))
        out = self.conv3(self.se(out))
        out = self.dropout(out)

        # Residual connection (in-place add on the main-path tensor).
        out += self.downsample(x)
        return self.ac3(out)


class RegStage(nn.Module):
    """A sequence of ``depth`` Bottleneck blocks registered as ``b1``, ``b2``, ...

    Only the first block changes the channel count (``in_c`` -> ``out_c``) and
    uses stride 2; the remaining blocks keep ``out_c`` channels with stride 1.
    """

    def __init__(self,
                 in_c: int,
                 out_c: int,
                 depth: int,
                 group_width: int,
                 se_ratio: float):
        super().__init__()
        for idx in range(depth):
            is_first = idx == 0
            block = Bottleneck(in_c=in_c if is_first else out_c,
                               out_c=out_c,
                               stride=2 if is_first else 1,
                               group_width=group_width,
                               se_ratio=se_ratio)
            self.add_module("b{}".format(idx + 1), block)

    def forward(self, x: Tensor) -> Tensor:
        # Blocks are applied in registration order.
        for blk in self.children():
            x = blk(x)
        return x


class RegNet(nn.Module):
    """RegNet model.

    Paper: https://arxiv.org/abs/2003.13678
    Original Impl: https://github.com/facebookresearch/pycls/blob/master/pycls/models/regnet.py
    and refer to: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/regnet.py

    The network is a stem (3x3 stride-2 ConvBNAct), the stages ``s1``..``sN``
    described by ``_build_stage_info`` and a ``RegHead`` classifier.
    ``forward`` applies the child modules in registration order.
    """

    def __init__(self,
                 cfg: dict,
                 in_c: int = 3,
                 num_classes: int = 1000,
                 zero_init_last_bn: bool = True):
        super().__init__()

        # Stem
        stem_width = cfg["stem_width"]
        self.stem = ConvBNAct(in_c, out_c=stem_width, kernel_s=3, stride=2, padding=1)

        # Stages: each one consumes the output channels of the previous one.
        prev_c = stem_width
        for idx, stage_kwargs in enumerate(self._build_stage_info(cfg), start=1):
            self.add_module("s{}".format(idx), RegStage(in_c=prev_c, **stage_kwargs))
            prev_c = stage_kwargs["out_c"]

        # Head
        self.head = RegHead(in_unit=prev_c, out_unit=num_classes)

        # Weight initialisation
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_uniform_(module.weight, mode="fan_out",  nonlinearity='relu')
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, mean=0.0, std=0.01)
                nn.init.zeros_(module.bias)

        # Optionally zero the last BN scale of every block that supports it.
        if zero_init_last_bn:
            for module in self.modules():
                if hasattr(module, "zero_init_last_bn"):
                    module.zero_init_last_bn()

    def forward(self, x: Tensor) -> Tensor:
        for child in self.children():
            x = child(x)
        return x

    @staticmethod
    def _build_stage_info(cfg: dict):
        """Translate a model config into one kwargs dict per ``RegStage``."""
        widths, num_stages = generate_width_depth(cfg["wa"], cfg["w0"], cfg["wm"], cfg["depth"])

        # Distinct widths give the stage widths; their counts give the depths.
        stage_widths, stage_depths = np.unique(widths, return_counts=True)
        stage_widths, stage_groups = adjust_width_groups_comp(stage_widths,
                                                              [cfg['group_w']] * num_stages)

        return [dict(out_c=stage_widths[i],
                     depth=stage_depths[i],
                     group_width=stage_groups[i],
                     se_ratio=cfg["se_ratio"])
                for i in range(num_stages)]


def create_regnet(model_name="RegNetX_200MF", num_classes=1000):
    """Instantiate a RegNet by name.

    The name is lower-cased and ``-`` is replaced by ``_`` before it is looked
    up in ``model_cfgs``. Unknown names print the supported list and raise
    ``KeyError``.
    """
    key = model_name.lower().replace("-", "_")
    if key in model_cfgs:
        return RegNet(cfg=model_cfgs[key], num_classes=num_classes)

    print("support model name: \n{}".format("\n".join(model_cfgs.keys())))
    raise KeyError("not support model name: {}".format(key))


================================================
FILE: pytorch_classification/Test10_regnet/my_dataset.py
================================================
from PIL import Image
import torch
from torch.utils.data import Dataset


class MyDataSet(Dataset):
    """Custom dataset backed by parallel lists of image paths and class indices."""

    def __init__(self, images_path: list, images_class: list, transform=None):
        self.images_path = images_path
        self.images_class = images_class
        self.transform = transform

    def __len__(self):
        return len(self.images_path)

    def __getitem__(self, item):
        path = self.images_path[item]
        img = Image.open(path)
        # Only RGB (colour) images are accepted; e.g. mode 'L' is greyscale.
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(path))
        label = self.images_class[item]

        if self.transform is None:
            return img, label
        return self.transform(img), label

    @staticmethod
    def collate_fn(batch):
        """Stack a list of ``(image, label)`` samples into batch tensors."""
        # For the official default_collate implementation see
        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        images, labels = zip(*batch)
        return torch.stack(images, dim=0), torch.as_tensor(labels)


================================================
FILE: pytorch_classification/Test10_regnet/predict.py
================================================
import os
import json

import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

from model import create_regnet


def main():
    """Classify a single image with a trained RegNet and plot the result.

    Reads the image at ``img_path``, the index-to-class mapping from
    ``./class_indices.json`` and the weights at ``model_weight_path``, prints
    the probability of every class and shows the image titled with the
    top-1 prediction.

    Raises:
        AssertionError: if the image, the class index file or the weights
            file does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose(
        [transforms.Resize(256),
         transforms.CenterCrop(224),
         transforms.ToTensor(),
         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path)
    # Normalize above is configured for exactly 3 channels, so greyscale,
    # palette or RGBA inputs are converted to RGB up front.
    img = Image.open(img_path).convert("RGB")
    plt.imshow(img)
    # [C, H, W]
    img = data_transform(img)
    # expand batch dimension -> [N, C, H, W]
    img = torch.unsqueeze(img, dim=0)

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = create_regnet(model_name="RegNetY_400MF", num_classes=5).to(device)
    # load model weights
    model_weight_path = "./weights/model-29.pth"
    assert os.path.exists(model_weight_path), "file: '{}' does not exist.".format(model_weight_path)
    model.load_state_dict(torch.load(model_weight_path, map_location=device))
    model.eval()
    with torch.no_grad():
        # predict class
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).numpy()

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                                 predict[predict_cla].numpy())
    plt.title(print_res)
    for i in range(len(predict)):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  predict[i].numpy()))
    plt.show()


if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/Test10_regnet/pretrain_weights.py
================================================
import requests


# Download URL of the pretrained weights for each RegNet variant, keyed by
# lower-case model name. All files are release assets of the
# rwightman/pytorch-image-models repository; note that "regnety_3.2gf" lives
# under the `v0.1-weights` release tag while every other entry uses
# `v0.1-regnet`.
download_links = {
    "regnetx_200mf": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_002-e7e85e5c.pth',
    "regnetx_400mf": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_004-7d0e9424.pth',
    "regnetx_600mf": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_006-85ec1baa.pth',
    "regnetx_800mf": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_008-d8b470eb.pth',
    "regnetx_1.6gf": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_016-65ca972a.pth',
    "regnetx_3.2gf": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_032-ed0c7f7e.pth',
    "regnetx_4.0gf": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_040-73c2a654.pth',
    "regnetx_6.4gf": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_064-29278baa.pth',
    "regnetx_8.0gf": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_080-7c7fcab1.pth',
    "regnetx_12gf": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_120-65d5521e.pth',
    "regnetx_16gf": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_160-c98c4112.pth',
    "regnetx_32gf": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_320-8ea38b93.pth',
    "regnety_200mf": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_002-e68ca334.pth',
    "regnety_400mf": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_004-0db870e6.pth',
    "regnety_600mf": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_006-c67e57ec.pth',
    "regnety_800mf": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_008-dc900dbe.pth',
    "regnety_1.6gf": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_016-54367f74.pth',
    "regnety_3.2gf": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/regnety_032_ra-7f2439f9.pth',
    "regnety_4.0gf": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_040-f0d569f9.pth',
    "regnety_6.4gf": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_064-0a48325c.pth',
    "regnety_8.0gf": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_080-e7f3eb93.pth',
    "regnety_12gf": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_120-721ba79a.pth',
    "regnety_16gf": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_160-d64013cd.pth',
    "regnety_32gf": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_320-ba464b29.pth',
}


def main():
    """Download the pretrained weights selected by ``model_name``.

    The file is streamed in 2048-byte chunks to ``./<model_name>.pth`` while a
    progress line is printed.

    Raises:
        KeyError: if ``model_name`` has no entry in ``download_links``.
        requests.HTTPError: if the server answers with an error status.
    """
    model_name = "regnetx_400mf"
    print("download weights name: " + model_name)

    if model_name not in download_links:
        raise KeyError("{} not in download_links".format(model_name))

    headers = {"Content-Type": "application/json",
               "Connection": "close",
               "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0"}

    save_weights = "./" + model_name + ".pth"

    req = requests.get(url=download_links[model_name],
                       stream=True, headers=headers, timeout=10)
    try:
        req.raise_for_status()
        # The header may be absent (e.g. chunked transfer); 0 means "unknown size".
        info = int(req.headers.get("Content-Length", 0))

        accumulate_data = 0
        with open(save_weights, "wb") as f:
            for data in req.iter_content(2048):
                f.write(data)
                # Count the bytes actually received: chunks may be shorter than
                # the requested size, so adding a fixed 2048 would over-count.
                accumulate_data += len(data)
                if info > 0:
                    print("\rdownload: [{}Mb/{}Mb] {}%".format(int(accumulate_data / 1024 / 1024),
                                                               int(info / 1024 / 1024),
                                                               int(accumulate_data / info * 100)), end="")
                else:
                    print("\rdownload: [{}Mb]".format(int(accumulate_data / 1024 / 1024)), end="")
    finally:
        # Always release the connection, even when the download fails midway.
        req.close()


if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/Test10_regnet/train.py
================================================
import os
import math
import argparse

import torch
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
import torch.optim.lr_scheduler as lr_scheduler

from model import create_regnet
from my_dataset import MyDataSet
from utils import read_split_data, train_one_epoch, evaluate


def main(args):
    """Train a RegNet classifier and validate it after every epoch.

    Args:
        args: parsed command-line namespace providing ``device``, ``data_path``,
            ``batch_size``, ``model_name``, ``num_classes``, ``weights``,
            ``freeze_layers``, ``lr``, ``lrf`` and ``epochs``.

    Side effects:
        Creates ``./weights`` and saves ``model-<epoch>.pth`` there after each
        epoch, and logs loss / accuracy / learning rate to TensorBoard.

    Raises:
        FileNotFoundError: if ``args.weights`` is non-empty but the file is missing.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")

    print(args)
    print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
    tb_writer = SummaryWriter()
    os.makedirs("./weights", exist_ok=True)

    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # Training dataset
    train_dataset = MyDataSet(images_path=train_images_path,
                              images_class=train_images_label,
                              transform=data_transform["train"])

    # Validation dataset
    val_dataset = MyDataSet(images_path=val_images_path,
                            images_class=val_images_label,
                            transform=data_transform["val"])

    batch_size = args.batch_size
    # os.cpu_count() may return None when the count cannot be determined.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_dataset.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_dataset.collate_fn)

    model = create_regnet(model_name=args.model_name,
                          num_classes=args.num_classes).to(device)
    # print(model)

    # Load pretrained weights when a path is given
    if args.weights != "":
        if os.path.exists(args.weights):
            weights_dict = torch.load(args.weights, map_location=device)
            # Build the model state dict once, and skip checkpoint entries the
            # model does not have (indexing them would raise KeyError) or whose
            # element count differs (e.g. a classifier for another class count).
            model_state = model.state_dict()
            load_weights_dict = {k: v for k, v in weights_dict.items()
                                 if k in model_state and model_state[k].numel() == v.numel()}
            print(model.load_state_dict(load_weights_dict, strict=False))
        else:
            raise FileNotFoundError("not found weights file: {}".format(args.weights))

    # Optionally freeze weights
    if args.freeze_layers:
        for name, para in model.named_parameters():
            # Freeze everything except the parameters whose name contains "head"
            if "head" not in name:
                para.requires_grad_(False)
            else:
                print("train {}".format(name))

    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=5E-5)
    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    for epoch in range(args.epochs):
        # train
        mean_loss = train_one_epoch(model=model,
                                    optimizer=optimizer,
                                    data_loader=train_loader,
                                    device=device,
                                    epoch=epoch)

        scheduler.step()

        # validate
        acc = evaluate(model=model,
                       data_loader=val_loader,
                       device=device)

        print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3)))
        tags = ["loss", "accuracy", "learning_rate"]
        tb_writer.add_scalar(tags[0], mean_loss, epoch)
        tb_writer.add_scalar(tags[1], acc, epoch)
        tb_writer.add_scalar(tags[2], optimizer.param_groups[0]["lr"], epoch)

        torch.save(model.state_dict(), "./weights/model-{}.pth".format(epoch))

    # Flush pending events and release the log file.
    tb_writer.close()


if __name__ == '__main__':
    def _str2bool(value):
        """Parse a command-line boolean.

        ``type=bool`` would turn every non-empty string (including "False")
        into True, so the text is interpreted explicitly instead.
        """
        text = str(value).strip().lower()
        if text in ("true", "t", "yes", "y", "1"):
            return True
        if text in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got '{}'".format(value))

    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--epochs', type=int, default=30)
    parser.add_argument('--batch-size', type=int, default=16)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--lrf', type=float, default=0.01)

    # Root directory of the dataset
    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
    parser.add_argument('--data-path', type=str,
                        default="/data/flower_photos")
    parser.add_argument('--model-name', default='RegNetY_400MF', help='create model name')

    # Pretrained weights download
    # link: https://pan.baidu.com/s/1XTo3walj9ai7ZhWz7jh-YA  password: 8lmu
    parser.add_argument('--weights', type=str, default='regnety_400mf.pth',
                        help='initial weights path')
    parser.add_argument('--freeze-layers', type=_str2bool, default=False)
    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')

    opt = parser.parse_args()

    main(opt)


================================================
FILE: pytorch_classification/Test10_regnet/utils.py
================================================
import os
import sys
import json
import pickle
import random

import torch
from tqdm import tqdm

import matplotlib.pyplot as plt


def read_split_data(root: str, val_rate: float = 0.2):
    """Split an image-folder dataset into training and validation lists.

    Every sub-directory of ``root`` is one class. The index-to-class mapping
    is written to ``class_indices.json`` in the current working directory.
    For each class, ``int(len(images) * val_rate)`` images are sampled for
    validation with a fixed random seed; the rest are used for training.

    Args:
        root: dataset root directory.
        val_rate: fraction of each class assigned to the validation set.

    Returns:
        (train_images_path, train_images_label, val_images_path, val_images_label)

    Raises:
        AssertionError: if ``root`` does not exist or either split is empty.
    """
    random.seed(0)  # fixed seed so the split is reproducible
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # One sub-directory per class
    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]
    # Sort so the order is identical on every platform
    flower_class.sort()
    # Map class name -> numeric index, and persist the inverse mapping
    class_indices = dict((k, v) for v, k in enumerate(flower_class))
    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of each training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of each validation image
    every_class_num = []  # number of samples per class
    supported = [".jpg", ".JPG", ".png", ".PNG"]  # accepted file extensions
    # Walk the files of every class directory
    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # Collect every file with a supported extension
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        # Sort so the order is identical on every platform
        images.sort()
        # Numeric index of this class
        image_class = class_indices[cla]
        # Record the number of samples of this class
        every_class_num.append(len(images))
        # Sample the validation images; a set makes the membership test below
        # O(1) per image instead of a linear scan of the sampled list.
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:  # sampled for validation
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:  # everything else goes to training
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.".format(sum(every_class_num)))
    print("{} images for training.".format(len(train_images_path)))
    print("{} images for validation.".format(len(val_images_path)))
    assert len(train_images_path) > 0, "number of training images must greater than 0."
    assert len(val_images_path) > 0, "number of validation images must greater than 0."

    plot_image = False
    if plot_image:
        # Bar chart of the number of images per class
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # Replace the x ticks 0,1,2,... with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # Put the count on top of each bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        # x-axis label
        plt.xlabel('image class')
        # y-axis label
        plt.ylabel('number of images')
        # chart title
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label


def plot_data_loader_image(data_loader):
    """Show up to four de-normalised images from every batch of ``data_loader``.

    Class names are read from ``./class_indices.json``; one figure is shown
    per batch.

    Raises:
        AssertionError: if ``./class_indices.json`` does not exist.
    """
    batch_size = data_loader.batch_size
    plot_num = min(batch_size, 4)

    json_path = './class_indices.json'
    assert os.path.exists(json_path), json_path + " does not exist."
    # Context manager so the file handle is closed right after parsing.
    with open(json_path, 'r') as json_file:
        class_indices = json.load(json_file)

    for data in data_loader:
        images, labels = data
        for i in range(plot_num):
            # [C, H, W] -> [H, W, C]
            img = images[i].numpy().transpose(1, 2, 0)
            # Undo the Normalize transform
            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255
            label = labels[i].item()
            plt.subplot(1, plot_num, i+1)
            plt.xlabel(class_indices[str(label)])
            plt.xticks([])  # hide x-axis ticks
            plt.yticks([])  # hide y-axis ticks
            plt.imshow(img.astype('uint8'))
        plt.show()


def write_pickle(list_info: list, file_name: str):
    """Serialise ``list_info`` with pickle into the file ``file_name``."""
    with open(file_name, 'wb') as handle:
        pickle.dump(list_info, handle)


def read_pickle(file_name: str) -> list:
    """Load and return the pickled object stored in ``file_name``.

    NOTE: unpickling can execute arbitrary code; only read trusted files.
    """
    with open(file_name, 'rb') as handle:
        return pickle.load(handle)


def train_one_epoch(model, optimizer, data_loader, device, epoch):
    """Run one training epoch and return the mean cross-entropy loss.

    The running mean loss is shown in the tqdm progress bar. If a batch
    produces a non-finite loss, a warning is printed and the process exits
    with status 1 before the optimizer step.
    """
    model.train()
    criterion = torch.nn.CrossEntropyLoss()
    running_mean = torch.zeros(1).to(device)
    optimizer.zero_grad()

    progress = tqdm(data_loader, file=sys.stdout)

    for step, (images, labels) in enumerate(progress):
        logits = model(images.to(device))

        loss = criterion(logits, labels.to(device))
        loss.backward()
        # Incremental update of the mean loss over the batches seen so far
        running_mean = (running_mean * step + loss.detach()) / (step + 1)

        progress.desc = "[epoch {}] mean loss {}".format(epoch, round(running_mean.item(), 3))

        if not torch.isfinite(loss):
            print('WARNING: non-finite loss, ending training ', loss)
            sys.exit(1)

        optimizer.step()
        optimizer.zero_grad()

    return running_mean.item()


@torch.no_grad()
def evaluate(model, data_loader, device):
    """Return the top-1 accuracy of ``model`` over ``data_loader``.

    Accuracy is the number of correct predictions divided by
    ``len(data_loader.dataset)``.
    """
    model.eval()

    # Total number of validation samples
    total_num = len(data_loader.dataset)

    # Running count of correctly classified samples
    correct = torch.zeros(1).to(device)

    for images, labels in tqdm(data_loader, file=sys.stdout):
        logits = model(images.to(device))
        predicted = torch.max(logits, dim=1)[1]
        correct += torch.eq(predicted, labels.to(device)).sum()

    return correct.item() / total_num


================================================
FILE: pytorch_classification/Test11_efficientnetV2/README.md
================================================
## 代码使用简介

1. 下载好数据集，代码中默认使用的是花分类数据集，下载地址: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz),
如果下载不了的话可以通过百度云链接下载: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg 提取码:58p0
2. 在`train.py`脚本中将`--data-path`设置成解压后的`flower_photos`文件夹绝对路径
3. 下载预训练权重，根据自己使用的模型下载对应预训练权重: https://pan.baidu.com/s/1uZX36rvrfEss-JGj4yfzbQ  密码: 5gu1
4. 在`train.py`脚本中将`--weights`参数设成下载好的预训练权重路径
5. 设置好数据集的路径`--data-path`以及预训练权重的路径`--weights`就能使用`train.py`脚本开始训练了(训练过程中会自动生成`class_indices.json`文件)
6. 在`predict.py`脚本中导入和训练脚本中同样的模型，并将`model_weight_path`设置成训练好的模型权重路径(默认保存在weights文件夹下)
7. 在`predict.py`脚本中将`img_path`设置成你自己需要预测的图片绝对路径
8. 设置好权重路径`model_weight_path`和预测的图片路径`img_path`就能使用`predict.py`脚本进行预测了
9. 如果要使用自己的数据集，请按照花分类数据集的文件结构进行摆放(即一个类别对应一个文件夹)，并且将训练以及预测脚本中的`num_classes`设置成你自己数据的类别数


================================================
FILE: pytorch_classification/Test11_efficientnetV2/class_indices.json
================================================
{
    "0": "daisy",
    "1": "dandelion",
    "2": "roses",
    "3": "sunflowers",
    "4": "tulips"
}

================================================
FILE: pytorch_classification/Test11_efficientnetV2/model.py
================================================
from collections import OrderedDict
from functools import partial
from typing import Callable, Optional

import torch.nn as nn
import torch
from torch import Tensor


def drop_path(x, drop_prob: float = 0., training: bool = False):
    """
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
    "Deep Networks with Stochastic Depth", https://arxiv.org/pdf/1603.09382.pdf

    This function is taken from the rwightman.
    It can be seen here:
    https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py#L140

    Args:
        x: input tensor; its first dimension indexes the samples.
        drop_prob: probability of zeroing a whole sample. ``None`` or ``0``
            disables dropping.
        training: dropping is only applied when this is True.

    Returns:
        ``x`` itself when dropping is disabled; otherwise a new tensor in which
        every sample is either all zeros or scaled by ``1 / (1 - drop_prob)``.
    """
    # DropPath's constructor defaults drop_prob to None, so None must behave like 0
    # instead of failing on `1 - None`.
    if drop_prob is None or drop_prob == 0. or not training:
        return x
    keep_prob = 1 - drop_prob
    if keep_prob <= 0.:
        # Every path is dropped. Computing x.div(0) * 0 would produce NaN.
        return torch.zeros_like(x)
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
    random_tensor.floor_()  # binarize: 1 with probability keep_prob, else 0
    output = x.div(keep_prob) * random_tensor
    return output


class DropPath(nn.Module):
    """
    Module form of ``drop_path`` (Stochastic Depth per sample), meant for the
    main path of residual blocks.
    "Deep Networks with Stochastic Depth", https://arxiv.org/pdf/1603.09382.pdf

    The stored ``drop_prob`` is forwarded unchanged to ``drop_path`` together
    with the module's current ``training`` flag.
    """
    def __init__(self, drop_prob=None):
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        # self.training follows train()/eval() on the parent model
        is_training = self.training
        return drop_path(x, drop_prob=self.drop_prob, training=is_training)


class ConvBNAct(nn.Module):
    """
    Bias-free Conv2d followed by a normalization layer and an activation.

    Padding is ``(kernel_size - 1) // 2``. ``norm_layer`` defaults to
    ``nn.BatchNorm2d`` and ``activation_layer`` to ``nn.SiLU``; both are
    factories: the norm is called with ``out_planes``, the activation with
    no arguments.
    """
    def __init__(self,
                 in_planes: int,
                 out_planes: int,
                 kernel_size: int = 3,
                 stride: int = 1,
                 groups: int = 1,
                 norm_layer: Optional[Callable[..., nn.Module]] = None,
                 activation_layer: Optional[Callable[..., nn.Module]] = None):
        super().__init__()

        # resolve the default factories
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        make_act = nn.SiLU if activation_layer is None else activation_layer  # SiLU alias Swish (torch>=1.7)

        self.conv = nn.Conv2d(in_planes,
                              out_planes,
                              kernel_size,
                              stride=stride,
                              padding=(kernel_size - 1) // 2,
                              groups=groups,
                              bias=False)
        self.bn = make_norm(out_planes)
        self.act = make_act()

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))


class SqueezeExcite(nn.Module):
    """
    Squeeze-and-excitation gate over the channels of a 4-D feature map.

    The bottleneck width is ``int(input_c * se_ratio)``: it is derived from the
    block's input channels, while the gate itself acts on ``expand_c`` channels.
    """
    def __init__(self,
                 input_c: int,   # block input channel
                 expand_c: int,  # block expand channel
                 se_ratio: float = 0.25):
        super().__init__()
        reduced_c = int(input_c * se_ratio)
        self.conv_reduce = nn.Conv2d(expand_c, reduced_c, kernel_size=1)
        self.act1 = nn.SiLU()  # alias Swish
        self.conv_expand = nn.Conv2d(reduced_c, expand_c, kernel_size=1)
        self.act2 = nn.Sigmoid()

    def forward(self, x: Tensor) -> Tensor:
        # global average over the two spatial dims, kept as 1x1 so the convs apply
        pooled = x.mean((2, 3), keepdim=True)
        hidden = self.act1(self.conv_reduce(pooled))
        gate = self.act2(self.conv_expand(hidden))
        return gate * x


class MBConv(nn.Module):
    """
    MBConv block: 1x1 expansion -> depth-wise conv -> optional squeeze-excite
    -> 1x1 linear projection.

    A residual shortcut (with DropPath when ``drop_rate > 0``) is used only when
    ``stride == 1`` and ``input_c == out_c``.

    Args:
        kernel_size: kernel size of the depth-wise convolution.
        input_c: number of input channels.
        out_c: number of output channels.
        expand_ratio: channel multiplier of the expansion conv; must not be 1.
        stride: stride of the depth-wise convolution, 1 or 2.
        se_ratio: squeeze-excite ratio; ``<= 0`` disables the SE module.
        drop_rate: DropPath probability applied on the shortcut branch.
        norm_layer: factory called with a channel count to build the norm layer.

    Raises:
        ValueError: if ``stride`` is not 1 or 2, or if ``expand_ratio`` is 1.
    """
    def __init__(self,
                 kernel_size: int,
                 input_c: int,
                 out_c: int,
                 expand_ratio: int,
                 stride: int,
                 se_ratio: float,
                 drop_rate: float,
                 norm_layer: Callable[..., nn.Module]):
        super(MBConv, self).__init__()

        if stride not in [1, 2]:
            raise ValueError("illegal stride value.")

        # In EfficientNetV2 no MBConv uses expansion=1, so the expansion conv always exists.
        # Raised explicitly (instead of assert) so the check survives `python -O`
        # and matches the stride check above.
        if expand_ratio == 1:
            raise ValueError("MBConv does not support expand_ratio == 1.")

        self.has_shortcut = (stride == 1 and input_c == out_c)

        activation_layer = nn.SiLU  # alias Swish
        expanded_c = input_c * expand_ratio

        # Point-wise expansion
        self.expand_conv = ConvBNAct(input_c,
                                     expanded_c,
                                     kernel_size=1,
                                     norm_layer=norm_layer,
                                     activation_layer=activation_layer)

        # Depth-wise convolution
        self.dwconv = ConvBNAct(expanded_c,
                                expanded_c,
                                kernel_size=kernel_size,
                                stride=stride,
                                groups=expanded_c,
                                norm_layer=norm_layer,
                                activation_layer=activation_layer)

        self.se = SqueezeExcite(input_c, expanded_c, se_ratio) if se_ratio > 0 else nn.Identity()

        # Point-wise linear projection
        self.project_conv = ConvBNAct(expanded_c,
                                      out_planes=out_c,
                                      kernel_size=1,
                                      norm_layer=norm_layer,
                                      activation_layer=nn.Identity)  # no activation here, hence Identity

        self.out_channels = out_c

        # the DropPath layer is only created when the shortcut connection is used
        self.drop_rate = drop_rate
        if self.has_shortcut and drop_rate > 0:
            self.dropout = DropPath(drop_rate)

    def forward(self, x: Tensor) -> Tensor:
        result = self.expand_conv(x)
        result = self.dwconv(result)
        result = self.se(result)
        result = self.project_conv(result)

        if self.has_shortcut:
            # self.dropout exists exactly when has_shortcut and drop_rate > 0
            if self.drop_rate > 0:
                result = self.dropout(result)
            result += x

        return result


class FusedMBConv(nn.Module):
    """
    Fused-MBConv block: a single k x k expansion conv followed by a 1x1 linear
    projection. When ``expand_ratio == 1`` the block is only one k x k conv
    with activation.

    A residual shortcut (with DropPath when ``drop_rate > 0``) is used only when
    ``stride == 1`` and ``input_c == out_c``.

    Args:
        kernel_size: kernel size of the expansion conv (or of the only conv
            when ``expand_ratio == 1``).
        input_c: number of input channels.
        out_c: number of output channels.
        expand_ratio: channel multiplier of the expansion conv.
        stride: stride of the k x k convolution, 1 or 2.
        se_ratio: must be 0; this block has no squeeze-excite module.
        drop_rate: DropPath probability applied on the shortcut branch.
        norm_layer: factory called with a channel count to build the norm layer.

    Raises:
        ValueError: if ``stride`` is not 1 or 2, or if ``se_ratio`` is not 0.
    """
    def __init__(self,
                 kernel_size: int,
                 input_c: int,
                 out_c: int,
                 expand_ratio: int,
                 stride: int,
                 se_ratio: float,
                 drop_rate: float,
                 norm_layer: Callable[..., nn.Module]):
        super(FusedMBConv, self).__init__()

        # Explicit raises (instead of assert) so the checks survive `python -O`.
        if stride not in [1, 2]:
            raise ValueError("illegal stride value.")
        if se_ratio != 0:
            raise ValueError("FusedMBConv does not support squeeze-excite (se_ratio must be 0).")

        self.has_shortcut = stride == 1 and input_c == out_c
        self.drop_rate = drop_rate

        self.has_expansion = expand_ratio != 1

        activation_layer = nn.SiLU  # alias Swish
        expanded_c = input_c * expand_ratio

        # the expand conv only exists when the expand ratio is not 1
        if self.has_expansion:
            # Expansion convolution
            self.expand_conv = ConvBNAct(input_c,
                                         expanded_c,
                                         kernel_size=kernel_size,
                                         stride=stride,
                                         norm_layer=norm_layer,
                                         activation_layer=activation_layer)

            self.project_conv = ConvBNAct(expanded_c,
                                          out_c,
                                          kernel_size=1,
                                          norm_layer=norm_layer,
                                          activation_layer=nn.Identity)  # note: no activation
        else:
            # only project_conv exists in this case
            self.project_conv = ConvBNAct(input_c,
                                          out_c,
                                          kernel_size=kernel_size,
                                          stride=stride,
                                          norm_layer=norm_layer,
                                          activation_layer=activation_layer)  # note: with activation

        self.out_channels = out_c

        # the DropPath layer is only created when the shortcut connection is used
        if self.has_shortcut and drop_rate > 0:
            self.dropout = DropPath(drop_rate)

    def forward(self, x: Tensor) -> Tensor:
        if self.has_expansion:
            result = self.expand_conv(x)
            result = self.project_conv(result)
        else:
            result = self.project_conv(x)

        if self.has_shortcut:
            # self.dropout exists exactly when has_shortcut and drop_rate > 0
            if self.drop_rate > 0:
                result = self.dropout(result)

            result += x

        return result


class EfficientNetV2(nn.Module):
    """
    EfficientNetV2 classifier: stem conv -> stack of FusedMBConv / MBConv
    blocks -> head (1x1 conv, global average pool, flatten, optional dropout,
    linear classifier).

    Args:
        model_cnf: one row per stage, each with exactly 8 entries:
            repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio.
            ``operator == 0`` selects FusedMBConv, anything else MBConv.
        num_classes: output size of the classifier.
        num_features: channel count produced by the head's 1x1 conv.
        dropout_rate: dropout before the classifier; no dropout layer is added
            when it is not positive.
        drop_connect_rate: upper bound of the per-block DropPath rate, which
            grows linearly with the block index.
    """
    def __init__(self,
                 model_cnf: list,
                 num_classes: int = 1000,
                 num_features: int = 1280,
                 dropout_rate: float = 0.2,
                 drop_connect_rate: float = 0.2):
        super().__init__()

        for stage in model_cnf:
            assert len(stage) == 8

        norm_layer = partial(nn.BatchNorm2d, eps=1e-3, momentum=0.1)

        # the stem outputs as many channels as the first stage consumes
        self.stem = ConvBNAct(3,
                              model_cnf[0][4],
                              kernel_size=3,
                              stride=2,
                              norm_layer=norm_layer)  # activation defaults to SiLU

        total_blocks = sum(stage[0] for stage in model_cnf)
        layers = []
        for repeats, kernel, stride, expansion, in_c, out_c, operator, se_ratio in model_cnf:
            block_cls = MBConv if operator != 0 else FusedMBConv
            for idx in range(repeats):
                is_first = idx == 0
                # len(layers) is the global index of the block being built
                layers.append(block_cls(kernel_size=kernel,
                                        input_c=in_c if is_first else out_c,
                                        out_c=out_c,
                                        expand_ratio=expansion,
                                        stride=stride if is_first else 1,
                                        se_ratio=se_ratio,
                                        drop_rate=drop_connect_rate * len(layers) / total_blocks,
                                        norm_layer=norm_layer))
        self.blocks = nn.Sequential(*layers)

        # head input = out_c of the last stage
        head_layers = [("project_conv", ConvBNAct(model_cnf[-1][-3],
                                                  num_features,
                                                  kernel_size=1,
                                                  norm_layer=norm_layer)),  # activation defaults to SiLU
                       ("avgpool", nn.AdaptiveAvgPool2d(1)),
                       ("flatten", nn.Flatten())]
        if dropout_rate > 0:
            head_layers.append(("dropout", nn.Dropout(p=dropout_rate, inplace=True)))
        head_layers.append(("classifier", nn.Linear(num_features, num_classes)))

        self.head = nn.Sequential(OrderedDict(head_layers))

        # initial weights
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode="fan_out")
                if module.bias is not None:
                    nn.init.zeros_(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.zeros_(module.bias)

    def forward(self, x: Tensor) -> Tensor:
        return self.head(self.blocks(self.stem(x)))


def efficientnetv2_s(num_classes: int = 1000):
    """
    Build the "s" variant of EfficientNetV2 (head dropout 0.2).
    https://arxiv.org/abs/2104.00298

    Args:
        num_classes: output size of the classifier.
    """
    # train_size: 300, eval_size: 384

    # columns: repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio
    stages = [
        [2, 3, 1, 1, 24, 24, 0, 0],
        [4, 3, 2, 4, 24, 48, 0, 0],
        [4, 3, 2, 4, 48, 64, 0, 0],
        [6, 3, 2, 4, 64, 128, 1, 0.25],
        [9, 3, 1, 6, 128, 160, 1, 0.25],
        [15, 3, 2, 6, 160, 256, 1, 0.25],
    ]

    return EfficientNetV2(model_cnf=stages,
                          num_classes=num_classes,
                          dropout_rate=0.2)


def efficientnetv2_m(num_classes: int = 1000):
    """
    Build the "m" variant of EfficientNetV2 (head dropout 0.3).
    https://arxiv.org/abs/2104.00298

    Args:
        num_classes: output size of the classifier.
    """
    # train_size: 384, eval_size: 480

    # columns: repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio
    stages = [
        [3, 3, 1, 1, 24, 24, 0, 0],
        [5, 3, 2, 4, 24, 48, 0, 0],
        [5, 3, 2, 4, 48, 80, 0, 0],
        [7, 3, 2, 4, 80, 160, 1, 0.25],
        [14, 3, 1, 6, 160, 176, 1, 0.25],
        [18, 3, 2, 6, 176, 304, 1, 0.25],
        [5, 3, 1, 6, 304, 512, 1, 0.25],
    ]

    return EfficientNetV2(model_cnf=stages,
                          num_classes=num_classes,
                          dropout_rate=0.3)


def efficientnetv2_l(num_classes: int = 1000):
    """
    Build the "l" variant of EfficientNetV2 (head dropout 0.4).
    https://arxiv.org/abs/2104.00298

    Args:
        num_classes: output size of the classifier.
    """
    # train_size: 384, eval_size: 480

    # columns: repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio
    stages = [
        [4, 3, 1, 1, 32, 32, 0, 0],
        [7, 3, 2, 4, 32, 64, 0, 0],
        [7, 3, 2, 4, 64, 96, 0, 0],
        [10, 3, 2, 4, 96, 192, 1, 0.25],
        [19, 3, 1, 6, 192, 224, 1, 0.25],
        [25, 3, 2, 6, 224, 384, 1, 0.25],
        [7, 3, 1, 6, 384, 640, 1, 0.25],
    ]

    return EfficientNetV2(model_cnf=stages,
                          num_classes=num_classes,
                          dropout_rate=0.4)


================================================
FILE: pytorch_classification/Test11_efficientnetV2/my_dataset.py
================================================
from PIL import Image
import torch
from torch.utils.data import Dataset


class MyDataSet(Dataset):
    """Custom dataset pairing image file paths with integer class labels."""

    def __init__(self, images_path: list, images_class: list, transform=None):
        self.images_path, self.images_class = images_path, images_class
        self.transform = transform

    def __len__(self):
        return len(self.images_path)

    def __getitem__(self, item):
        path = self.images_path[item]
        img = Image.open(path)
        # 'RGB' is a colour image, 'L' a grayscale one; only RGB is accepted
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(path))
        label = self.images_class[item]

        sample = img if self.transform is None else self.transform(img)
        return sample, label

    @staticmethod
    def collate_fn(batch):
        # for the official default_collate implementation see
        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        images, labels = zip(*batch)
        return torch.stack(images, dim=0), torch.as_tensor(labels)


================================================
FILE: pytorch_classification/Test11_efficientnetV2/predict.py
================================================
import os
import json

import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

from model import efficientnetv2_s as create_model


def main():
    """
    Classify one image with a trained EfficientNetV2 model and show the result.

    Reads the image at ``img_path``, the index-to-name mapping from
    ``./class_indices.json`` and the weights at ``model_weight_path``; prints
    the probability of every class and displays the image with the top
    prediction as its title.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    img_size = {"s": [300, 384],  # train_size, val_size
                "m": [384, 480],
                "l": [384, 480]}
    num_model = "s"

    data_transform = transforms.Compose(
        [transforms.Resize(img_size[num_model][1]),
         transforms.CenterCrop(img_size[num_model][1]),
         transforms.ToTensor(),
         transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path)
    img = Image.open(img_path)
    plt.imshow(img)
    # Normalize above is configured for 3 channels, so grayscale / RGBA / palette
    # inputs are converted to RGB before the transform. Result is [C, H, W].
    img = data_transform(img.convert("RGB"))
    # expand batch dimension -> [N, C, H, W]
    img = torch.unsqueeze(img, dim=0)

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = create_model(num_classes=5).to(device)
    # load model weights
    model_weight_path = "./weights/model-29.pth"
    model.load_state_dict(torch.load(model_weight_path, map_location=device))
    model.eval()
    with torch.no_grad():
        # predict class
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).item()

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                                 predict[predict_cla].item())
    plt.title(print_res)
    for i, prob in enumerate(predict):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  prob.item()))
    plt.show()


# Run the single-image prediction demo only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/Test11_efficientnetV2/train.py
================================================
import os
import math
import argparse

import torch
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
import torch.optim.lr_scheduler as lr_scheduler

from model import efficientnetv2_s as create_model
from my_dataset import MyDataSet
from utils import read_split_data, train_one_epoch, evaluate


def main(args):
    """
    Train and validate the EfficientNetV2 model on an image-folder dataset.

    Each epoch logs train/val loss, accuracy and the learning rate to
    TensorBoard and saves the weights to ``./weights/model-{epoch}.pth``.

    Args:
        args: parsed command-line namespace. The fields read here are
            ``device``, ``data_path``, ``batch_size``, ``num_classes``,
            ``weights``, ``freeze_layers``, ``lr``, ``lrf`` and ``epochs``.

    Raises:
        FileNotFoundError: if ``args.weights`` is non-empty but the file does
            not exist.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")

    print(args)
    print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
    tb_writer = SummaryWriter()
    # exist_ok avoids the check-then-create race of a separate os.path.exists test
    os.makedirs("./weights", exist_ok=True)

    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)

    img_size = {"s": [300, 384],  # train_size, val_size
                "m": [384, 480],
                "l": [384, 480]}
    num_model = "s"

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(img_size[num_model][0]),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
        "val": transforms.Compose([transforms.Resize(img_size[num_model][1]),
                                   transforms.CenterCrop(img_size[num_model][1]),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])}

    # training dataset
    train_dataset = MyDataSet(images_path=train_images_path,
                              images_class=train_images_label,
                              transform=data_transform["train"])

    # validation dataset
    val_dataset = MyDataSet(images_path=val_images_path,
                            images_class=val_images_label,
                            transform=data_transform["val"])

    batch_size = args.batch_size
    # os.cpu_count() may return None when the count cannot be determined;
    # fall back to 1 so min() never compares None with an int.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_dataset.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_dataset.collate_fn)

    # load the pre-trained weights when a path is given
    model = create_model(num_classes=args.num_classes).to(device)
    if args.weights != "":
        if not os.path.exists(args.weights):
            raise FileNotFoundError("not found weights file: {}".format(args.weights))
        weights_dict = torch.load(args.weights, map_location=device)
        # Build the state dict once instead of once per checkpoint key, and skip
        # checkpoint keys the model does not have rather than failing with KeyError.
        model_state = model.state_dict()
        # keep only tensors whose element count matches (e.g. drops a classifier
        # trained for a different number of classes)
        load_weights_dict = {k: v for k, v in weights_dict.items()
                             if k in model_state and model_state[k].numel() == v.numel()}
        print(model.load_state_dict(load_weights_dict, strict=False))

    # optionally freeze weights
    if args.freeze_layers:
        for name, para in model.named_parameters():
            # freeze everything except the head
            if "head" not in name:
                para.requires_grad_(False)
            else:
                print("training {}".format(name))

    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=1E-4)

    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    def lf(x):
        # cosine decay of the lr multiplier from 1 down to args.lrf over args.epochs
        return ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf

    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    for epoch in range(args.epochs):
        # train
        train_loss, train_acc = train_one_epoch(model=model,
                                                optimizer=optimizer,
                                                data_loader=train_loader,
                                                device=device,
                                                epoch=epoch)

        scheduler.step()

        # validate
        val_loss, val_acc = evaluate(model=model,
                                     data_loader=val_loader,
                                     device=device,
                                     epoch=epoch)

        tags = ["train_loss", "train_acc", "val_loss", "val_acc", "learning_rate"]
        tb_writer.add_scalar(tags[0], train_loss, epoch)
        tb_writer.add_scalar(tags[1], train_acc, epoch)
        tb_writer.add_scalar(tags[2], val_loss, epoch)
        tb_writer.add_scalar(tags[3], val_acc, epoch)
        tb_writer.add_scalar(tags[4], optimizer.param_groups[0]["lr"], epoch)

        torch.save(model.state_dict(), "./weights/model-{}.pth".format(epoch))

    # flush pending events and release the event file
    tb_writer.close()


# Command-line entry point: parse the training options and start training.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--epochs', type=int, default=30)
    parser.add_argument('--batch-size', type=int, default=8)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--lrf', type=float, default=0.01)

    # root directory of the dataset
    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
    parser.add_argument('--data-path', type=str,
                        default="/data/flower_photos")

    # download model weights
    # link: https://pan.baidu.com/s/1uZX36rvrfEss-JGj4yfzbQ  password: 5gu1
    parser.add_argument('--weights', type=str, default='./pre_efficientnetv2-s.pth',
                        help='initial weights path')
    # type=bool would turn every non-empty string (including "False") into True,
    # so the text is parsed explicitly: only true-like words enable freezing.
    parser.add_argument('--freeze-layers',
                        type=lambda s: str(s).strip().lower() in ("true", "t", "1", "yes", "y", "on"),
                        default=True)
    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')

    opt = parser.parse_args()

    main(opt)


================================================
FILE: pytorch_classification/Test11_efficientnetV2/trans_effv2_weights.py
================================================
import tensorflow as tf
import torch
import numpy as np


def main(model_name: str = "efficientnetv2-s",
         tf_weights_path: str = "./efficientnetv2-s/model",
         stage0_num: int = 2,
         fused_conv_num: int = 10):
    """
    Convert a TensorFlow EfficientNetV2 checkpoint into a PyTorch weights file.

    Every checkpoint variable is renamed by a chain of string replacements,
    transposed where the kernel layout differs, and the resulting dict is saved
    with ``torch.save`` to ``"pre_" + model_name + ".pth"`` in the current
    working directory.

    Args:
        model_name: checkpoint scope prefix; ``model_name + "/"`` is stripped
            from every variable name, and the name is used in the output file.
        tf_weights_path: checkpoint path passed to ``tf.train.list_variables``
            and ``tf.train.load_checkpoint``.
        stage0_num: blocks with id below this value are treated as expansion=1
            fused_mbconv blocks (their single conv maps to ``project_conv``).
        fused_conv_num: remaining blocks with id below this value are treated
            as fused_mbconv, all later ones as mbconv.
    """

    # variables that are skipped entirely
    except_var = ["global_step"]

    new_weights = {}
    # variables whose name contains "Exponential" are filtered out
    var_list = [i for i in tf.train.list_variables(tf_weights_path) if "Exponential" not in i[0]]
    reader = tf.train.load_checkpoint(tf_weights_path)
    for v in var_list:
        if v[0] in except_var:
            continue
        # drop the model scope and switch to dot-separated names
        new_name = v[0].replace(model_name + "/", "").replace("/", ".")

        if "stem" in v[0]:
            new_name = new_name.replace("conv2d.kernel",
                                        "conv.weight")

            new_name = new_name.replace("tpu_batch_normalization.beta",
                                        "bn.bias")
            new_name = new_name.replace("tpu_batch_normalization.gamma",
                                        "bn.weight")
            new_name = new_name.replace("tpu_batch_normalization.moving_mean",
                                        "bn.running_mean")
            new_name = new_name.replace("tpu_batch_normalization.moving_variance",
                                        "bn.running_var")
        elif "head" in v[0]:
            new_name = new_name.replace("conv2d.kernel",
                                        "project_conv.conv.weight")
            new_name = new_name.replace("dense.kernel",
                                        "classifier.weight")
            new_name = new_name.replace("dense.bias",
                                        "classifier.bias")

            new_name = new_name.replace("tpu_batch_normalization.beta",
                                        "project_conv.bn.bias")
            new_name = new_name.replace("tpu_batch_normalization.gamma",
                                        "project_conv.bn.weight")
            new_name = new_name.replace("tpu_batch_normalization.moving_mean",
                                        "project_conv.bn.running_mean")
            new_name = new_name.replace("tpu_batch_normalization.moving_variance",
                                        "project_conv.bn.running_var")
        elif "blocks" in v[0]:
            # e.g. blocks_0.conv2d.kernel -> 0
            blocks_id = new_name.split(".", maxsplit=1)[0].replace("blocks_", "")
            new_name = new_name.replace("blocks_{}".format(blocks_id),
                                        "blocks.{}".format(blocks_id))

            if int(blocks_id) <= stage0_num - 1:  # expansion=1 fused_mbconv
                new_name = new_name.replace("conv2d.kernel",
                                            "project_conv.conv.weight")
                new_name = new_name.replace("tpu_batch_normalization.beta",
                                            "project_conv.bn.bias")
                new_name = new_name.replace("tpu_batch_normalization.gamma",
                                            "project_conv.bn.weight")
                new_name = new_name.replace("tpu_batch_normalization.moving_mean",
                                            "project_conv.bn.running_mean")
                new_name = new_name.replace("tpu_batch_normalization.moving_variance",
                                            "project_conv.bn.running_var")
            else:
                # The block prefix is part of the pattern, so names such as
                # "se.conv2d.kernel" are not rewritten by this replacement.
                new_name = new_name.replace("blocks.{}.conv2d.kernel".format(blocks_id),
                                            "blocks.{}.expand_conv.conv.weight".format(blocks_id))
                new_name = new_name.replace("tpu_batch_normalization.beta",
                                            "expand_conv.bn.bias")
                new_name = new_name.replace("tpu_batch_normalization.gamma",
                                            "expand_conv.bn.weight")
                new_name = new_name.replace("tpu_batch_normalization.moving_mean",
                                            "expand_conv.bn.running_mean")
                new_name = new_name.replace("tpu_batch_normalization.moving_variance",
                                            "expand_conv.bn.running_var")

                if int(blocks_id) <= fused_conv_num - 1:  # fused_mbconv
                    new_name = new_name.replace("blocks.{}.conv2d_1.kernel".format(blocks_id),
                                                "blocks.{}.project_conv.conv.weight".format(blocks_id))
                    new_name = new_name.replace("tpu_batch_normalization_1.beta",
                                                "project_conv.bn.bias")
                    new_name = new_name.replace("tpu_batch_normalization_1.gamma",
                                                "project_conv.bn.weight")
                    new_name = new_name.replace("tpu_batch_normalization_1.moving_mean",
                                                "project_conv.bn.running_mean")
                    new_name = new_name.replace("tpu_batch_normalization_1.moving_variance",
                                                "project_conv.bn.running_var")
                else:  # mbconv
                    new_name = new_name.replace("blocks.{}.conv2d_1.kernel".format(blocks_id),
                                                "blocks.{}.project_conv.conv.weight".format(blocks_id))

                    new_name = new_name.replace("depthwise_conv2d.depthwise_kernel",
                                                "dwconv.conv.weight")

                    # in mbconv the second batch norm belongs to the depth-wise conv
                    new_name = new_name.replace("tpu_batch_normalization_1.beta",
                                                "dwconv.bn.bias")
                    new_name = new_name.replace("tpu_batch_normalization_1.gamma",
                                                "dwconv.bn.weight")
                    new_name = new_name.replace("tpu_batch_normalization_1.moving_mean",
                                                "dwconv.bn.running_mean")
                    new_name = new_name.replace("tpu_batch_normalization_1.moving_variance",
                                                "dwconv.bn.running_var")

                    # and the third one to the projection conv
                    new_name = new_name.replace("tpu_batch_normalization_2.beta",
                                                "project_conv.bn.bias")
                    new_name = new_name.replace("tpu_batch_normalization_2.gamma",
                                                "project_conv.bn.weight")
                    new_name = new_name.replace("tpu_batch_normalization_2.moving_mean",
                                                "project_conv.bn.running_mean")
                    new_name = new_name.replace("tpu_batch_normalization_2.moving_variance",
                                                "project_conv.bn.running_var")

                    new_name = new_name.replace("se.conv2d.bias",
                                                "se.conv_reduce.bias")
                    new_name = new_name.replace("se.conv2d.kernel",
                                                "se.conv_reduce.weight")
                    new_name = new_name.replace("se.conv2d_1.bias",
                                                "se.conv_expand.bias")
                    new_name = new_name.replace("se.conv2d_1.kernel",
                                                "se.conv_expand.weight")
        else:
            # only reported: the variable is still converted and stored below
            print("not recognized name: " + v[0])

        var = reader.get_tensor(v[0])
        new_var = var
        # choose the layout change from the converted name
        if "conv" in new_name and "weight" in new_name and "bn" not in new_name and "dw" not in new_name:
            assert len(var.shape) == 4
            # conv kernel [h, w, c, n] -> [n, c, h, w]
            new_var = np.transpose(var, (3, 2, 0, 1))
        elif "bn" in new_name:
            # batch-norm tensors are stored unchanged
            pass
        elif "dwconv" in new_name and "weight" in new_name:
            # dw_kernel [h, w, n, c] -> [n, c, h, w]
            assert len(var.shape) == 4
            new_var = np.transpose(var, (2, 3, 0, 1))
        elif "classifier" in new_name and "weight" in new_name:
            # 2-D dense kernel: swap the two axes
            assert len(var.shape) == 2
            new_var = np.transpose(var, (1, 0))

        new_weights[new_name] = torch.as_tensor(new_var)

    torch.save(new_weights, "pre_" + model_name + ".pth")


# Script entry point: converts the efficientnetv2-s checkpoint by default.
# The commented-out calls below show the arguments used for the -m and -l checkpoints.
if __name__ == '__main__':
    main(model_name="efficientnetv2-s",
         tf_weights_path="./efficientnetv2-s/model",
         stage0_num=2,
         fused_conv_num=10)

    # main(model_name="efficientnetv2-m",
    #      tf_weights_path="./efficientnetv2-m/model",
    #      stage0_num=3,
    #      fused_conv_num=13)

    # main(model_name="efficientnetv2-l",
    #      tf_weights_path="./efficientnetv2-l/model",
    #      stage0_num=4,
    #      fused_conv_num=18)


================================================
FILE: pytorch_classification/Test11_efficientnetV2/utils.py
================================================
import os
import sys
import json
import pickle
import random

import torch
from tqdm import tqdm

import matplotlib.pyplot as plt


def read_split_data(root: str, val_rate: float = 0.2):
    """Split an image-folder dataset into training and validation lists.

    Every sub-directory of ``root`` is treated as one class. Class names are
    sorted and mapped to integer indices, and the index -> name mapping is
    written to ``class_indices.json`` in the current working directory.

    Args:
        root: Dataset directory containing one sub-directory per class.
        val_rate: Fraction of each class's images sampled for validation.

    Returns:
        ``(train_images_path, train_images_label, val_images_path,
        val_images_label)`` - parallel lists of image paths and class indices.

    Raises:
        AssertionError: If ``root`` does not exist or either split is empty.
    """
    random.seed(0)  # fixed seed so the random split is reproducible
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # Walk the directories; one directory corresponds to one class.
    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]
    # Sort so the ordering is identical on every platform.
    flower_class.sort()
    # Build the class-name -> numeric-index mapping and persist its inverse.
    class_indices = dict((k, v) for v, k in enumerate(flower_class))
    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of each training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of each validation image
    every_class_num = []  # number of samples found for each class
    supported = [".jpg", ".JPG", ".png", ".PNG"]  # accepted file suffixes
    # Walk the files inside every class directory.
    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # Collect the paths of all files with a supported suffix.
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        # Sort so the ordering is identical on every platform.
        images.sort()
        # Numeric index of this class.
        image_class = class_indices[cla]
        # Record how many samples this class has.
        every_class_num.append(len(images))
        # Randomly sample the validation images by ratio. Stored as a set so
        # the membership test in the loop below is O(1) instead of O(n).
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:  # sampled for validation
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:  # everything else goes to the training set
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.".format(sum(every_class_num)))
    print("{} images for training.".format(len(train_images_path)))
    print("{} images for validation.".format(len(val_images_path)))
    assert len(train_images_path) > 0, "number of training images must greater than 0."
    assert len(val_images_path) > 0, "number of validation images must greater than 0."

    plot_image = False
    if plot_image:
        # Bar chart of the number of samples per class.
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # Replace the x ticks 0,1,2,3,4 with the class names.
        plt.xticks(range(len(flower_class)), flower_class)
        # Write the count above every bar.
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        # x-axis label
        plt.xlabel('image class')
        # y-axis label
        plt.ylabel('number of images')
        # chart title
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label


def plot_data_loader_image(data_loader):
    """Display up to four images from every batch of ``data_loader``.

    Class names are read from ``./class_indices.json``. Each image is
    de-normalized with the mean/std constants below before being shown, and
    one figure is shown per batch.

    Args:
        data_loader: Iterable yielding ``(images, labels)`` batches and
            exposing a ``batch_size`` attribute.

    Raises:
        AssertionError: If ``./class_indices.json`` does not exist.
    """
    batch_size = data_loader.batch_size
    plot_num = min(batch_size, 4)

    json_path = './class_indices.json'
    assert os.path.exists(json_path), json_path + " does not exist."
    # Context manager so the file handle is closed once the mapping is loaded.
    with open(json_path, 'r') as json_file:
        class_indices = json.load(json_file)

    for data in data_loader:
        images, labels = data
        # The last batch may hold fewer than plot_num images; never index past it.
        for i in range(min(plot_num, len(images))):
            # [C, H, W] -> [H, W, C]
            img = images[i].numpy().transpose(1, 2, 0)
            # Undo the Normalize transform and rescale to 0..255.
            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255
            label = labels[i].item()
            plt.subplot(1, plot_num, i+1)
            plt.xlabel(class_indices[str(label)])
            plt.xticks([])  # hide the x-axis ticks
            plt.yticks([])  # hide the y-axis ticks
            plt.imshow(img.astype('uint8'))
        plt.show()


def write_pickle(list_info: list, file_name: str):
    """Pickle ``list_info`` and store the bytes in the file ``file_name``."""
    with open(file_name, 'wb') as out_file:
        out_file.write(pickle.dumps(list_info))


def read_pickle(file_name: str) -> list:
    """Return the object unpickled from the file ``file_name``."""
    with open(file_name, 'rb') as in_file:
        raw = in_file.read()
    # NOTE: unpickling can execute arbitrary code; only read trusted files.
    return pickle.loads(raw)


def train_one_epoch(model, optimizer, data_loader, device, epoch):
    """Train ``model`` for one pass over ``data_loader``.

    Uses cross-entropy loss, steps the optimizer after every batch and shows
    the running loss/accuracy on a tqdm progress bar. The process exits with
    status 1 as soon as a non-finite loss is seen.

    Returns:
        ``(mean batch loss, accuracy)`` for the epoch.
    """
    model.train()
    criterion = torch.nn.CrossEntropyLoss()
    loss_sum = torch.zeros(1).to(device)     # accumulated loss
    correct_sum = torch.zeros(1).to(device)  # accumulated number of correct predictions
    optimizer.zero_grad()

    seen = 0
    data_loader = tqdm(data_loader, file=sys.stdout)
    for step, (images, labels) in enumerate(data_loader):
        seen += images.shape[0]

        logits = model(images.to(device))
        targets = labels.to(device)
        _, predicted = torch.max(logits, dim=1)
        correct_sum += torch.eq(predicted, targets).sum()

        batch_loss = criterion(logits, targets)
        batch_loss.backward()
        loss_sum += batch_loss.detach()

        mean_loss = loss_sum.item() / (step + 1)
        accuracy = correct_sum.item() / seen
        data_loader.desc = "[train epoch {}] loss: {:.3f}, acc: {:.3f}".format(epoch, mean_loss, accuracy)

        if not torch.isfinite(batch_loss):
            print('WARNING: non-finite loss, ending training ', batch_loss)
            sys.exit(1)

        optimizer.step()
        optimizer.zero_grad()

    return loss_sum.item() / (step + 1), correct_sum.item() / seen


@torch.no_grad()
def evaluate(model, data_loader, device, epoch):
    """Evaluate ``model`` on ``data_loader`` with gradients disabled.

    Puts the model in eval mode, accumulates the cross-entropy loss and the
    number of correct predictions, and shows the running values on a tqdm
    progress bar.

    Returns:
        ``(mean batch loss, accuracy)`` over the whole loader.
    """
    criterion = torch.nn.CrossEntropyLoss()

    model.eval()

    correct_sum = torch.zeros(1).to(device)  # accumulated number of correct predictions
    loss_sum = torch.zeros(1).to(device)     # accumulated loss

    seen = 0
    data_loader = tqdm(data_loader, file=sys.stdout)
    for step, (images, labels) in enumerate(data_loader):
        seen += images.shape[0]

        logits = model(images.to(device))
        targets = labels.to(device)
        _, predicted = torch.max(logits, dim=1)
        correct_sum += torch.eq(predicted, targets).sum()

        loss_sum += criterion(logits, targets)

        mean_loss = loss_sum.item() / (step + 1)
        accuracy = correct_sum.item() / seen
        data_loader.desc = "[valid epoch {}] loss: {:.3f}, acc: {:.3f}".format(epoch, mean_loss, accuracy)

    return loss_sum.item() / (step + 1), correct_sum.item() / seen


================================================
FILE: pytorch_classification/Test1_official_demo/model.py
================================================
import torch.nn as nn
import torch.nn.functional as F


class LeNet(nn.Module):
    """LeNet-style CNN: two conv/pool stages followed by three linear layers.

    The shape comments in ``forward`` are per sample and assume a 3x32x32
    input; the network produces 10 scores per sample.
    """

    def __init__(self):
        super().__init__()
        # convolutional stages
        self.conv1 = nn.Conv2d(3, 16, 5)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        # fully connected head
        self.fc1 = nn.Linear(32*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # input(3, 32, 32) -> conv1 (16, 28, 28) -> pool1 (16, 14, 14)
        x = self.pool1(F.relu(self.conv1(x)))
        # conv2 (32, 10, 10) -> pool2 (32, 5, 5)
        x = self.pool2(F.relu(self.conv2(x)))
        # flatten every sample into a row of 32*5*5 features
        flat = x.view(-1, 32*5*5)
        hidden = F.relu(self.fc1(flat))    # output(120)
        hidden = F.relu(self.fc2(hidden))  # output(84)
        return self.fc3(hidden)            # output(10)


================================================
FILE: pytorch_classification/Test1_official_demo/predict.py
================================================
import torch
import torchvision.transforms as transforms
from PIL import Image

from model import LeNet


def main():
    """Classify ``1.jpg`` with the LeNet weights in ``Lenet.pth`` and print the class name."""
    transform = transforms.Compose(
        [transforms.Resize((32, 32)),
         transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    classes = ('plane', 'car', 'bird', 'cat',
               'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

    net = LeNet()
    # The model stays on the CPU here, so map the checkpoint to the CPU as well;
    # without map_location a checkpoint saved from a GPU fails to load on a
    # CPU-only machine.
    net.load_state_dict(torch.load('Lenet.pth', map_location='cpu'))
    net.eval()

    # Normalize above is defined for exactly three channels, so force RGB
    # (a grayscale or RGBA file would otherwise fail in the transform).
    im = Image.open('1.jpg').convert('RGB')
    im = transform(im)  # [C, H, W]
    im = torch.unsqueeze(im, dim=0)  # [N, C, H, W]

    with torch.no_grad():
        outputs = net(im)
        # .item() yields a plain Python int; calling int() on a 1-element
        # ndarray is deprecated in recent NumPy releases.
        predict = torch.max(outputs, dim=1)[1].item()
    print(classes[predict])


# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/Test1_official_demo/train.py
================================================
import torch
import torchvision
import torch.nn as nn
from model import LeNet
import torch.optim as optim
import torchvision.transforms as transforms


def main():
    """Train LeNet on CIFAR-10 for 5 epochs and save the weights to ``./Lenet.pth``.

    Every 500 training steps the accuracy on one fixed validation batch is
    printed together with the mean training loss of those 500 steps.
    """
    preprocess = [transforms.ToTensor(),
                  transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
    transform = transforms.Compose(preprocess)

    # 50000 training images.
    # Set download=True on first use so the dataset is downloaded automatically.
    train_set = torchvision.datasets.CIFAR10(root='./data', train=True,
                                             download=False, transform=transform)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=36,
                                               shuffle=True, num_workers=0)

    # 10000 validation images.
    # Set download=True on first use so the dataset is downloaded automatically.
    val_set = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=False, transform=transform)
    val_loader = torch.utils.data.DataLoader(val_set, batch_size=5000,
                                             shuffle=False, num_workers=0)
    # Only the first validation batch is fetched; it is reused for every check.
    val_image, val_label = next(iter(val_loader))

    # classes = ('plane', 'car', 'bird', 'cat',
    #            'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

    net = LeNet()
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.001)

    report_every = 500  # number of mini-batches between two progress reports
    for epoch in range(5):  # loop over the dataset multiple times
        running_loss = 0.0
        for step, (inputs, labels) in enumerate(train_loader):
            # zero the parameter gradients, then forward + backward + optimize
            optimizer.zero_grad()
            loss = loss_function(net(inputs), labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if (step + 1) % report_every != 0:
                continue

            # report statistics for the last 500 mini-batches
            with torch.no_grad():
                val_outputs = net(val_image)  # [batch, 10]
                predict_y = torch.max(val_outputs, dim=1)[1]
                correct = torch.eq(predict_y, val_label).sum().item()
                accuracy = correct / val_label.size(0)

                print('[%d, %5d] train_loss: %.3f  test_accuracy: %.3f' %
                      (epoch + 1, step + 1, running_loss / 500, accuracy))
                running_loss = 0.0

    print('Finished Training')

    save_path = './Lenet.pth'
    torch.save(net.state_dict(), save_path)


# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/Test2_alexnet/class_indices.json
================================================
{
    "0": "daisy",
    "1": "dandelion",
    "2": "roses",
    "3": "sunflowers",
    "4": "tulips"
}

================================================
FILE: pytorch_classification/Test2_alexnet/model.py
================================================
import torch.nn as nn
import torch


class AlexNet(nn.Module):
    """AlexNet-style classifier with a five-convolution feature extractor.

    Args:
        num_classes: Number of output scores per sample.
        init_weights: When True, re-initialize conv and linear layers with
            ``_initialize_weights`` after construction.
    """

    def __init__(self, num_classes=1000, init_weights=False):
        super().__init__()
        # Shape comments are per sample and assume a 3x224x224 input.
        conv_layers = [
            nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2),  # input[3, 224, 224]  output[48, 55, 55]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[48, 27, 27]
            nn.Conv2d(48, 128, kernel_size=5, padding=2),           # output[128, 27, 27]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[128, 13, 13]
            nn.Conv2d(128, 192, kernel_size=3, padding=1),          # output[192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, padding=1),          # output[192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 128, kernel_size=3, padding=1),          # output[128, 13, 13]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[128, 6, 6]
        ]
        self.features = nn.Sequential(*conv_layers)

        head_layers = [
            nn.Dropout(p=0.5),
            nn.Linear(128 * 6 * 6, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        feature_map = self.features(x)
        # keep the batch dimension, flatten everything else
        flat = torch.flatten(feature_map, start_dim=1)
        return self.classifier(flat)

    def _initialize_weights(self):
        """Kaiming-normal init for conv weights, N(0, 0.01) for linear weights, zero biases."""
        for layer in self.modules():
            if isinstance(layer, nn.Linear):
                nn.init.normal_(layer.weight, 0, 0.01)
                nn.init.constant_(layer.bias, 0)
            elif isinstance(layer, nn.Conv2d):
                nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
                if layer.bias is not None:
                    nn.init.constant_(layer.bias, 0)


================================================
FILE: pytorch_classification/Test2_alexnet/predict.py
================================================
import os
import json

import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

from model import AlexNet


def main():
    """Classify ``../tulip.jpg`` with the trained AlexNet weights and plot the result.

    Reads the index -> class-name mapping from ``./class_indices.json`` and
    the weights from ``./AlexNet.pth``, prints the probability of every class
    and shows the image titled with the most likely class.

    Raises:
        AssertionError: If the image, the JSON mapping or the weights file is missing.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    img = Image.open(img_path)

    plt.imshow(img)
    # [C, H, W]; Normalize above is defined for three channels, so force RGB
    # (a grayscale or RGBA file would otherwise fail in the transform)
    img = data_transform(img.convert('RGB'))
    # expand batch dimension -> [N, C, H, W]
    img = torch.unsqueeze(img, dim=0)

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = AlexNet(num_classes=5).to(device)

    # load model weights; map_location lets a checkpoint saved on a GPU be
    # loaded on a CPU-only machine
    weights_path = "./AlexNet.pth"
    assert os.path.exists(weights_path), "file: '{}' dose not exist.".format(weights_path)
    model.load_state_dict(torch.load(weights_path, map_location=device))

    model.eval()
    with torch.no_grad():
        # predict class
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).numpy()

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                                 predict[predict_cla].numpy())
    plt.title(print_res)
    for i in range(len(predict)):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  predict[i].numpy()))
    plt.show()


# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/Test2_alexnet/train.py
================================================
import os
import sys
import json

import torch
import torch.nn as nn
from torchvision import transforms, datasets, utils
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
from tqdm import tqdm

from model import AlexNet


def main():
    """Train AlexNet on the flower dataset and keep the best checkpoint.

    Expects ``data_set/flower_data/{train,val}`` two directory levels above
    the current working directory. Writes the index -> class-name mapping to
    ``class_indices.json`` and saves the weights with the highest validation
    accuracy to ``./AlexNet.pth``.

    Raises:
        AssertionError: If the dataset directory does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "val": transforms.Compose([transforms.Resize((224, 224)),  # cannot 224, must (224, 224)
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    # os.cpu_count() returns None when the count cannot be determined, which
    # would make min() raise TypeError; fall back to a single worker then.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=4, shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))
    # Optional debug preview of one validation batch (kept disabled):
    # test_data_iter = iter(validate_loader)
    # test_image, test_label = test_data_iter.next()
    #
    # def imshow(img):
    #     img = img / 2 + 0.5  # unnormalize
    #     npimg = img.numpy()
    #     plt.imshow(np.transpose(npimg, (1, 2, 0)))
    #     plt.show()
    #
    # print(' '.join('%5s' % cla_dict[test_label[j].item()] for j in range(4)))
    # imshow(utils.make_grid(test_image))

    net = AlexNet(num_classes=5, init_weights=True)

    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    # pata = list(net.parameters())
    optimizer = optim.Adam(net.parameters(), lr=0.0002)

    epochs = 10
    save_path = './AlexNet.pth'
    best_acc = 0.0
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # accumulate the loss for the per-epoch average
            running_loss += loss.item()

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # keep only the checkpoint with the best validation accuracy so far
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')


# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/Test3_vggnet/class_indices.json
================================================
{
    "0": "daisy",
    "1": "dandelion",
    "2": "roses",
    "3": "sunflowers",
    "4": "tulips"
}

================================================
FILE: pytorch_classification/Test3_vggnet/model.py
================================================
import torch.nn as nn
import torch

# official pretrain weights
model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth'
}


class VGG(nn.Module):
    """VGG classifier: a caller-supplied feature extractor plus a fully connected head.

    Args:
        features: Module applied first; its flattened output must have
            512*7*7 values per sample to match the first linear layer.
        num_classes: Number of output scores per sample.
        init_weights: When True, re-initialize conv and linear layers with
            ``_initialize_weights`` after construction.
    """

    def __init__(self, features, num_classes=1000, init_weights=False):
        super().__init__()
        self.features = features
        head_layers = [
            nn.Linear(512*7*7, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        # N x 3 x 224 x 224 -> N x 512 x 7 x 7
        feature_map = self.features(x)
        # N x 512*7*7
        flat = torch.flatten(feature_map, start_dim=1)
        return self.classifier(flat)

    def _initialize_weights(self):
        """Xavier-uniform init for conv and linear weights; biases are set to zero."""
        for layer in self.modules():
            if isinstance(layer, nn.Linear):
                nn.init.xavier_uniform_(layer.weight)
                # nn.init.normal_(layer.weight, 0, 0.01)
                nn.init.constant_(layer.bias, 0)
            elif isinstance(layer, nn.Conv2d):
                # nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
                nn.init.xavier_uniform_(layer.weight)
                if layer.bias is not None:
                    nn.init.constant_(layer.bias, 0)


def make_features(cfg: list):
    """Build the convolutional feature extractor described by ``cfg``.

    Each integer entry adds a 3x3 convolution (padding 1) with that many
    output channels followed by an in-place ReLU; each ``"M"`` entry adds a
    2x2 max-pool with stride 2. The first convolution takes 3 input channels.

    Returns:
        An ``nn.Sequential`` holding the layers in ``cfg`` order.
    """
    modules = []
    channels = 3
    for entry in cfg:
        if entry != "M":
            modules.append(nn.Conv2d(channels, entry, kernel_size=3, padding=1))
            modules.append(nn.ReLU(True))
            channels = entry
            continue
        modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*modules)


# Layer layouts consumed by make_features: an integer is the output channel
# count of a 3x3 convolution (followed by ReLU) and 'M' is a 2x2 max-pool.
cfgs = {
    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


def vgg(model_name="vgg16", **kwargs):
    """Create the VGG variant named ``model_name``.

    Args:
        model_name: Key of ``cfgs`` selecting the layer layout.
        **kwargs: Forwarded unchanged to the ``VGG`` constructor.

    Raises:
        AssertionError: If ``model_name`` is not a key of ``cfgs``.
    """
    assert model_name in cfgs, "Warning: model number {} not in cfgs dict!".format(model_name)
    feature_extractor = make_features(cfgs[model_name])
    return VGG(feature_extractor, **kwargs)


================================================
FILE: pytorch_classification/Test3_vggnet/predict.py
================================================
import os
import json

import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

from model import vgg


def main():
    """Classify ``../tulip.jpg`` with the trained VGG16 weights and plot the result.

    Reads the index -> class-name mapping from ``./class_indices.json`` and
    the weights from ``./vgg16Net.pth``, prints the probability of every
    class and shows the image titled with the most likely class.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    preprocess = [transforms.Resize((224, 224)),
                  transforms.ToTensor(),
                  transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
    data_transform = transforms.Compose(preprocess)

    # load the image and draw it before it is turned into a tensor
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    img = Image.open(img_path)
    plt.imshow(img)
    # [C, H, W] after the transform, [N, C, H, W] once the batch dimension is added
    batch = data_transform(img).unsqueeze(0)

    # read the index -> class-name mapping
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)
    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create the model, then load its weights onto the selected device
    model = vgg(model_name="vgg16", num_classes=5).to(device)
    weights_path = "./vgg16Net.pth"
    assert os.path.exists(weights_path), "file: '{}' dose not exist.".format(weights_path)
    state_dict = torch.load(weights_path, map_location=device)
    model.load_state_dict(state_dict)

    model.eval()
    with torch.no_grad():
        # predict class
        logits = model(batch.to(device))
        output = torch.squeeze(logits).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).numpy()

    best_name = class_indict[str(predict_cla)]
    best_prob = predict[predict_cla].numpy()
    print_res = "class: {}   prob: {:.3}".format(best_name, best_prob)
    plt.title(print_res)
    for idx, prob in enumerate(predict):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(idx)],
                                                  prob.numpy()))
    plt.show()


# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/Test3_vggnet/train.py
================================================
import os
import sys
import json

import torch
import torch.nn as nn
from torchvision import transforms, datasets
import torch.optim as optim
from tqdm import tqdm

from model import vgg


def main():
    """Train VGG16 on the flower dataset and keep the best checkpoint.

    Expects ``data_set/flower_data/{train,val}`` two directory levels above
    the current working directory. Writes the index -> class-name mapping to
    ``class_indices.json`` and saves the weights with the highest validation
    accuracy to ``./vgg16Net.pth``.

    Raises:
        AssertionError: If the dataset directory does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "val": transforms.Compose([transforms.Resize((224, 224)),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    # os.cpu_count() returns None when the count cannot be determined, which
    # would make min() raise TypeError; fall back to a single worker then.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)
    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    # test_data_iter = iter(validate_loader)
    # test_image, test_label = test_data_iter.next()

    model_name = "vgg16"
    net = vgg(model_name=model_name, num_classes=5, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

    epochs = 30
    best_acc = 0.0
    save_path = './{}Net.pth'.format(model_name)
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # accumulate the loss for the per-epoch average
            running_loss += loss.item()

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # keep only the checkpoint with the best validation accuracy so far
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')


# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/Test4_googlenet/class_indices.json
================================================
{
    "0": "daisy",
    "1": "dandelion",
    "2": "roses",
    "3": "sunflowers",
    "4": "tulips"
}

================================================
FILE: pytorch_classification/Test4_googlenet/model.py
================================================
import torch.nn as nn
import torch
import torch.nn.functional as F


class GoogLeNet(nn.Module):
    """GoogLeNet (Inception v1) classifier with optional auxiliary heads.

    Args:
        num_classes: number of logits produced by the main head and by each
            auxiliary head.
        aux_logits: when true, two auxiliary classifiers (``aux1``, ``aux2``)
            are built; in training mode their logits are returned together
            with the main logits.
        init_weights: when true, ``_initialize_weights`` is run once the
            layers have been created.

    ``forward`` returns ``(logits, aux2_logits, aux1_logits)`` while the module
    is in training mode with ``aux_logits`` enabled, otherwise just ``logits``.
    """

    def __init__(self, num_classes=1000, aux_logits=True, init_weights=False):
        super().__init__()
        self.aux_logits = aux_logits

        # --- stem ---
        self.conv1 = BasicConv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        self.conv2 = BasicConv2d(64, 64, kernel_size=1)
        self.conv3 = BasicConv2d(64, 192, kernel_size=3, padding=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        # --- inception stage 3 ---
        self.inception3a = Inception(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = Inception(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        # --- inception stage 4 ---
        self.inception4a = Inception(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = Inception(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = Inception(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = Inception(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = Inception(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        # --- inception stage 5 ---
        self.inception5a = Inception(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = Inception(832, 384, 192, 384, 48, 128, 128)

        # Auxiliary heads hang off inception4a (512 ch) and inception4d (528 ch).
        if self.aux_logits:
            self.aux1 = InceptionAux(512, num_classes)
            self.aux2 = InceptionAux(528, num_classes)

        # --- main classification head ---
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(0.4)
        self.fc = nn.Linear(1024, num_classes)

        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Run the network; shape comments assume an N x 3 x 224 x 224 input."""
        # The auxiliary heads are only evaluated while training.
        with_aux = self.training and self.aux_logits

        # Stem + stage 3 + inception4a: N x 3 x 224 x 224 -> N x 512 x 14 x 14
        front = (self.conv1, self.maxpool1, self.conv2, self.conv3, self.maxpool2,
                 self.inception3a, self.inception3b, self.maxpool3, self.inception4a)
        for layer in front:
            x = layer(x)
        if with_aux:
            aux1 = self.aux1(x)

        # inception4b..4d: N x 512 x 14 x 14 -> N x 528 x 14 x 14
        for layer in (self.inception4b, self.inception4c, self.inception4d):
            x = layer(x)
        if with_aux:
            aux2 = self.aux2(x)

        # inception4e .. global average pool: -> N x 1024 x 1 x 1
        back = (self.inception4e, self.maxpool4, self.inception5a,
                self.inception5b, self.avgpool)
        for layer in back:
            x = layer(x)

        # N x 1024 -> N x num_classes
        x = torch.flatten(x, 1)
        x = self.fc(self.dropout(x))

        if with_aux:
            return x, aux2, aux1
        return x

    def _initialize_weights(self):
        """Kaiming-normal init for conv weights, N(0, 0.01) for linear weights, zero biases."""
        for module in self.modules():
            if isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.constant_(module.bias, 0)
                continue
            if not isinstance(module, nn.Conv2d):
                continue
            nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
            if module.bias is not None:
                nn.init.constant_(module.bias, 0)


class Inception(nn.Module):
    """Inception block: four parallel branches concatenated along the channel axis.

    Args:
        in_channels: channels of the input feature map.
        ch1x1: output channels of the 1x1 branch.
        ch3x3red: channels of the 1x1 reduction in front of the 3x3 conv.
        ch3x3: output channels of the 3x3 branch.
        ch5x5red: channels of the 1x1 reduction in front of the 5x5 conv.
        ch5x5: output channels of the 5x5 branch.
        pool_proj: output channels of the 1x1 projection after max pooling.
    """

    def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj):
        super().__init__()

        # Branch 1: plain 1x1 convolution.
        self.branch1 = BasicConv2d(in_channels, ch1x1, kernel_size=1)

        # Branch 2: 1x1 reduction, then 3x3 conv (padding=1 keeps the spatial size).
        reduce_3x3 = BasicConv2d(in_channels, ch3x3red, kernel_size=1)
        conv_3x3 = BasicConv2d(ch3x3red, ch3x3, kernel_size=3, padding=1)
        self.branch2 = nn.Sequential(reduce_3x3, conv_3x3)

        # Branch 3: 1x1 reduction, then 5x5 conv (padding=2 keeps the spatial size).
        # The official torchvision implementation actually uses a 3x3 kernel here
        # rather than 5x5; this implementation keeps 5x5.
        # Please see https://github.com/pytorch/vision/issues/906 for details.
        reduce_5x5 = BasicConv2d(in_channels, ch5x5red, kernel_size=1)
        conv_5x5 = BasicConv2d(ch5x5red, ch5x5, kernel_size=5, padding=2)
        self.branch3 = nn.Sequential(reduce_5x5, conv_5x5)

        # Branch 4: size-preserving 3x3 max pool, then 1x1 projection.
        pool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        projection = BasicConv2d(in_channels, pool_proj, kernel_size=1)
        self.branch4 = nn.Sequential(pool, projection)

    def forward(self, x):
        """Apply every branch to ``x`` and concatenate the results on dim 1."""
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        return torch.cat([branch(x) for branch in branches], 1)


class InceptionAux(nn.Module):
    """Auxiliary classifier head attached to an intermediate GoogLeNet feature map.

    Args:
        in_channels: channels of the incoming feature map.
        num_classes: number of output logits.

    ``fc1`` expects 2048 input features, i.e. a 128 x 4 x 4 map after the
    pooling and 1x1 conv (which is what a 14 x 14 input produces).
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.averagePool = nn.AvgPool2d(kernel_size=5, stride=3)
        # 1x1 conv down to 128 channels -> [batch, 128, 4, 4]
        self.conv = BasicConv2d(in_channels, 128, kernel_size=1)

        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Return N x num_classes logits for the given feature map."""
        # aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14  ->  N x 128 x 4 x 4
        features = self.conv(self.averagePool(x))

        # N x 2048; dropout is only active in training mode
        flat = torch.flatten(features, 1)
        flat = F.dropout(flat, 0.5, training=self.training)

        # N x 1024
        hidden = F.relu(self.fc1(flat), inplace=True)
        hidden = F.dropout(hidden, 0.5, training=self.training)

        # N x num_classes
        return self.fc2(hidden)


class BasicConv2d(nn.Module):
    """A ``nn.Conv2d`` followed by an in-place ReLU.

    Args:
        in_channels: input channels of the convolution.
        out_channels: output channels of the convolution.
        **kwargs: forwarded unchanged to ``nn.Conv2d`` (kernel_size, stride,
            padding, ...).
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return ``relu(conv(x))``."""
        return self.relu(self.conv(x))


================================================
FILE: pytorch_classification/Test4_googlenet/predict.py
================================================
import os
import json

import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

from model import GoogLeNet


def main():
    """Classify ``../tulip.jpg`` with a trained GoogLeNet and plot the result.

    Reads the class-index mapping from ``./class_indices.json`` and the weights
    from ``./googleNet.pth``, prints the probability of every class and shows
    the image titled with the most likely class.

    Raises:
        AssertionError: if the image, the JSON file or the weights file is missing.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    img = Image.open(img_path)
    plt.imshow(img)
    # Force three channels before the transform: Normalize is configured for
    # three channels, so grayscale / RGBA / palette images would fail otherwise.
    # [C, H, W]
    img = data_transform(img.convert("RGB"))
    # expand batch dimension -> [N, C, H, W]
    img = torch.unsqueeze(img, dim=0)

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model (no auxiliary heads are needed for inference)
    model = GoogLeNet(num_classes=5, aux_logits=False).to(device)

    # load model weights
    weights_path = "./googleNet.pth"
    assert os.path.exists(weights_path), "file: '{}' dose not exist.".format(weights_path)
    # strict=False: the model here is built with aux_logits=False, so any
    # auxiliary-classifier entries present in the checkpoint are ignored.
    model.load_state_dict(torch.load(weights_path, map_location=device),
                          strict=False)

    model.eval()
    with torch.no_grad():
        # predict class
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).item()

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                                 predict[predict_cla].item())
    plt.title(print_res)
    for i, prob in enumerate(predict):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  prob.item()))
    plt.show()


# Script entry point: run the single-image prediction only when this file is
# executed directly.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/Test4_googlenet/train.py
================================================
import os
import sys
import json

import torch
import torch.nn as nn
from torchvision import transforms, datasets
import torch.optim as optim
from tqdm import tqdm

from model import GoogLeNet


def main():
    """Train GoogLeNet on the flower dataset and keep the best checkpoint.

    The dataset is expected at ``<cwd>/../../data_set/flower_data`` with
    ``train`` and ``val`` sub-folders in ``ImageFolder`` layout. The
    index-to-class mapping is written to ``class_indices.json`` and the weights
    with the best validation accuracy are saved to ``./googleNet.pth``.

    Raises:
        AssertionError: if the dataset directory does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "val": transforms.Compose([transforms.Resize((224, 224)),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # class_to_idx looks like
    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4};
    # invert it so that predictions (indices) can be mapped back to names.
    flower_list = train_dataset.class_to_idx
    cla_dict = {val: key for key, val in flower_list.items()}
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    # number of workers; os.cpu_count() may return None, which min() cannot
    # compare with ints, so fall back to 1 in that case
    nw = min(os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8)
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    net = GoogLeNet(num_classes=5, aux_logits=True, init_weights=True)
    # To use the official pretrained weights, load them into the official
    # torchvision model, not into this custom implementation: the official
    # model uses BN layers and changes some parameters, so the two cannot be mixed.
    # import torchvision
    # net = torchvision.models.googlenet(num_classes=5)
    # model_dict = net.state_dict()
    # # pretrained weights download url: https://download.pytorch.org/models/googlenet-1378be20.pth
    # pretrain_model = torch.load("googlenet.pth")
    # del_list = ["aux1.fc2.weight", "aux1.fc2.bias",
    #             "aux2.fc2.weight", "aux2.fc2.bias",
    #             "fc.weight", "fc.bias"]
    # pretrain_dict = {k: v for k, v in pretrain_model.items() if k not in del_list}
    # model_dict.update(pretrain_dict)
    # net.load_state_dict(model_dict)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0003)

    epochs = 30
    best_acc = 0.0
    save_path = './googleNet.pth'
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            # move the labels to the device once; they are used by all three losses
            labels = labels.to(device)
            optimizer.zero_grad()
            logits, aux_logits2, aux_logits1 = net(images.to(device))
            loss0 = loss_function(logits, labels)
            loss1 = loss_function(aux_logits1, labels)
            loss2 = loss_function(aux_logits2, labels)
            # the auxiliary losses are weighted by 0.3 each
            loss = loss0 + loss1 * 0.3 + loss2 * 0.3
            loss.backward()
            optimizer.step()

            # print statistics
            loss_value = loss.item()
            running_loss += loss_value

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss_value)

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))  # eval model only have last output layer
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # keep only the weights with the best validation accuracy so far
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')


# Script entry point: start training only when this file is executed directly.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/Test5_resnet/README.md
================================================
## 文件结构：
```
  ├── model.py: ResNet模型搭建
  ├── train.py: 训练脚本
  ├── predict.py: 单张图像预测脚本
  └── batch_predict.py: 批量图像预测脚本
```

================================================
FILE: pytorch_classification/Test5_resnet/batch_predict.py
================================================
import os
import json

import torch
from PIL import Image
from torchvision import transforms

from model import resnet34


def main():
    """Classify every ``.jpg`` image in a folder with a trained ResNet-34.

    Images under ``imgs_root`` are processed in batches of ``batch_size``; for
    each image the path, predicted class name and its probability are printed.
    The final batch may be smaller than ``batch_size`` so that no image is
    skipped.

    Raises:
        AssertionError: if the image folder, the class-index JSON, the weights
            file or one of the listed images is missing.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose(
        [transforms.Resize(256),
         transforms.CenterCrop(224),
         transforms.ToTensor(),
         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    # load image
    # folder containing the images to run prediction on
    imgs_root = "/data/imgs"
    assert os.path.exists(imgs_root), f"file: '{imgs_root}' dose not exist."
    # collect the paths of all .jpg images in that folder
    img_path_list = [os.path.join(imgs_root, i) for i in os.listdir(imgs_root) if i.endswith(".jpg")]

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), f"file: '{json_path}' dose not exist."

    # use a context manager so the file handle is always closed
    with open(json_path, "r") as json_file:
        class_indict = json.load(json_file)

    # create model
    model = resnet34(num_classes=5).to(device)

    # load model weights
    weights_path = "./resNet34.pth"
    assert os.path.exists(weights_path), f"file: '{weights_path}' dose not exist."
    model.load_state_dict(torch.load(weights_path, map_location=device))

    # prediction
    model.eval()
    batch_size = 8  # number of images packed into one batch per forward pass
    with torch.no_grad():
        # Step through the list by batch_size; the last slice is simply shorter
        # when the number of images is not a multiple of batch_size.
        for start in range(0, len(img_path_list), batch_size):
            batch_paths = img_path_list[start: start + batch_size]
            img_list = []
            for img_path in batch_paths:
                assert os.path.exists(img_path), f"file: '{img_path}' dose not exist."
                with Image.open(img_path) as img:
                    # Normalize is configured for three channels, so force RGB
                    img_list.append(data_transform(img.convert("RGB")))

            # batch img
            # stack all images of this slice into one batch tensor
            batch_img = torch.stack(img_list, dim=0)
            # predict class
            output = model(batch_img.to(device)).cpu()
            predict = torch.softmax(output, dim=1)
            probs, classes = torch.max(predict, dim=1)

            for img_path, pro, cla in zip(batch_paths, probs, classes):
                print("image: {}  class: {}  prob: {:.3}".format(img_path,
                                                                 class_indict[str(cla.item())],
                                                                 pro.item()))


# Script entry point: run the batch prediction only when this file is executed
# directly.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/Test5_resnet/class_indices.json
================================================
{
    "0": "daisy",
    "1": "dandelion",
    "2": "roses",
    "3": "sunflowers",
    "4": "tulips"
}

================================================
FILE: pytorch_classification/Test5_resnet/load_weights.py
================================================
import os
import torch
import torch.nn as nn
from model import resnet34


def main():
    """Show how to load pretrained ResNet-34 weights and replace the classifier.

    Option 1 (active) builds the default model, loads the full checkpoint and
    then swaps ``fc`` for a new 5-class linear layer. Option 2 (left commented
    out) builds a 5-class model and loads everything except the ``fc`` weights.

    Raises:
        AssertionError: if ``./resnet34-pre.pth`` does not exist.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")

    # load pretrain weights
    # download url: https://download.pytorch.org/models/resnet34-333f7ec4.pth
    model_weight_path = "./resnet34-pre.pth"
    assert os.path.exists(model_weight_path), "file {} does not exist.".format(model_weight_path)

    # option1: load the complete checkpoint, then replace the classifier
    net = resnet34()
    pretrained_state = torch.load(model_weight_path, map_location=device)
    net.load_state_dict(pretrained_state)
    # change fc layer structure: same input width, 5 output classes
    net.fc = nn.Linear(net.fc.in_features, 5)

    # option2: drop the fc weights from the checkpoint and load the rest
    # net = resnet34(num_classes=5)
    # pre_weights = torch.load(model_weight_path, map_location=device)
    # del_key = []
    # for key, _ in pre_weights.items():
    #     if "fc" in key:
    #         del_key.append(key)
    #
    # for key in del_key:
    #     del pre_weights[key]
    #
    # missing_keys, unexpected_keys = net.load_state_dict(pre_weights, strict=False)
    # print("[missing_keys]:", *missing_keys, sep="\n")
    # print("[unexpected_keys]:", *unexpected_keys, sep="\n")


# Script entry point: run the weight-loading demo only when this file is
# executed directly.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/Test5_resnet/model.py
================================================
import torch.nn as nn
import torch


class BasicBlock(nn.Module):
    """Two-layer 3x3 residual block.

    Args:
        in_channel: channels of the input feature map.
        out_channel: channels produced by both 3x3 convolutions.
        stride: stride of the first convolution.
        downsample: optional module applied to the input to form the shortcut;
            when ``None`` the input itself is the shortcut.
        **kwargs: accepted and ignored, so the block can be constructed with
            the same keyword arguments as ``Bottleneck``.
    """
    expansion = 1  # output channels = out_channel * expansion

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, **kwargs):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        out += shortcut
        return self.relu(out)


class Bottleneck(nn.Module):
    """Three-layer (1x1 -> 3x3 -> 1x1) residual bottleneck block.

    Note: in the original paper, on the main branch of the downsampling
    (dashed-line) residual structure the first 1x1 conv has stride 2 and the
    3x3 conv has stride 1. The official PyTorch implementation instead uses
    stride 1 for the first 1x1 conv and stride 2 for the 3x3 conv, which
    improves top-1 accuracy by roughly 0.5%.
    See ResNet v1.5: https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch

    Args:
        in_channel: channels of the input feature map.
        out_channel: base width; the block outputs ``out_channel * expansion`` channels.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input to form the shortcut.
        groups: number of groups of the 3x3 convolution.
        width_per_group: scales the inner width relative to the baseline of 64.
    """
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None,
                 groups=1, width_per_group=64):
        super().__init__()

        # inner width of the 1x1 -> 3x3 path
        width = int(out_channel * (width_per_group / 64.)) * groups
        expanded = out_channel * self.expansion

        # 1x1: squeeze channels
        self.conv1 = nn.Conv2d(in_channel, width, kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(width)
        # 3x3: (grouped) spatial conv, carries the stride
        self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride,
                               padding=1, groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(width)
        # 1x1: unsqueeze channels
        self.conv3 = nn.Conv2d(width, expanded, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(expanded)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out += shortcut
        return self.relu(out)


class ResNet(nn.Module):
    """Generic ResNet / ResNeXt backbone built from a residual block class.

    Args:
        block: residual block class exposing an ``expansion`` attribute and
            accepting ``(in_channel, out_channel, stride=, downsample=,
            groups=, width_per_group=)``.
        blocks_num: number of blocks in each of the four stages.
        num_classes: output size of the final linear layer.
        include_top: when false, the average pool and linear layer are not
            created and ``forward`` returns the stage-4 feature map.
        groups: forwarded to every block.
        width_per_group: forwarded to every block.
    """

    def __init__(self,
                 block,
                 blocks_num,
                 num_classes=1000,
                 include_top=True,
                 groups=1,
                 width_per_group=64):
        super().__init__()
        self.include_top = include_top
        # running count of channels fed into the next block; updated by _make_layer
        self.in_channel = 64

        self.groups = groups
        self.width_per_group = width_per_group

        # stem: 7x7 stride-2 conv, BN, ReLU, 3x3 stride-2 max pool
        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # four residual stages, registered as layer1 .. layer4
        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
        for index, (channel, stride) in enumerate(stage_specs):
            stage = self._make_layer(block, channel, blocks_num[index], stride=stride)
            setattr(self, "layer{}".format(index + 1), stage)

        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Kaiming-normal initialisation for every convolution weight
        conv_layers = (m for m in self.modules() if isinstance(m, nn.Conv2d))
        for conv in conv_layers:
            nn.init.kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` blocks and advance ``self.in_channel``.

        The first block carries ``stride`` and, when the shape of the shortcut
        would not match the main branch, a 1x1 conv + BN ``downsample`` module.
        """
        out_channel = channel * block.expansion

        downsample = None
        if stride != 1 or self.in_channel != out_channel:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channel, out_channel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channel))

        first_block = block(self.in_channel,
                            channel,
                            downsample=downsample,
                            stride=stride,
                            groups=self.groups,
                            width_per_group=self.width_per_group)
        self.in_channel = out_channel

        remaining = [block(self.in_channel,
                           channel,
                           groups=self.groups,
                           width_per_group=self.width_per_group)
                     for _ in range(1, block_num)]

        return nn.Sequential(first_block, *remaining)

    def forward(self, x):
        """Return class logits, or the stage-4 feature map when ``include_top`` is false."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        if not self.include_top:
            return x

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        return self.fc(x)


def resnet34(num_classes=1000, include_top=True):
    """Build a ResNet from ``BasicBlock`` with stage depths [3, 4, 6, 3]."""
    # pretrained weights: https://download.pytorch.org/models/resnet34-333f7ec4.pth
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock,
                  stage_depths,
                  num_classes=num_classes,
                  include_top=include_top)


def resnet50(num_classes=1000, include_top=True):
    """Build a ResNet from ``Bottleneck`` with stage depths [3, 4, 6, 3]."""
    # pretrained weights: https://download.pytorch.org/models/resnet50-19c8e357.pth
    stage_depths = [3, 4, 6, 3]
    return ResNet(Bottleneck,
                  stage_depths,
                  num_classes=num_classes,
                  include_top=include_top)


def resnet101(num_classes=1000, include_top=True):
    """Build a ResNet from ``Bottleneck`` with stage depths [3, 4, 23, 3]."""
    # pretrained weights: https://download.pytorch.org/models/resnet101-5d3b4d8f.pth
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck,
                  stage_depths,
                  num_classes=num_classes,
                  include_top=include_top)


def resnext50_32x4d(num_classes=1000, include_top=True):
    """Build a ResNeXt from ``Bottleneck``: depths [3, 4, 6, 3], 32 groups, width 4 per group."""
    # pretrained weights: https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth
    config = dict(num_classes=num_classes,
                  include_top=include_top,
                  groups=32,
                  width_per_group=4)
    return ResNet(Bottleneck, [3, 4, 6, 3], **config)


def resnext101_32x8d(num_classes=1000, include_top=True):
    """Build a ResNeXt from ``Bottleneck``: depths [3, 4, 23, 3], 32 groups, width 8 per group."""
    # pretrained weights: https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth
    config = dict(num_classes=num_classes,
                  include_top=include_top,
                  groups=32,
                  width_per_group=8)
    return ResNet(Bottleneck, [3, 4, 23, 3], **config)


================================================
FILE: pytorch_classification/Test5_resnet/predict.py
================================================
import os
import json

import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

from model import resnet34


def main():
    """Classify ``../tulip.jpg`` with a trained ResNet-34 and plot the result.

    Reads the class-index mapping from ``./class_indices.json`` and the weights
    from ``./resNet34.pth``, prints the probability of every class and shows
    the image titled with the most likely class.

    Raises:
        AssertionError: if the image, the JSON file or the weights file is missing.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose(
        [transforms.Resize(256),
         transforms.CenterCrop(224),
         transforms.ToTensor(),
         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    img = Image.open(img_path)
    plt.imshow(img)
    # Force three channels before the transform: Normalize is configured for
    # three channels, so grayscale / RGBA / palette images would fail otherwise.
    # [C, H, W]
    img = data_transform(img.convert("RGB"))
    # expand batch dimension -> [N, C, H, W]
    img = torch.unsqueeze(img, dim=0)

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = resnet34(num_classes=5).to(device)

    # load model weights
    weights_path = "./resNet34.pth"
    assert os.path.exists(weights_path), "file: '{}' dose not exist.".format(weights_path)
    model.load_state_dict(torch.load(weights_path, map_location=device))

    # prediction
    model.eval()
    with torch.no_grad():
        # predict class
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).item()

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                                 predict[predict_cla].item())
    plt.title(print_res)
    for i, prob in enumerate(predict):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  prob.item()))
    plt.show()


# Script entry point: run the single-image prediction only when this file is
# executed directly.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/Test5_resnet/train.py
================================================
import os
import sys
import json

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from tqdm import tqdm

from model import resnet34


def main():
    """Fine-tune a pretrained ResNet-34 on the flower dataset.

    The dataset is expected at ``<cwd>/../../data_set/flower_data`` with
    ``train`` and ``val`` sub-folders in ``ImageFolder`` layout, and the
    pretrained weights at ``./resnet34-pre.pth``. The index-to-class mapping is
    written to ``class_indices.json`` and the weights with the best validation
    accuracy are saved to ``./resNet34.pth``.

    Raises:
        AssertionError: if the dataset directory or the pretrained weights
            file does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # class_to_idx looks like
    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4};
    # invert it so that predictions (indices) can be mapped back to names.
    flower_list = train_dataset.class_to_idx
    cla_dict = {val: key for key, val in flower_list.items()}
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 16
    # number of workers; os.cpu_count() may return None, which min() cannot
    # compare with ints, so fall back to 1 in that case
    nw = min(os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8)
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    net = resnet34()
    # load pretrain weights
    # download url: https://download.pytorch.org/models/resnet34-333f7ec4.pth
    model_weight_path = "./resnet34-pre.pth"
    assert os.path.exists(model_weight_path), "file {} does not exist.".format(model_weight_path)
    net.load_state_dict(torch.load(model_weight_path, map_location='cpu'))
    # Uncomment to freeze the pretrained parameters (only the new fc layer
    # created below would then be trained):
    # for param in net.parameters():
    #     param.requires_grad = False

    # change fc layer structure: same input width, 5 output classes
    in_channel = net.fc.in_features
    net.fc = nn.Linear(in_channel, 5)
    net.to(device)

    # define loss function
    loss_function = nn.CrossEntropyLoss()

    # construct an optimizer over the trainable parameters only
    params = [p for p in net.parameters() if p.requires_grad]
    optimizer = optim.Adam(params, lr=0.0001)

    epochs = 3
    best_acc = 0.0
    save_path = './resNet34.pth'
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            logits = net(images.to(device))
            loss = loss_function(logits, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            loss_value = loss.item()
            running_loss += loss_value

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss_value)

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                # loss = loss_function(outputs, test_labels)
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

                val_bar.desc = "valid epoch[{}/{}]".format(epoch + 1,
                                                           epochs)

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # keep only the weights with the best validation accuracy so far
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')


# Script entry point: start training only when this file is executed directly.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/Test6_mobilenet/class_indices.json
================================================
{
    "0": "daisy",
    "1": "dandelion",
    "2": "roses",
    "3": "sunflowers",
    "4": "tulips"
}

================================================
FILE: pytorch_classification/Test6_mobilenet/model_v2.py
================================================
from torch import nn
import torch


def _make_divisible(ch, divisor=8, min_ch=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    """
    if min_ch is None:
        min_ch = divisor
    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_ch < 0.9 * ch:
        new_ch += divisor
    return new_ch


class ConvBNReLU(nn.Sequential):
    """
    Bias-free Conv2d followed by BatchNorm2d and an in-place ReLU6.

    Args:
        in_channel: number of input channels.
        out_channel: number of output channels.
        kernel_size: convolution kernel size; padding is (kernel_size - 1) // 2.
        stride: convolution stride.
        groups: convolution groups (set to the channel count for depthwise conv).
    """

    def __init__(self, in_channel, out_channel, kernel_size=3, stride=1, groups=1):
        conv = nn.Conv2d(in_channel,
                         out_channel,
                         kernel_size,
                         stride,
                         (kernel_size - 1) // 2,
                         groups=groups,
                         bias=False)
        norm = nn.BatchNorm2d(out_channel)
        act = nn.ReLU6(inplace=True)
        super(ConvBNReLU, self).__init__(conv, norm, act)


class InvertedResidual(nn.Module):
    """
    MobileNetV2 inverted residual block.

    Layout: optional 1x1 expansion -> 3x3 depthwise conv -> 1x1 linear
    projection with BatchNorm. The input is added back to the output only
    when the block keeps both resolution and channel count.

    Args:
        in_channel: number of input channels.
        out_channel: number of output channels.
        stride: stride of the depthwise convolution.
        expand_ratio: multiplier giving the hidden (expanded) channel count.
    """

    def __init__(self, in_channel, out_channel, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        hidden_channel = in_channel * expand_ratio
        # The identity shortcut needs matching shapes on both sides.
        self.use_shortcut = stride == 1 and in_channel == out_channel

        # 1x1 pointwise expansion, skipped when there is nothing to expand
        expand = [ConvBNReLU(in_channel, hidden_channel, kernel_size=1)] if expand_ratio != 1 else []
        # 3x3 depthwise conv
        depthwise = ConvBNReLU(hidden_channel, hidden_channel, stride=stride, groups=hidden_channel)
        # 1x1 pointwise conv (linear: no activation after the BatchNorm)
        project = nn.Conv2d(hidden_channel, out_channel, kernel_size=1, bias=False)
        project_bn = nn.BatchNorm2d(out_channel)

        self.conv = nn.Sequential(*expand, depthwise, project, project_bn)

    def forward(self, x):
        out = self.conv(x)
        return x + out if self.use_shortcut else out


class MobileNetV2(nn.Module):
    """
    MobileNetV2 image classifier.

    Args:
        num_classes: output size of the final linear layer.
        alpha: width multiplier applied to every channel count.
        round_nearest: divisor passed to ``_make_divisible`` when rounding
            the scaled channel counts.
    """

    def __init__(self, num_classes=1000, alpha=1.0, round_nearest=8):
        super(MobileNetV2, self).__init__()
        block = InvertedResidual
        input_channel = _make_divisible(32 * alpha, round_nearest)
        last_channel = _make_divisible(1280 * alpha, round_nearest)

        # Each row: t (expand ratio), c (output channels before width scaling),
        # n (number of repeated blocks), s (stride of the first block in the group).
        inverted_residual_setting = [
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1],
        ]

        # conv1 layer
        features = [ConvBNReLU(3, input_channel, stride=2)]
        # stack of inverted residual blocks
        for t, c, n, s in inverted_residual_setting:
            output_channel = _make_divisible(c * alpha, round_nearest)
            for idx in range(n):
                # only the first block of each group uses the configured stride
                block_stride = 1 if idx else s
                features.append(block(input_channel, output_channel, block_stride, expand_ratio=t))
                input_channel = output_channel
        # final 1x1 conv widening to last_channel
        features.append(ConvBNReLU(input_channel, last_channel, 1))
        self.features = nn.Sequential(*features)

        # classifier head
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(last_channel, num_classes)
        )

        # weight initialization
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out')
                if module.bias is not None:
                    nn.init.zeros_(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.zeros_(module.bias)

    def forward(self, x):
        pooled = self.avgpool(self.features(x))
        # collapse everything after the batch dimension before the linear head
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)


================================================
FILE: pytorch_classification/Test6_mobilenet/model_v3.py
================================================
from typing import Callable, List, Optional

import torch
from torch import nn, Tensor
from torch.nn import functional as F
from functools import partial


def _make_divisible(ch, divisor=8, min_ch=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    """
    if min_ch is None:
        min_ch = divisor
    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_ch < 0.9 * ch:
        new_ch += divisor
    return new_ch


class ConvBNActivation(nn.Sequential):
    """
    Bias-free Conv2d followed by a normalization layer and an activation.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        kernel_size: convolution kernel size; padding is (kernel_size - 1) // 2.
        stride: convolution stride.
        groups: convolution groups.
        norm_layer: callable building the normalization layer from the output
            channel count; ``nn.BatchNorm2d`` when None.
        activation_layer: callable building the activation, invoked with
            ``inplace=True``; ``nn.ReLU6`` when None.
    """

    def __init__(self,
                 in_planes: int,
                 out_planes: int,
                 kernel_size: int = 3,
                 stride: int = 1,
                 groups: int = 1,
                 norm_layer: Optional[Callable[..., nn.Module]] = None,
                 activation_layer: Optional[Callable[..., nn.Module]] = None):
        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        activation_layer = nn.ReLU6 if activation_layer is None else activation_layer

        conv = nn.Conv2d(in_channels=in_planes,
                         out_channels=out_planes,
                         kernel_size=kernel_size,
                         stride=stride,
                         padding=(kernel_size - 1) // 2,
                         groups=groups,
                         bias=False)
        norm = norm_layer(out_planes)
        act = activation_layer(inplace=True)
        super(ConvBNActivation, self).__init__(conv, norm, act)


class SqueezeExcitation(nn.Module):
    """
    Squeeze-and-excitation gate that rescales each channel of its input.

    Args:
        input_c: number of input (and output) channels.
        squeeze_factor: the bottleneck width is ``input_c // squeeze_factor``
            rounded by ``_make_divisible`` with divisor 8.
    """

    def __init__(self, input_c: int, squeeze_factor: int = 4):
        super(SqueezeExcitation, self).__init__()
        squeeze_c = _make_divisible(input_c // squeeze_factor, 8)
        # 1x1 convs acting as the two fully connected layers of the gate
        self.fc1 = nn.Conv2d(input_c, squeeze_c, 1)
        self.fc2 = nn.Conv2d(squeeze_c, input_c, 1)

    def forward(self, x: Tensor) -> Tensor:
        # squeeze: global average pool to one value per channel
        pooled = F.adaptive_avg_pool2d(x, output_size=(1, 1))
        hidden = F.relu(self.fc1(pooled), inplace=True)
        # excite: per-channel weights produced by the hard-sigmoid
        gate = F.hardsigmoid(self.fc2(hidden), inplace=True)
        return gate * x


class InvertedResidualConfig:
    """
    Settings for a single MobileNetV3 inverted residual block.

    The three channel counts are scaled by ``width_multi`` and rounded via
    ``adjust_channels`` before being stored.

    Args:
        input_c: input channels before width scaling.
        kernel: depthwise kernel size.
        expanded_c: expanded (hidden) channels before width scaling.
        out_c: output channels before width scaling.
        use_se: whether the block contains a squeeze-and-excitation module.
        activation: "HS" selects h-swish; any other value leaves ``use_hs`` False.
        stride: stride of the depthwise convolution.
        width_multi: width multiplier applied to the channel counts.
    """

    def __init__(self,
                 input_c: int,
                 kernel: int,
                 expanded_c: int,
                 out_c: int,
                 use_se: bool,
                 activation: str,
                 stride: int,
                 width_multi: float):
        scaled_in, scaled_expanded, scaled_out = [
            self.adjust_channels(c, width_multi) for c in (input_c, expanded_c, out_c)
        ]
        self.input_c = scaled_in
        self.kernel = kernel
        self.expanded_c = scaled_expanded
        self.out_c = scaled_out
        self.use_se = use_se
        self.use_hs = activation == "HS"  # whether using h-swish activation
        self.stride = stride

    @staticmethod
    def adjust_channels(channels: int, width_multi: float):
        """Scale ``channels`` by ``width_multi`` and round with divisor 8."""
        scaled = channels * width_multi
        return _make_divisible(scaled, 8)


class InvertedResidual(nn.Module):
    """
    MobileNetV3 inverted residual block built from an InvertedResidualConfig.

    Layout: optional 1x1 expansion -> depthwise conv -> optional
    squeeze-and-excitation -> 1x1 projection without activation. The input is
    added to the result when stride is 1 and input/output channels match.

    Args:
        cnf: block configuration.
        norm_layer: callable building the normalization layer for each conv.

    Raises:
        ValueError: if ``cnf.stride`` is neither 1 nor 2.
    """

    def __init__(self,
                 cnf: InvertedResidualConfig,
                 norm_layer: Callable[..., nn.Module]):
        super(InvertedResidual, self).__init__()

        if cnf.stride != 1 and cnf.stride != 2:
            raise ValueError("illegal stride value.")

        self.use_res_connect = (cnf.stride == 1 and cnf.input_c == cnf.out_c)

        act = nn.Hardswish if cnf.use_hs else nn.ReLU
        # every conv in the block shares the same normalization factory
        conv_bn = partial(ConvBNActivation, norm_layer=norm_layer)

        stages: List[nn.Module] = []

        # expand (skipped when the expanded width equals the input width)
        if cnf.expanded_c != cnf.input_c:
            stages.append(conv_bn(cnf.input_c,
                                  cnf.expanded_c,
                                  kernel_size=1,
                                  activation_layer=act))

        # depthwise
        stages.append(conv_bn(cnf.expanded_c,
                              cnf.expanded_c,
                              kernel_size=cnf.kernel,
                              stride=cnf.stride,
                              groups=cnf.expanded_c,
                              activation_layer=act))

        if cnf.use_se:
            stages.append(SqueezeExcitation(cnf.expanded_c))

        # project (nn.Identity keeps this conv linear)
        stages.append(conv_bn(cnf.expanded_c,
                              cnf.out_c,
                              kernel_size=1,
                              activation_layer=nn.Identity))

        self.block = nn.Sequential(*stages)
        self.out_channels = cnf.out_c
        self.is_strided = cnf.stride > 1

    def forward(self, x: Tensor) -> Tensor:
        result = self.block(x)
        if not self.use_res_connect:
            return result
        # residual connection, accumulated in place on the block output
        result += x
        return result


class MobileNetV3(nn.Module):
    """
    MobileNetV3 image classifier assembled from a list of block configs.

    Args:
        inverted_residual_setting: non-empty list of InvertedResidualConfig,
            one per inverted residual block.
        last_channel: width of the hidden linear layer in the classifier.
        num_classes: output size of the final linear layer.
        block: callable building one block from (config, norm_layer);
            ``InvertedResidual`` when None.
        norm_layer: callable building normalization layers;
            ``BatchNorm2d(eps=0.001, momentum=0.01)`` when None.

    Raises:
        ValueError: if ``inverted_residual_setting`` is empty.
        TypeError: if it is not a list of InvertedResidualConfig.
    """

    def __init__(self,
                 inverted_residual_setting: List[InvertedResidualConfig],
                 last_channel: int,
                 num_classes: int = 1000,
                 block: Optional[Callable[..., nn.Module]] = None,
                 norm_layer: Optional[Callable[..., nn.Module]] = None):
        super(MobileNetV3, self).__init__()

        if not inverted_residual_setting:
            raise ValueError("The inverted_residual_setting should not be empty.")
        is_config_list = (isinstance(inverted_residual_setting, List) and
                          all(isinstance(s, InvertedResidualConfig) for s in inverted_residual_setting))
        if not is_config_list:
            raise TypeError("The inverted_residual_setting should be List[InvertedResidualConfig]")

        block = InvertedResidual if block is None else block
        if norm_layer is None:
            norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.01)

        # first layer: its width is the input width of the first block
        stem_out_c = inverted_residual_setting[0].input_c
        layers: List[nn.Module] = [ConvBNActivation(3,
                                                    stem_out_c,
                                                    kernel_size=3,
                                                    stride=2,
                                                    norm_layer=norm_layer,
                                                    activation_layer=nn.Hardswish)]
        # inverted residual blocks
        layers.extend(block(cnf, norm_layer) for cnf in inverted_residual_setting)

        # last conv: expands the final block output six-fold
        head_in_c = inverted_residual_setting[-1].out_c
        head_out_c = 6 * head_in_c
        layers.append(ConvBNActivation(head_in_c,
                                       head_out_c,
                                       kernel_size=1,
                                       norm_layer=norm_layer,
                                       activation_layer=nn.Hardswish))
        self.features = nn.Sequential(*layers)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Sequential(nn.Linear(head_out_c, last_channel),
                                        nn.Hardswish(inplace=True),
                                        nn.Dropout(p=0.2, inplace=True),
                                        nn.Linear(last_channel, num_classes))

        # initial weights
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode="fan_out")
                if module.bias is not None:
                    nn.init.zeros_(module.bias)
            elif isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.zeros_(module.bias)

    def _forward_impl(self, x: Tensor) -> Tensor:
        pooled = self.avgpool(self.features(x))
        # collapse everything after the batch dimension before the linear head
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)

    def forward(self, x: Tensor) -> Tensor:
        return self._forward_impl(x)


def mobilenet_v3_large(num_classes: int = 1000,
                       reduced_tail: bool = False) -> MobileNetV3:
    """
    Build the large MobileNetV3 variant from
    "Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>.

    weights_link:
    https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth

    Args:
        num_classes (int): number of classes
        reduced_tail (bool): If True, halves the channel counts of all feature
            layers between C4 and C5. It is used to reduce the channel
            redundancy in the backbone for Detection and Segmentation.
    """
    width_multi = 1.0
    bneck_conf = partial(InvertedResidualConfig, width_multi=width_multi)
    adjust_channels = partial(InvertedResidualConfig.adjust_channels, width_multi=width_multi)

    reduce_divider = 2 if reduced_tail else 1
    # channel widths of the tail blocks, halved when reduced_tail is set
    tail_c = 160 // reduce_divider
    tail_expanded_c = 960 // reduce_divider

    inverted_residual_setting = [
        # input_c, kernel, expanded_c, out_c, use_se, activation, stride
        bneck_conf(16, 3, 16, 16, False, "RE", 1),
        bneck_conf(16, 3, 64, 24, False, "RE", 2),  # C1
        bneck_conf(24, 3, 72, 24, False, "RE", 1),
        bneck_conf(24, 5, 72, 40, True, "RE", 2),  # C2
        bneck_conf(40, 5, 120, 40, True, "RE", 1),
        bneck_conf(40, 5, 120, 40, True, "RE", 1),
        bneck_conf(40, 3, 240, 80, False, "HS", 2),  # C3
        bneck_conf(80, 3, 200, 80, False, "HS", 1),
        bneck_conf(80, 3, 184, 80, False, "HS", 1),
        bneck_conf(80, 3, 184, 80, False, "HS", 1),
        bneck_conf(80, 3, 480, 112, True, "HS", 1),
        bneck_conf(112, 3, 672, 112, True, "HS", 1),
        bneck_conf(112, 5, 672, tail_c, True, "HS", 2),  # C4
        bneck_conf(tail_c, 5, tail_expanded_c, tail_c, True, "HS", 1),
        bneck_conf(tail_c, 5, tail_expanded_c, tail_c, True, "HS", 1),
    ]
    last_channel = adjust_channels(1280 // reduce_divider)  # C5

    return MobileNetV3(inverted_residual_setting=inverted_residual_setting,
                       last_channel=last_channel,
                       num_classes=num_classes)


def mobilenet_v3_small(num_classes: int = 1000,
                       reduced_tail: bool = False) -> MobileNetV3:
    """
    Constructs a small MobileNetV3 architecture from
    "Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>.

    weights_link:
    https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth

    Args:
        num_classes (int): number of classes
        reduced_tail (bool): If True, halves the channel counts of all feature
            layers between C4 and C5. It is used to reduce the channel
            redundancy in the backbone for Detection and Segmentation.

    Returns:
        A MobileNetV3 instance configured with the small block table below.
    """
    width_multi = 1.0
    bneck_conf = partial(InvertedResidualConfig, width_multi=width_multi)
    adjust_channels = partial(InvertedResidualConfig.adjust_channels, width_multi=width_multi)

    reduce_divider = 2 if reduced_tail else 1
    # channel widths of the tail blocks, halved when reduced_tail is set
    tail_c = 96 // reduce_divider
    tail_expanded_c = 576 // reduce_divider

    inverted_residual_setting = [
        # input_c, kernel, expanded_c, out_c, use_se, activation, stride
        bneck_conf(16, 3, 16, 16, True, "RE", 2),  # C1
        bneck_conf(16, 3, 72, 24, False, "RE", 2),  # C2
        bneck_conf(24, 3, 88, 24, False, "RE", 1),
        bneck_conf(24, 5, 96, 40, True, "HS", 2),  # C3
        bneck_conf(40, 5, 240, 40, True, "HS", 1),
        bneck_conf(40, 5, 240, 40, True, "HS", 1),
        bneck_conf(40, 5, 120, 48, True, "HS", 1),
        bneck_conf(48, 5, 144, 48, True, "HS", 1),
        bneck_conf(48, 5, 288, tail_c, True, "HS", 2),  # C4
        bneck_conf(tail_c, 5, tail_expanded_c, tail_c, True, "HS", 1),
        bneck_conf(tail_c, 5, tail_expanded_c, tail_c, True, "HS", 1),
    ]
    last_channel = adjust_channels(1024 // reduce_divider)  # C5

    return MobileNetV3(inverted_residual_setting=inverted_residual_setting,
                       last_channel=last_channel,
                       num_classes=num_classes)


================================================
FILE: pytorch_classification/Test6_mobilenet/predict.py
================================================
import os
import json

import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

from model_v2 import MobileNetV2


def main():
    """
    Classify one image with a trained MobileNetV2 and show the result.

    Reads the image at ``img_path``, the index-to-name mapping in
    ``./class_indices.json`` and the weights in ``./MobileNetV2.pth``, prints
    the probability of every class and displays the image titled with the
    top prediction.

    Raises:
        AssertionError: if the image or the class index file does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose(
        [transforms.Resize(256),
         transforms.CenterCrop(224),
         transforms.ToTensor(),
         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    # Force three channels: Normalize above is configured for 3-channel input,
    # so grayscale / RGBA / palette images would otherwise fail. torchvision's
    # ImageFolder default loader applies the same conversion to training data.
    img = Image.open(img_path).convert("RGB")
    plt.imshow(img)
    # [C, H, W]
    img = data_transform(img)
    # expand batch dimension -> [N, C, H, W]
    img = torch.unsqueeze(img, dim=0)

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = MobileNetV2(num_classes=5).to(device)
    # load model weights
    model_weight_path = "./MobileNetV2.pth"
    model.load_state_dict(torch.load(model_weight_path, map_location=device))
    model.eval()
    with torch.no_grad():
        # predict class
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        # plain Python int, usable directly as an index and as a JSON key
        predict_cla = torch.argmax(predict).item()

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                                 predict[predict_cla].item())
    plt.title(print_res)
    for i, prob in enumerate(predict.tolist()):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  prob))
    plt.show()


# Entry point: run the prediction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/Test6_mobilenet/train.py
================================================
import os
import sys
import json

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from tqdm import tqdm

from model_v2 import MobileNetV2


def main():
    """
    Fine-tune MobileNetV2 on the flower dataset with a frozen feature extractor.

    Steps: build the train/val loaders from ``../../data_set/flower_data``,
    write the index-to-class mapping to ``class_indices.json``, load the
    pretrained weights from ``./mobilenet_v2.pth`` (skipping tensors whose
    size does not match the 5-class model), freeze ``net.features``, then
    train for ``epochs`` epochs and save the weights with the best validation
    accuracy to ``./MobileNetV2.pth``.

    Raises:
        AssertionError: if the dataset directory or the pretrained weight
            file does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    batch_size = 16
    epochs = 5

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    # invert to index -> class name so predictions can be decoded later
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    # number of workers; os.cpu_count() may return None, so fall back to 1
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    # create model
    net = MobileNetV2(num_classes=5)

    # load pretrain weights
    # download url: https://download.pytorch.org/models/mobilenet_v2-b0353104.pth
    model_weight_path = "./mobilenet_v2.pth"
    assert os.path.exists(model_weight_path), "file {} dose not exist.".format(model_weight_path)
    pre_weights = torch.load(model_weight_path, map_location='cpu')

    # delete classifier weights: keep only tensors that exist in this model
    # with the same element count. The state dict is built once instead of
    # once per checkpoint key, and unknown keys are skipped instead of
    # raising KeyError.
    model_state = net.state_dict()
    pre_dict = {k: v for k, v in pre_weights.items()
                if k in model_state and model_state[k].numel() == v.numel()}
    net.load_state_dict(pre_dict, strict=False)

    # freeze features weights
    for param in net.features.parameters():
        param.requires_grad = False

    net.to(device)

    # define loss function
    loss_function = nn.CrossEntropyLoss()

    # construct an optimizer over the trainable (non-frozen) parameters only
    params = [p for p in net.parameters() if p.requires_grad]
    optimizer = optim.Adam(params, lr=0.0001)

    best_acc = 0.0
    save_path = './MobileNetV2.pth'
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        # NOTE(review): net.train() also puts the BatchNorm layers inside the
        # frozen net.features into training mode, so their running statistics
        # keep updating even though their weights are frozen - confirm this
        # is intended.
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            logits = net(images.to(device))
            loss = loss_function(logits, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            loss_value = loss.item()
            running_loss += loss_value

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss_value)

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                # index of the largest logit is the predicted class
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

                val_bar.desc = "valid epoch[{}/{}]".format(epoch + 1,
                                                           epochs)
        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # keep only the checkpoint with the best validation accuracy so far
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')


# Entry point: run the training only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/Test7_shufflenet/README.md
================================================
## 代码使用简介

1. 下载好数据集，代码中默认使用的是花分类数据集，下载地址: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz),
如果下载不了的话可以通过百度云链接下载: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg 提取码:58p0
2. 在`train.py`脚本中将`--data-path`设置成解压后的`flower_photos`文件夹绝对路径
3. 下载预训练权重，在`model.py`文件中每个模型都有提供预训练权重的下载地址，根据自己使用的模型下载对应预训练权重
4. 在`train.py`脚本中将`--weights`参数设成下载好的预训练权重路径
5. 设置好数据集的路径`--data-path`以及预训练权重的路径`--weights`就能使用`train.py`脚本开始训练了(训练过程中会自动生成`class_indices.json`文件)
6. 在`predict.py`脚本中导入和训练脚本中同样的模型，并将`model_weight_path`设置成训练好的模型权重路径(默认保存在weights文件夹下)
7. 在`predict.py`脚本中将`img_path`设置成你自己需要预测的图片绝对路径
8. 设置好权重路径`model_weight_path`和预测的图片路径`img_path`就能使用`predict.py`脚本进行预测了
9. 如果要使用自己的数据集，请按照花分类数据集的文件结构进行摆放(即一个类别对应一个文件夹)，并且将训练以及预测脚本中的`num_classes`设置成你自己数据的类别数


================================================
FILE: pytorch_classification/Test7_shufflenet/class_indices.json
================================================
{
    "0": "daisy",
    "1": "dandelion",
    "2": "roses",
    "3": "sunflowers",
    "4": "tulips"
}

================================================
FILE: pytorch_classification/Test7_shufflenet/model.py
================================================
from typing import List, Callable

import torch
from torch import Tensor
import torch.nn as nn


def channel_shuffle(x: Tensor, groups: int) -> Tensor:
    """
    Interleave the channels of ``x`` across ``groups`` groups.

    The channel axis is viewed as (groups, channels_per_group), the two axes
    are swapped, and the result is flattened back to a single channel axis.

    Args:
        x: 4-D tensor unpacked as (batch_size, num_channels, height, width).
        groups: number of channel groups.

    Returns:
        Tensor of the same shape as ``x`` with its channels reordered.
    """
    n, c, h, w = x.size()
    per_group = c // groups

    # [n, c, h, w] -> [n, groups, per_group, h, w]
    grouped = x.view(n, groups, per_group, h, w)
    # swap the group axis with the within-group axis; contiguous() is needed
    # before the final view
    swapped = grouped.transpose(1, 2).contiguous()
    # flatten back to [n, c, h, w]
    return swapped.view(n, -1, h, w)


class InvertedResidual(nn.Module):
    """
    ShuffleNetV2 building block.

    With stride 1 the input is split in half along the channel axis: one half
    passes through untouched and the other goes through ``branch2``. With
    stride 2 the full input goes through both ``branch1`` and ``branch2``.
    The two halves are concatenated and channel-shuffled.

    Args:
        input_c: number of input channels.
        output_c: number of output channels; must be even.
        stride: 1 or 2.

    Raises:
        ValueError: if ``stride`` is neither 1 nor 2.
    """

    def __init__(self, input_c: int, output_c: int, stride: int):
        super(InvertedResidual, self).__init__()

        if stride != 1 and stride != 2:
            raise ValueError("illegal stride value.")
        self.stride = stride

        assert output_c % 2 == 0
        branch_features = output_c // 2
        # When stride is 1 the input is split in two, so input_c has to be
        # exactly twice branch_features.
        assert self.stride != 1 or input_c == 2 * branch_features

        if self.stride == 2:
            # downsampling shortcut: depthwise 3x3 then pointwise 1x1
            shortcut_layers = [
                self.depthwise_conv(input_c, input_c, kernel_s=3, stride=self.stride, padding=1),
                nn.BatchNorm2d(input_c),
                nn.Conv2d(input_c, branch_features, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(branch_features),
                nn.ReLU(inplace=True),
            ]
            self.branch1 = nn.Sequential(*shortcut_layers)
        else:
            # identity: the untouched half is concatenated directly in forward()
            self.branch1 = nn.Sequential()

        # branch2 sees the full input when downsampling, otherwise one half of it
        branch2_in = input_c if self.stride > 1 else branch_features
        main_layers = [
            nn.Conv2d(branch2_in, branch_features, kernel_size=1,
                      stride=1, padding=0, bias=False),
            nn.BatchNorm2d(branch_features),
            nn.ReLU(inplace=True),
            self.depthwise_conv(branch_features, branch_features, kernel_s=3, stride=self.stride, padding=1),
            nn.BatchNorm2d(branch_features),
            nn.Conv2d(branch_features, branch_features, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(branch_features),
            nn.ReLU(inplace=True),
        ]
        self.branch2 = nn.Sequential(*main_layers)

    @staticmethod
    def depthwise_conv(input_c: int,
                       output_c: int,
                       kernel_s: int,
                       stride: int = 1,
                       padding: int = 0,
                       bias: bool = False) -> nn.Conv2d:
        """Build a Conv2d whose ``groups`` equals ``input_c`` (depthwise)."""
        return nn.Conv2d(in_channels=input_c,
                         out_channels=output_c,
                         kernel_size=kernel_s,
                         stride=stride,
                         padding=padding,
                         bias=bias,
                         groups=input_c)

    def forward(self, x: Tensor) -> Tensor:
        if self.stride == 1:
            passthrough, to_transform = x.chunk(2, dim=1)
            halves = (passthrough, self.branch2(to_transform))
        else:
            halves = (self.branch1(x), self.branch2(x))

        # mix information between the two halves
        return channel_shuffle(torch.cat(halves, dim=1), 2)


class ShuffleNetV2(nn.Module):
    """
    ShuffleNetV2 image classifier.

    Args:
        stages_repeats: number of blocks in stage2, stage3 and stage4
            (exactly 3 entries).
        stages_out_channels: output channels of conv1, stage2, stage3, stage4
            and conv5 (exactly 5 entries).
        num_classes: output size of the final linear layer.
        inverted_residual: callable building one block from
            (input_channels, output_channels, stride).

    Raises:
        ValueError: if either list has the wrong length.
    """

    def __init__(self,
                 stages_repeats: List[int],
                 stages_out_channels: List[int],
                 num_classes: int = 1000,
                 inverted_residual: Callable[..., nn.Module] = InvertedResidual):
        super(ShuffleNetV2, self).__init__()

        if len(stages_repeats) != 3:
            raise ValueError("expected stages_repeats as list of 3 positive ints")
        if len(stages_out_channels) != 5:
            raise ValueError("expected stages_out_channels as list of 5 positive ints")
        self._stage_out_channels = stages_out_channels

        # input RGB image
        input_channels = 3
        output_channels = self._stage_out_channels[0]

        stem = [
            nn.Conv2d(input_channels, output_channels, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(output_channels),
            nn.ReLU(inplace=True),
        ]
        self.conv1 = nn.Sequential(*stem)
        input_channels = output_channels

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Static annotations for mypy
        self.stage2: nn.Sequential
        self.stage3: nn.Sequential
        self.stage4: nn.Sequential

        stage_specs = zip(stages_repeats, self._stage_out_channels[1:])
        for stage_idx, (repeats, output_channels) in enumerate(stage_specs, start=2):
            # the first block of each stage downsamples, the rest keep the shape
            blocks = [inverted_residual(input_channels, output_channels, 2)]
            blocks += [inverted_residual(output_channels, output_channels, 1)
                       for _ in range(repeats - 1)]
            setattr(self, "stage{}".format(stage_idx), nn.Sequential(*blocks))
            input_channels = output_channels

        output_channels = self._stage_out_channels[-1]
        head = [
            nn.Conv2d(input_channels, output_channels, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(output_channels),
            nn.ReLU(inplace=True),
        ]
        self.conv5 = nn.Sequential(*head)

        self.fc = nn.Linear(output_channels, num_classes)

    def _forward_impl(self, x: Tensor) -> Tensor:
        # See note [TorchScript super()]
        x = self.maxpool(self.conv1(x))
        for stage in (self.stage2, self.stage3, self.stage4):
            x = stage(x)
        x = self.conv5(x)
        # global pool: average over the two spatial dimensions
        pooled = x.mean([2, 3])
        return self.fc(pooled)

    def forward(self, x: Tensor) -> Tensor:
        return self._forward_impl(x)


def shufflenet_v2_x0_5(num_classes=1000):
    """
    Build a ShuffleNetV2 with 0.5x output channels, as described in
    `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design"
    <https://arxiv.org/abs/1807.11164>`.
    weight: https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth

    :param num_classes: output size of the final linear layer
    :return: the constructed ShuffleNetV2 model
    """
    repeats = [4, 8, 4]
    out_channels = [24, 48, 96, 192, 1024]
    return ShuffleNetV2(stages_repeats=repeats,
                        stages_out_channels=out_channels,
                        num_classes=num_classes)


def shufflenet_v2_x1_0(num_classes=1000):
    """
    Build a ShuffleNetV2 with 1.0x output channels, following
    `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design"
    <https://arxiv.org/abs/1807.11164>`.
    weight: https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth

    :param num_classes: size of the final fully connected layer's output
    :return: a newly constructed ``ShuffleNetV2`` instance
    """
    repeats_per_stage = [4, 8, 4]
    channel_plan = [24, 116, 232, 464, 1024]
    return ShuffleNetV2(stages_repeats=repeats_per_stage,
                        stages_out_channels=channel_plan,
                        num_classes=num_classes)


def shufflenet_v2_x1_5(num_classes=1000):
    """
    Build a ShuffleNetV2 with 1.5x output channels, following
    `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design"
    <https://arxiv.org/abs/1807.11164>`.
    weight: https://download.pytorch.org/models/shufflenetv2_x1_5-3c479a10.pth

    :param num_classes: size of the final fully connected layer's output
    :return: a newly constructed ``ShuffleNetV2`` instance
    """
    repeats_per_stage = [4, 8, 4]
    channel_plan = [24, 176, 352, 704, 1024]
    return ShuffleNetV2(stages_repeats=repeats_per_stage,
                        stages_out_channels=channel_plan,
                        num_classes=num_classes)


def shufflenet_v2_x2_0(num_classes=1000):
    """
    Build a ShuffleNetV2 with 2.0x output channels, following
    `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design"
    <https://arxiv.org/abs/1807.11164>`.
    weight: https://download.pytorch.org/models/shufflenetv2_x2_0-8be3c8ee.pth

    :param num_classes: size of the final fully connected layer's output
    :return: a newly constructed ``ShuffleNetV2`` instance
    """
    repeats_per_stage = [4, 8, 4]
    channel_plan = [24, 244, 488, 976, 2048]
    return ShuffleNetV2(stages_repeats=repeats_per_stage,
                        stages_out_channels=channel_plan,
                        num_classes=num_classes)


================================================
FILE: pytorch_classification/Test7_shufflenet/my_dataset.py
================================================
from PIL import Image
import torch
from torch.utils.data import Dataset


class MyDataSet(Dataset):
    """Custom dataset backed by parallel lists of image paths and class labels."""

    def __init__(self, images_path: list, images_class: list, transform=None):
        # images_path[i] and images_class[i] describe the same sample.
        self.images_path = images_path
        self.images_class = images_class
        # Optional callable applied to the opened image in __getitem__.
        self.transform = transform

    def __len__(self):
        """Number of samples, i.e. the number of stored image paths."""
        return len(self.images_path)

    def __getitem__(self, item):
        """
        Open the image at index ``item`` and return ``(image, label)``.

        Raises ValueError when the opened image is not in 'RGB' mode.
        """
        path = self.images_path[item]
        img = Image.open(path)
        # 'RGB' is a colour image, 'L' would be grayscale; only colour is accepted.
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(path))

        label = self.images_class[item]
        if self.transform is None:
            return img, label
        return self.transform(img), label

    @staticmethod
    def collate_fn(batch):
        """
        Merge a list of ``(image, label)`` samples into a pair of batched tensors.

        For the official default_collate implementation see
        https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        """
        images, labels = zip(*batch)
        return torch.stack(images, dim=0), torch.as_tensor(labels)


================================================
FILE: pytorch_classification/Test7_shufflenet/predict.py
================================================
import os
import json

import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

from model import shufflenet_v2_x1_0


def main():
    """
    Classify a single image with a trained ShuffleNetV2 x1.0 and show the result.

    Reads the image from ``../tulip.jpg``, the index-to-name mapping from
    ``./class_indices.json`` and the weights from ``./weights/model-29.pth``;
    prints the probability of every class and displays the image with the
    top prediction as its title.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    picture = Image.open(img_path)
    plt.imshow(picture)
    # transform the image, then expand the batch dimension
    batch = torch.unsqueeze(preprocess(picture), dim=0)

    # read the index -> class name mapping
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)
    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create the model and load the trained weights
    model = shufflenet_v2_x1_0(num_classes=5).to(device)
    model_weight_path = "./weights/model-29.pth"
    trained_state = torch.load(model_weight_path, map_location=device)
    model.load_state_dict(trained_state)
    model.eval()

    with torch.no_grad():
        # predict class
        logits = torch.squeeze(model(batch.to(device))).cpu()
        predict = torch.softmax(logits, dim=0)
        predict_cla = torch.argmax(predict).numpy()

    best_name = class_indict[str(predict_cla)]
    best_prob = predict[predict_cla].numpy()
    print_res = "class: {}   prob: {:.3}".format(best_name, best_prob)
    plt.title(print_res)

    for idx, prob in enumerate(predict):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(idx)],
                                                  prob.numpy()))
    plt.show()


# Run the prediction demo only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/Test7_shufflenet/train.py
================================================
import os
import math
import argparse

import torch
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
import torch.optim.lr_scheduler as lr_scheduler

from model import shufflenet_v2_x1_0
from my_dataset import MyDataSet
from utils import read_split_data, train_one_epoch, evaluate


def main(args):
    """
    Train ShuffleNetV2 x1.0 on a folder-per-class image dataset.

    Splits the dataset found under ``args.data_path``, optionally initialises the
    model from ``args.weights``, optionally freezes everything except the ``fc``
    layer, then trains for ``args.epochs`` epochs with SGD and a cosine learning
    rate schedule. After every epoch the loss, accuracy and learning rate are
    logged to TensorBoard and the weights are saved to ``./weights/model-{epoch}.pth``.

    :param args: namespace providing ``device``, ``data_path``, ``batch_size``,
        ``num_classes``, ``weights``, ``freeze_layers``, ``lr``, ``lrf`` and ``epochs``
    :raises FileNotFoundError: if ``args.weights`` is non-empty but does not exist
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")

    print(args)
    print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
    tb_writer = SummaryWriter()
    os.makedirs("./weights", exist_ok=True)

    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # training dataset
    train_dataset = MyDataSet(images_path=train_images_path,
                              images_class=train_images_label,
                              transform=data_transform["train"])

    # validation dataset
    val_dataset = MyDataSet(images_path=val_images_path,
                            images_class=val_images_label,
                            transform=data_transform["val"])

    batch_size = args.batch_size
    # os.cpu_count() may return None; treat that as a single CPU instead of crashing in min()
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_dataset.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_dataset.collate_fn)

    # load pre-trained weights when a path is given
    model = shufflenet_v2_x1_0(num_classes=args.num_classes).to(device)
    if args.weights != "":
        if not os.path.exists(args.weights):
            raise FileNotFoundError("not found weights file: {}".format(args.weights))

        weights_dict = torch.load(args.weights, map_location=device)
        # Build the model's state dict once instead of once per checkpoint key.
        model_state = model.state_dict()
        # Keep only tensors whose element count matches the model (this drops e.g. a
        # classifier head trained for a different number of classes). Keys unknown to
        # the model are skipped rather than raising KeyError, in line with strict=False.
        load_weights_dict = {k: v for k, v in weights_dict.items()
                             if k in model_state and model_state[k].numel() == v.numel()}
        print(model.load_state_dict(load_weights_dict, strict=False))

    # optionally freeze weights
    if args.freeze_layers:
        for name, para in model.named_parameters():
            # freeze everything except the final fully connected layer
            if "fc" not in name:
                para.requires_grad_(False)

    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=4E-5)
    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    # Cosine factor that starts at 1 for epoch 0 and reaches args.lrf at epoch == args.epochs.
    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    for epoch in range(args.epochs):
        # train
        mean_loss = train_one_epoch(model=model,
                                    optimizer=optimizer,
                                    data_loader=train_loader,
                                    device=device,
                                    epoch=epoch)

        scheduler.step()

        # validate
        acc = evaluate(model=model,
                       data_loader=val_loader,
                       device=device)

        print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3)))
        tags = ["loss", "accuracy", "learning_rate"]
        tb_writer.add_scalar(tags[0], mean_loss, epoch)
        tb_writer.add_scalar(tags[1], acc, epoch)
        tb_writer.add_scalar(tags[2], optimizer.param_groups[0]["lr"], epoch)

        torch.save(model.state_dict(), "./weights/model-{}.pth".format(epoch))

    # flush pending events and release the event file
    tb_writer.close()


# Command-line entry point: parse the training options and start training.
if __name__ == '__main__':
    def _str2bool(value):
        """
        Parse a command-line boolean.

        ``type=bool`` would call ``bool("False")``, which is True for any
        non-empty string, so ``--freeze-layers False`` used to enable freezing.
        """
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ("true", "t", "yes", "y", "1"):
            return True
        if text in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--epochs', type=int, default=30)
    parser.add_argument('--batch-size', type=int, default=16)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--lrf', type=float, default=0.1)

    # root directory of the dataset
    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
    parser.add_argument('--data-path', type=str,
                        default="/data/flower_photos")

    # download link of the official shufflenetv2_x1.0 weights
    # https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth
    parser.add_argument('--weights', type=str, default='./shufflenetv2_x1.pth',
                        help='initial weights path')
    parser.add_argument('--freeze-layers', type=_str2bool, default=False)
    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')

    opt = parser.parse_args()

    main(opt)


================================================
FILE: pytorch_classification/Test7_shufflenet/utils.py
================================================
import os
import sys
import json
import pickle
import random

import torch
from tqdm import tqdm

import matplotlib.pyplot as plt


def read_split_data(root: str, val_rate: float = 0.2):
    """
    Split a folder-per-class image dataset into training and validation lists.

    Every sub-directory of ``root`` is treated as one class. Class names are sorted
    and numbered from 0, and the index -> name mapping is written to
    ``class_indices.json`` in the current working directory. For each class,
    ``int(len(images) * val_rate)`` images are sampled for validation; the rest
    go to training. The global ``random`` module is seeded with 0 so the split is
    reproducible.

    :param root: dataset root directory containing one sub-directory per class
    :param val_rate: fraction of each class assigned to the validation set
    :return: ``(train_images_path, train_images_label, val_images_path, val_images_label)``
    :raises AssertionError: if ``root`` does not exist or either split is empty
    """
    random.seed(0)  # make the random split reproducible
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # walk the root directory: one sub-directory per class
    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]
    # sort so the ordering is the same on every platform
    flower_class.sort()
    # class name -> numeric index
    class_indices = {name: index for index, name in enumerate(flower_class)}
    json_str = json.dumps({index: name for name, index in class_indices.items()}, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of each training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of each validation image
    every_class_num = []  # number of samples per class
    supported = [".jpg", ".JPG", ".png", ".PNG"]  # supported file suffixes
    # walk the files of every class directory
    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # collect every file whose suffix is supported
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        # sort so the ordering is the same on every platform
        images.sort()
        # numeric index of this class
        image_class = class_indices[cla]
        # record the number of samples of this class
        every_class_num.append(len(images))
        # Sample the validation images from the sorted list (same draws as before),
        # then keep them in a set so each membership test below is O(1) instead of O(n).
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:  # sampled for validation
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:  # everything else is used for training
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.".format(sum(every_class_num)))
    print("{} images for training.".format(len(train_images_path)))
    print("{} images for validation.".format(len(val_images_path)))
    assert len(train_images_path) > 0, "number of training images must greater than 0."
    assert len(val_images_path) > 0, "number of validation images must greater than 0."

    plot_image = False
    if plot_image:
        # bar chart of the number of images per class
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # replace the x ticks 0,1,2,3,4 with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # put the count on top of every bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        # x axis label
        plt.xlabel('image class')
        # y axis label
        plt.ylabel('number of images')
        # chart title
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label


def plot_data_loader_image(data_loader):
    """
    Show up to four images of every batch produced by ``data_loader``.

    Class names are read from ``./class_indices.json``. Each image is
    de-normalised with the mean/std constants below before display
    (assumes the loader applied the matching ``Normalize`` -- confirm against
    the transform actually used).

    :param data_loader: iterable yielding ``(images, labels)`` batches and
        exposing a ``batch_size`` attribute
    :raises AssertionError: if ``./class_indices.json`` does not exist
    """
    batch_size = data_loader.batch_size
    plot_num = min(batch_size, 4)

    json_path = './class_indices.json'
    assert os.path.exists(json_path), json_path + " does not exist."
    # context manager so the file handle is closed once the mapping is loaded
    with open(json_path, 'r') as json_file:
        class_indices = json.load(json_file)

    for images, labels in data_loader:
        # the last batch can hold fewer than plot_num samples; never index past it
        for i in range(min(plot_num, len(images))):
            # [C, H, W] -> [H, W, C]
            img = images[i].numpy().transpose(1, 2, 0)
            # undo the Normalize operation
            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255
            label = labels[i].item()
            plt.subplot(1, plot_num, i+1)
            plt.xlabel(class_indices[str(label)])
            plt.xticks([])  # hide the x axis ticks
            plt.yticks([])  # hide the y axis ticks
            plt.imshow(img.astype('uint8'))
        plt.show()


def write_pickle(list_info: list, file_name: str):
    """Pickle ``list_info`` into the file ``file_name``, overwriting any existing content."""
    with open(file_name, mode='wb') as sink:
        pickle.dump(list_info, sink)


def read_pickle(file_name: str) -> list:
    """
    Load and return the object pickled in ``file_name``.

    NOTE: unpickling can execute arbitrary code; only read files from a trusted source.
    """
    with open(file_name, mode='rb') as source:
        return pickle.load(source)


def train_one_epoch(model, optimizer, data_loader, device, epoch):
    """
    Train ``model`` for one pass over ``data_loader`` with cross-entropy loss.

    The progress bar description shows the running mean loss. If a non-finite
    loss is encountered a warning is printed and the process exits with status 1.

    :param model: network to train (switched to training mode here)
    :param optimizer: optimizer stepped once per batch
    :param data_loader: iterable of ``(images, labels)`` batches
    :param device: device the batches are moved to
    :param epoch: epoch number, used only for the progress bar text
    :return: mean loss over all batches as a Python float
    """
    model.train()
    criterion = torch.nn.CrossEntropyLoss()
    running_mean = torch.zeros(1).to(device)
    optimizer.zero_grad()

    progress = tqdm(data_loader, file=sys.stdout)

    for step, (images, labels) in enumerate(progress):
        logits = model(images.to(device))
        loss = criterion(logits, labels.to(device))
        loss.backward()

        # incremental mean: fold this batch's loss into the average of the previous `step` batches
        running_mean = (running_mean * step + loss.detach()) / (step + 1)
        progress.desc = "[epoch {}] mean loss {}".format(epoch, round(running_mean.item(), 3))

        # abort before applying an update computed from a NaN/inf loss
        if not torch.isfinite(loss):
            print('WARNING: non-finite loss, ending training ', loss)
            sys.exit(1)

        optimizer.step()
        optimizer.zero_grad()

    return running_mean.item()


@torch.no_grad()
def evaluate(model, data_loader, device):
    """
    Compute the classification accuracy of ``model`` over ``data_loader``.

    :param model: network to evaluate (switched to eval mode here)
    :param data_loader: iterable of ``(images, labels)`` batches exposing ``.dataset``
    :param device: device the batches are moved to
    :return: number of correct predictions divided by ``len(data_loader.dataset)``
    """
    model.eval()

    # total number of validation samples
    total_num = len(data_loader.dataset)

    # running count of correctly predicted samples
    correct = torch.zeros(1).to(device)

    progress = tqdm(data_loader, file=sys.stdout)

    for images, labels in progress:
        logits = model(images.to(device))
        # index of the largest score along dim 1 is the predicted class
        _, predicted = torch.max(logits, dim=1)
        correct += torch.eq(predicted, labels.to(device)).sum()

    return correct.item() / total_num


================================================
FILE: pytorch_classification/Test8_densenet/README.md
================================================
## 代码使用简介

1. 下载好数据集，代码中默认使用的是花分类数据集，下载地址: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz),
如果下载不了的话可以通过百度云链接下载: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg 提取码:58p0
2. 在`train.py`脚本中将`--data-path`设置成解压后的`flower_photos`文件夹绝对路径
3. 下载预训练权重，在`model.py`文件中每个模型都有提供预训练权重的下载地址，根据自己使用的模型下载对应预训练权重
4. 在`train.py`脚本中将`--weights`参数设成下载好的预训练权重路径
5. 设置好数据集的路径`--data-path`以及预训练权重的路径`--weights`就能使用`train.py`脚本开始训练了(训练过程中会自动生成`class_indices.json`文件)
6. 在`predict.py`脚本中导入和训练脚本中同样的模型，并将`model_weight_path`设置成训练好的模型权重路径(默认保存在weights文件夹下)
7. 在`predict.py`脚本中将`img_path`设置成你自己需要预测的图片绝对路径
8. 设置好权重路径`model_weight_path`和预测的图片路径`img_path`就能使用`predict.py`脚本进行预测了
9. 如果要使用自己的数据集，请按照花分类数据集的文件结构进行摆放(即一个类别对应一个文件夹)，并且将训练以及预测脚本中的`num_classes`设置成你自己数据的类别数


================================================
FILE: pytorch_classification/Test8_densenet/model.py
================================================
import re
from typing import Any, List, Tuple
from collections import OrderedDict

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.checkpoint as cp
from torch import Tensor


class _DenseLayer(nn.Module):
    def __init__(self,
                 input_c: int,
                 growth_rate: int,
                 bn_size: int,
                 drop_rate: float,
                 memory_efficient: bool = False):
        super(_DenseLayer, self).__init__()

        self.add_module("norm1", nn.BatchNorm2d(input_c))
        self.add_module("relu1", nn.ReLU(inplace=True))
        self.add_module("conv1", nn.Conv2d(in_channels=input_c,
                                           out_channels=bn_size * growth_rate,
                                           kernel_size=1,
                                           stride=1,
                                           bias=False))
        self.add_module("norm2", nn.BatchNorm2d(bn_size * growth_rate))
        self.add_module("relu2", nn.ReLU(inplace=True))
        self.add_module("conv2", nn.Conv2d(bn_size * growth_rate,
                                           growth_rate,
                                           kernel_size=3,
                                           stride=1,
                                           padding=1,
                                           bias=False))
        self.drop_rate = drop_rate
        self.memory_efficient = memory_efficient

    def bn_function(self, inputs: List[Tensor]) -> Tensor:
        concat_features = torch.cat(inputs, 1)
        bottleneck_output = self.conv1(self.relu1(self.norm1(concat_features)))
        return bottleneck_output

    @staticmethod
    def any_requires_grad(inputs: List[Tensor]) -> bool:
        for tensor in inputs:
            if tensor.requires_grad:
                return True

        return False

    @torch.jit.unused
    def call_checkpoint_bottleneck(self, inputs: List[Tensor]) -> Tensor:
        def closure(*inp):
            return self.bn_function(inp)

        return cp.checkpoint(closure, *inputs)

    def forward(self, inputs: Tensor) -> Tensor:
        if isinstance(inputs, Tensor):
            prev_features = [inputs]
        else:
            prev_features = inputs

        if self.memory_efficient and self.any_requires_grad(prev_features):
            if torch.jit.is_scripting():
                raise Exception("memory efficient not supported in JIT")

            bottleneck_output = self.call_checkpoint_bottleneck(prev_features)
        else:
            bottleneck_output = self.bn_function(prev_features)

        new_features = self.conv2(self.relu2(self.norm2(bottleneck_output)))
        if self.drop_rate > 0:
            new_features = F.dropout(new_features,
                                     p=self.drop_rate,
                                     training=self.training)

        return new_features


class _DenseBlock(nn.ModuleDict):
    _version = 2

    def __init__(self,
                 num_layers: int,
                 input_c: int,
                 bn_size: int,
                 growth_rate: int,
                 drop_rate: float,
                 memory_efficient: bool = False):
        super(_DenseBlock, self).__init__()
        for i in range(num_layers):
            layer = _DenseLayer(input_c + i * growth_rate,
                                growth_rate=growth_rate,
                                bn_size=bn_size,
                                drop_rate=drop_rate,
                                memory_efficient=memory_efficient)
            self.add_module("denselayer%d" % (i + 1), layer)

    def forward(self, init_features: Tensor) -> Tensor:
        features = [init_features]
        for name, layer in self.items():
            new_features = layer(features)
            features.append(new_features)
        return torch.cat(features, 1)


class _Transition(nn.Sequential):
    def __init__(self,
                 input_c: int,
                 output_c: int):
        super(_Transition, self).__init__()
        self.add_module("norm", nn.BatchNorm2d(input_c))
        self.add_module("relu", nn.ReLU(inplace=True))
        self.add_module("conv", nn.Conv2d(input_c,
                                          output_c,
                                          kernel_size=1,
                                          stride=1,
                                          bias=False))
        self.add_module("pool", nn.AvgPool2d(kernel_size=2, stride=2))


class DenseNet(nn.Module):
    """
    Densenet-BC model class for imagenet.

    Args:
        growth_rate (int) - number of filters added by each layer (`k` in the paper)
        block_config (list of 4 ints) - number of layers in each pooling block
        num_init_features (int) - number of filters learned by the first convolution
        bn_size (int) - multiplicative factor for the bottleneck width
          (i.e. bn_size * k features in the bottleneck layer)
        drop_rate (float) - dropout rate after each dense layer
        num_classes (int) - number of classification classes
        memory_efficient (bool) - if True, uses checkpointing (much more memory efficient)
    """

    def __init__(self,
                 growth_rate: int = 32,
                 block_config: Tuple[int, int, int, int] = (6, 12, 24, 16),
                 num_init_features: int = 64,
                 bn_size: int = 4,
                 drop_rate: float = 0,
                 num_classes: int = 1000,
                 memory_efficient: bool = False):
        super(DenseNet, self).__init__()

        # stem: conv + bn + relu + pool
        stem = OrderedDict()
        stem["conv0"] = nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)
        stem["norm0"] = nn.BatchNorm2d(num_init_features)
        stem["relu0"] = nn.ReLU(inplace=True)
        stem["pool0"] = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.features = nn.Sequential(stem)

        # dense blocks, each followed by a transition except the last one
        num_features = num_init_features
        last_index = len(block_config) - 1
        for i, num_layers in enumerate(block_config):
            self.features.add_module(
                "denseblock%d" % (i + 1),
                _DenseBlock(num_layers=num_layers,
                            input_c=num_features,
                            bn_size=bn_size,
                            growth_rate=growth_rate,
                            drop_rate=drop_rate,
                            memory_efficient=memory_efficient))
            # every layer of the block contributes growth_rate channels
            num_features += num_layers * growth_rate

            if i == last_index:
                continue

            # the transition halves the channel count
            halved = num_features // 2
            self.features.add_module("transition%d" % (i + 1),
                                     _Transition(input_c=num_features,
                                                 output_c=halved))
            num_features = halved

        # final batch norm
        self.features.add_module("norm5", nn.BatchNorm2d(num_features))

        # fc layer
        self.classifier = nn.Linear(num_features, num_classes)

        # init weights
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Linear):
                nn.init.constant_(module.bias, 0)

    def forward(self, x: Tensor) -> Tensor:
        """Extract features, apply ReLU and global average pooling, then classify."""
        activated = F.relu(self.features(x), inplace=True)
        pooled = F.adaptive_avg_pool2d(activated, (1, 1))
        return self.classifier(torch.flatten(pooled, 1))


def densenet121(**kwargs: Any) -> DenseNet:
    """Build DenseNet-121; extra keyword arguments are forwarded to ``DenseNet``."""
    # Top-1 error: 25.35%
    # 'densenet121': 'https://download.pytorch.org/models/densenet121-a639ec97.pth'
    preset = dict(growth_rate=32,
                  block_config=(6, 12, 24, 16),
                  num_init_features=64)
    return DenseNet(**preset, **kwargs)


def densenet169(**kwargs: Any) -> DenseNet:
    """Build DenseNet-169; extra keyword arguments are forwarded to ``DenseNet``."""
    # Top-1 error: 24.00%
    # 'densenet169': 'https://download.pytorch.org/models/densenet169-b2777c0a.pth'
    preset = dict(growth_rate=32,
                  block_config=(6, 12, 32, 32),
                  num_init_features=64)
    return DenseNet(**preset, **kwargs)


def densenet201(**kwargs: Any) -> DenseNet:
    """Build DenseNet-201; extra keyword arguments are forwarded to ``DenseNet``."""
    # Top-1 error: 22.80%
    # 'densenet201': 'https://download.pytorch.org/models/densenet201-c1103571.pth'
    preset = dict(growth_rate=32,
                  block_config=(6, 12, 48, 32),
                  num_init_features=64)
    return DenseNet(**preset, **kwargs)


def densenet161(**kwargs: Any) -> DenseNet:
    """Build DenseNet-161; extra keyword arguments are forwarded to ``DenseNet``."""
    # Top-1 error: 22.35%
    # 'densenet161': 'https://download.pytorch.org/models/densenet161-8d451a50.pth'
    preset = dict(growth_rate=48,
                  block_config=(6, 12, 36, 24),
                  num_init_features=96)
    return DenseNet(**preset, **kwargs)


def load_state_dict(model: nn.Module, weights_path: str) -> None:
    """
    Load pre-trained DenseNet weights from ``weights_path`` into ``model``.

    Old-style checkpoint keys such as ``...denselayer1.norm.1.weight`` are renamed
    to ``...denselayer1.norm1.weight``. When the model's classifier does not have
    1000 outputs, every checkpoint entry whose key contains ``"classifier"`` is
    dropped and the remaining weights are loaded non-strictly.

    :param model: model exposing a ``classifier`` layer with ``out_features``
    :param weights_path: path to the checkpoint file
    """
    # '.'s are no longer allowed in module names, but previous _DenseLayer
    # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'.
    # They are also in the checkpoints in model_urls. This pattern is used
    # to find such keys.
    pattern = re.compile(
        r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')

    # Map storages to CPU so a checkpoint saved from a GPU also loads on a CPU-only
    # host; Module.load_state_dict copies the values into the model's own tensors.
    state_dict = torch.load(weights_path, map_location="cpu")

    num_classes = model.classifier.out_features
    # the pre-trained classifier is only reusable for a 1000-class head
    load_fc = num_classes == 1000

    for key in list(state_dict.keys()):
        if not load_fc and "classifier" in key:
            del state_dict[key]
            # the entry is gone; do not fall through to the rename logic below
            continue

        res = pattern.match(key)
        if res:
            # join the two captured parts without the separating dot: 'norm' + '1.weight'
            new_key = res.group(1) + res.group(2)
            state_dict[new_key] = state_dict.pop(key)
    model.load_state_dict(state_dict, strict=load_fc)
    print("successfully load pretrain-weights.")


================================================
FILE: pytorch_classification/Test8_densenet/my_dataset.py
================================================
from PIL import Image
import torch
from torch.utils.data import Dataset


class MyDataSet(Dataset):
    """Custom dataset backed by parallel lists of image paths and class labels."""

    def __init__(self, images_path: list, images_class: list, transform=None):
        # images_path[i] and images_class[i] describe the same sample.
        self.images_path = images_path
        self.images_class = images_class
        # Optional callable applied to the opened image in __getitem__.
        self.transform = transform

    def __len__(self):
        """Number of samples, i.e. the number of stored image paths."""
        return len(self.images_path)

    def __getitem__(self, item):
        """
        Open the image at index ``item`` and return ``(image, label)``.

        Raises ValueError when the opened image is not in 'RGB' mode.
        """
        path = self.images_path[item]
        img = Image.open(path)
        # 'RGB' is a colour image, 'L' would be grayscale; only colour is accepted.
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(path))

        label = self.images_class[item]
        if self.transform is None:
            return img, label
        return self.transform(img), label

    @staticmethod
    def collate_fn(batch):
        """
        Merge a list of ``(image, label)`` samples into a pair of batched tensors.

        For the official default_collate implementation see
        https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        """
        images, labels = zip(*batch)
        return torch.stack(images, dim=0), torch.as_tensor(labels)


================================================
FILE: pytorch_classification/Test8_densenet/predict.py
================================================
import os
import json

import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

from model import densenet121


def main():
    """
    Classify a single image with a trained DenseNet-121 and show the result.

    Reads the image from ``../tulip.jpg``, the index-to-name mapping from
    ``./class_indices.json`` and the weights from ``./weights/model-3.pth``;
    prints the probability of every class and displays the image with the
    top prediction as its title.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    picture = Image.open(img_path)
    plt.imshow(picture)
    # transform the image, then expand the batch dimension
    batch = torch.unsqueeze(preprocess(picture), dim=0)

    # read the index -> class name mapping
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)
    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create the model and load the trained weights
    model = densenet121(num_classes=5).to(device)
    model_weight_path = "./weights/model-3.pth"
    trained_state = torch.load(model_weight_path, map_location=device)
    model.load_state_dict(trained_state)
    model.eval()

    with torch.no_grad():
        # predict class
        logits = torch.squeeze(model(batch.to(device))).cpu()
        predict = torch.softmax(logits, dim=0)
        predict_cla = torch.argmax(predict).numpy()

    best_name = class_indict[str(predict_cla)]
    best_prob = predict[predict_cla].numpy()
    print_res = "class: {}   prob: {:.3}".format(best_name, best_prob)
    plt.title(print_res)

    for idx, prob in enumerate(predict):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(idx)],
                                                  prob.numpy()))
    plt.show()


# Run the prediction demo only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/Test8_densenet/train.py
================================================
import os
import math
import argparse

import torch
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
import torch.optim.lr_scheduler as lr_scheduler

from model import densenet121, load_state_dict
from my_dataset import MyDataSet
from utils import read_split_data, train_one_epoch, evaluate


def _run_training(args, device, tb_writer):
    """Build the data pipeline, model and optimizer, then run the epoch loop.

    Args:
        args: Parsed command-line namespace; the attributes read here are
            ``data_path``, ``batch_size``, ``num_classes``, ``weights``,
            ``freeze_layers``, ``lr``, ``lrf`` and ``epochs``.
        device: ``torch.device`` the model and batches are moved to.
        tb_writer: TensorBoard writer receiving per-epoch scalars. It is owned
            (and closed) by the caller.

    Raises:
        FileNotFoundError: If ``args.weights`` is non-empty but does not exist.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs("./weights", exist_ok=True)

    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # Instantiate the training dataset
    train_dataset = MyDataSet(images_path=train_images_path,
                              images_class=train_images_label,
                              transform=data_transform["train"])

    # Instantiate the validation dataset
    val_dataset = MyDataSet(images_path=val_images_path,
                            images_class=val_images_label,
                            transform=data_transform["val"])

    batch_size = args.batch_size
    # os.cpu_count() may return None when the count cannot be determined;
    # fall back to 1 so that min() never compares None with an int.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_dataset.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_dataset.collate_fn)

    # Load the pre-trained weights when a path is given
    model = densenet121(num_classes=args.num_classes).to(device)
    if args.weights != "":
        if os.path.exists(args.weights):
            load_state_dict(model, args.weights)
        else:
            raise FileNotFoundError("not found weights file: {}".format(args.weights))

    # Optionally freeze weights
    if args.freeze_layers:
        for name, para in model.named_parameters():
            # Freeze everything except the final fully connected (classifier) layer
            if "classifier" not in name:
                para.requires_grad_(False)

    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=1E-4, nesterov=True)

    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    def lf(x):
        # Cosine factor going from 1 at epoch 0 down to args.lrf at args.epochs.
        return ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf

    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    for epoch in range(args.epochs):
        # train
        mean_loss = train_one_epoch(model=model,
                                    optimizer=optimizer,
                                    data_loader=train_loader,
                                    device=device,
                                    epoch=epoch)

        scheduler.step()

        # validate
        acc = evaluate(model=model,
                       data_loader=val_loader,
                       device=device)

        print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3)))
        tb_writer.add_scalar("loss", mean_loss, epoch)
        tb_writer.add_scalar("accuracy", acc, epoch)
        tb_writer.add_scalar("learning_rate", optimizer.param_groups[0]["lr"], epoch)

        # One checkpoint per epoch
        torch.save(model.state_dict(), "./weights/model-{}.pth".format(epoch))


def main(args):
    """Train DenseNet-121 and log loss / accuracy / learning rate to TensorBoard.

    Args:
        args: Parsed command-line namespace (see the argument parser at the
            bottom of this file). ``args.device`` is used only when CUDA is
            available; otherwise training runs on the CPU.

    Raises:
        FileNotFoundError: If ``args.weights`` is non-empty but does not exist.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")

    print(args)
    print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
    tb_writer = SummaryWriter()
    try:
        _run_training(args, device, tb_writer)
    finally:
        # Flush pending events and release the event file even when training
        # stops early with an exception.
        tb_writer.close()


if __name__ == '__main__':
    def _str2bool(value):
        """Convert a command-line token into a real boolean.

        ``type=bool`` would turn every non-empty string (including "False")
        into True, so the token is parsed explicitly instead.
        """
        if isinstance(value, bool):
            return value
        token = value.strip().lower()
        if token in ("true", "t", "yes", "y", "1"):
            return True
        if token in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--epochs', type=int, default=30)
    parser.add_argument('--batch-size', type=int, default=16)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--lrf', type=float, default=0.1)

    # Root directory of the dataset
    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
    parser.add_argument('--data-path', type=str,
                        default="/data/flower_photos")

    # Download URL of the official densenet121 weights
    # https://download.pytorch.org/models/densenet121-a639ec97.pth
    parser.add_argument('--weights', type=str, default='densenet121.pth',
                        help='initial weights path')
    parser.add_argument('--freeze-layers', type=_str2bool, default=False)
    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')

    opt = parser.parse_args()

    main(opt)


================================================
FILE: pytorch_classification/Test8_densenet/utils.py
================================================
import os
import sys
import json
import pickle
import random

import torch
from tqdm import tqdm

import matplotlib.pyplot as plt


def read_split_data(root: str, val_rate: float = 0.2):
    """Split an image-folder dataset into training and validation lists.

    Every sub-directory of ``root`` is treated as one class. As a side effect
    the index -> class-name mapping is written to ``class_indices.json`` in the
    current working directory, and the global ``random`` module is re-seeded
    with 0 so that the split is reproducible.

    Args:
        root: Dataset root directory containing one folder per class.
        val_rate: Fraction of each class's images sampled for validation.

    Returns:
        A 4-tuple ``(train_images_path, train_images_label, val_images_path,
        val_images_label)`` of parallel lists (file paths and integer labels).

    Raises:
        AssertionError: If ``root`` does not exist or either split is empty.
    """
    random.seed(0)  # make the random split reproducible
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # Walk the folders: one folder corresponds to one class.
    # Sorted so that the ordering is identical on every platform.
    flower_class = sorted(cla for cla in os.listdir(root)
                          if os.path.isdir(os.path.join(root, cla)))
    # Map each class name to a numeric index and persist the inverse mapping
    class_indices = {name: index for index, name in enumerate(flower_class)}
    json_str = json.dumps({index: name for name, index in class_indices.items()}, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of every training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of every validation image
    every_class_num = []  # number of samples found for each class
    supported = [".jpg", ".JPG", ".png", ".PNG"]  # accepted file extensions
    # Visit the files of every class folder
    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # Collect every file whose extension is supported (sorted for
        # platform-independent ordering)
        images = sorted(os.path.join(root, cla, i) for i in os.listdir(cla_path)
                        if os.path.splitext(i)[-1] in supported)
        # Index of this class
        image_class = class_indices[cla]
        # Record how many samples this class has
        every_class_num.append(len(images))
        # Randomly sample the validation images by ratio. A set gives O(1)
        # membership tests below instead of scanning a list for every image.
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:  # sampled for validation
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:  # everything else goes to training
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.".format(sum(every_class_num)))
    print("{} images for training.".format(len(train_images_path)))
    print("{} images for validation.".format(len(val_images_path)))
    assert len(train_images_path) > 0, "number of training images must greater than 0."
    assert len(val_images_path) > 0, "number of validation images must greater than 0."

    plot_image = False  # flip to True to show the class-distribution bar chart
    if plot_image:
        # Bar chart with the number of images per class
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # Replace the x ticks 0,1,2,3,4 with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # Put the count above every bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        # x axis label
        plt.xlabel('image class')
        # y axis label
        plt.ylabel('number of images')
        # chart title
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label


def plot_data_loader_image(data_loader):
    """Show up to four images from every batch of ``data_loader``.

    The class names used as captions are read from ``./class_indices.json``.
    Each image is de-normalised with the mean/std constants below before it
    is displayed, and one figure is shown per batch.

    Args:
        data_loader: Iterable yielding ``(images, labels)`` batches and
            exposing a ``batch_size`` attribute.

    Raises:
        AssertionError: If ``./class_indices.json`` does not exist.
    """
    batch_size = data_loader.batch_size
    plot_num = min(batch_size, 4)

    json_path = './class_indices.json'
    assert os.path.exists(json_path), json_path + " does not exist."
    # Context manager so the file handle is closed once the mapping is loaded.
    with open(json_path, 'r') as json_file:
        class_indices = json.load(json_file)

    for data in data_loader:
        images, labels = data
        # The last batch may hold fewer than plot_num samples; never index past it.
        for i in range(min(plot_num, len(images))):
            # [C, H, W] -> [H, W, C]
            img = images[i].numpy().transpose(1, 2, 0)
            # Undo the Normalize transform
            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255
            label = labels[i].item()
            plt.subplot(1, plot_num, i+1)
            plt.xlabel(class_indices[str(label)])
            plt.xticks([])  # hide the x-axis ticks
            plt.yticks([])  # hide the y-axis ticks
            plt.imshow(img.astype('uint8'))
        plt.show()


def write_pickle(list_info: list, file_name: str):
    """Pickle ``list_info`` into the binary file ``file_name`` (overwriting it)."""
    with open(file_name, 'wb') as sink:
        pickle.Pickler(sink).dump(list_info)


def read_pickle(file_name: str) -> list:
    """Return the object stored in the pickle file ``file_name``.

    NOTE: unpickling can execute arbitrary code, so only load trusted files.
    """
    with open(file_name, 'rb') as source:
        return pickle.Unpickler(source).load()


def train_one_epoch(model, optimizer, data_loader, device, epoch):
    """Train ``model`` for one pass over ``data_loader``.

    Uses cross-entropy loss, steps the optimizer after every batch and shows
    the running mean loss in a tqdm progress bar. The process exits with
    status 1 as soon as a non-finite loss is encountered.

    Args:
        model: Network to train (switched to training mode here).
        optimizer: Optimizer updating the model parameters.
        data_loader: Iterable of ``(images, labels)`` batches.
        device: Device the batches are moved to.
        epoch: Epoch number, used only for the progress-bar text.

    Returns:
        The mean loss over all batches as a Python float.
    """
    model.train()
    criterion = torch.nn.CrossEntropyLoss()
    running_mean = torch.zeros(1).to(device)
    optimizer.zero_grad()

    progress = tqdm(data_loader, file=sys.stdout)

    for step, (images, labels) in enumerate(progress):
        logits = model(images.to(device))

        loss = criterion(logits, labels.to(device))
        loss.backward()
        # Incremental mean: fold the current batch loss into the average so far
        running_mean = (running_mean * step + loss.detach()) / (step + 1)

        progress.desc = "[epoch {}] mean loss {}".format(epoch, round(running_mean.item(), 3))

        if not torch.isfinite(loss):
            print('WARNING: non-finite loss, ending training ', loss)
            sys.exit(1)

        optimizer.step()
        optimizer.zero_grad()

    return running_mean.item()


@torch.no_grad()
def evaluate(model, data_loader, device):
    """Return the top-1 accuracy of ``model`` over ``data_loader``.

    Args:
        model: Network to evaluate (switched to eval mode here).
        data_loader: Iterable of ``(images, labels)`` batches that exposes a
            ``dataset`` attribute supporting ``len()``.
        device: Device the batches are moved to.

    Returns:
        Number of correct predictions divided by ``len(data_loader.dataset)``.
    """
    model.eval()

    # Total number of validation samples
    sample_count = len(data_loader.dataset)

    # Accumulates the number of correctly predicted samples
    correct = torch.zeros(1).to(device)

    progress = tqdm(data_loader, file=sys.stdout)

    for images, labels in progress:
        logits = model(images.to(device))
        # Index of the largest score along the class dimension
        _, predicted = torch.max(logits, dim=1)
        correct += torch.eq(predicted, labels.to(device)).sum()

    return correct.item() / sample_count


================================================
FILE: pytorch_classification/Test9_efficientNet/README.md
================================================
## 代码使用简介

1. 下载好数据集，代码中默认使用的是花分类数据集，下载地址: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz),
如果下载不了的话可以通过百度云链接下载: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg 提取码:58p0
2. 在`train.py`脚本中将`--data-path`设置成解压后的`flower_photos`文件夹绝对路径
3. 下载预训练权重，根据自己使用的模型下载对应预训练权重: https://pan.baidu.com/s/1ouX0UmjCsmSx3ZrqXbowjw  密码: 090i
4. 在`train.py`脚本中将`--weights`参数设成下载好的预训练权重路径
5. 设置好数据集的路径`--data-path`以及预训练权重的路径`--weights`就能使用`train.py`脚本开始训练了(训练过程中会自动生成`class_indices.json`文件)
6. 在`predict.py`脚本中导入和训练脚本中同样的模型，并将`model_weight_path`设置成训练好的模型权重路径(默认保存在weights文件夹下)
7. 在`predict.py`脚本中将`img_path`设置成你自己需要预测的图片绝对路径
8. 设置好权重路径`model_weight_path`和预测的图片路径`img_path`就能使用`predict.py`脚本进行预测了
9. 如果要使用自己的数据集，请按照花分类数据集的文件结构进行摆放(即一个类别对应一个文件夹)，并且将训练以及预测脚本中的`num_classes`设置成你自己数据的类别数


================================================
FILE: pytorch_classification/Test9_efficientNet/model.py
================================================
import math
import copy
from functools import partial
from collections import OrderedDict
from typing import Optional, Callable

import torch
import torch.nn as nn
from torch import Tensor
from torch.nn import functional as F


def _make_divisible(ch, divisor=8, min_ch=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    """
    if min_ch is None:
        min_ch = divisor
    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_ch < 0.9 * ch:
        new_ch += divisor
    return new_ch


def drop_path(x, drop_prob: float = 0., training: bool = False):
    """
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
    "Deep Networks with Stochastic Depth", https://arxiv.org/pdf/1603.09382.pdf

    ``x`` is returned untouched when ``drop_prob`` is 0 or ``training`` is
    False. Otherwise one random 0/1 value is drawn per sample (first
    dimension), and the kept samples are scaled by ``1 / (1 - drop_prob)``.

    This function is taken from the rwightman.
    It can be seen here:
    https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py#L140
    """
    if not training or drop_prob == 0.:
        return x

    keep_prob = 1 - drop_prob
    # One mask entry per sample, broadcast over all remaining dimensions
    # (works with tensors of any rank, not just 2D ConvNets).
    mask_shape = (x.shape[0],) + (1,) * (x.ndim - 1)
    noise = torch.rand(mask_shape, dtype=x.dtype, device=x.device)
    keep_mask = torch.floor(noise + keep_prob)  # binarize: 1 with probability keep_prob
    return x.div(keep_prob) * keep_mask


class DropPath(nn.Module):
    """
    Drop paths (Stochastic Depth) per sample  (when applied in main path of residual blocks).
    "Deep Networks with Stochastic Depth", https://arxiv.org/pdf/1603.09382.pdf

    Args:
        drop_prob: Probability of dropping a sample's path. ``None`` (the
            default) and ``0`` both mean "never drop".
    """
    def __init__(self, drop_prob=None):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        # Identity when nothing can be dropped. This also covers the default
        # drop_prob=None, which would otherwise fail on `1 - None` in
        # training mode.
        if not self.drop_prob or not self.training:
            return x
        return drop_path(x, self.drop_prob, self.training)


class ConvBNActivation(nn.Sequential):
    """Bias-free Conv2d followed by a normalisation layer and an activation.

    Padding is ``(kernel_size - 1) // 2``. ``norm_layer`` defaults to
    ``nn.BatchNorm2d`` and ``activation_layer`` to ``nn.SiLU``.
    """
    def __init__(self,
                 in_planes: int,
                 out_planes: int,
                 kernel_size: int = 3,
                 stride: int = 1,
                 groups: int = 1,
                 norm_layer: Optional[Callable[..., nn.Module]] = None,
                 activation_layer: Optional[Callable[..., nn.Module]] = None):
        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        # nn.SiLU is an alias of Swish (torch>=1.7)
        activation_layer = nn.SiLU if activation_layer is None else activation_layer

        conv = nn.Conv2d(in_channels=in_planes,
                         out_channels=out_planes,
                         kernel_size=kernel_size,
                         stride=stride,
                         padding=(kernel_size - 1) // 2,
                         groups=groups,
                         bias=False)
        super().__init__(conv, norm_layer(out_planes), activation_layer())


class SqueezeExcitation(nn.Module):
    """Channel gating: global average pool -> 1x1 conv -> SiLU -> 1x1 conv -> sigmoid.

    The bottleneck width is ``input_c // squeeze_factor``, while the gate is
    computed for (and multiplied onto) the ``expand_c`` feature channels.
    """
    def __init__(self,
                 input_c: int,   # block input channel
                 expand_c: int,  # block expand channel
                 squeeze_factor: int = 4):
        super().__init__()
        bottleneck_c = input_c // squeeze_factor
        self.fc1 = nn.Conv2d(expand_c, bottleneck_c, 1)
        self.ac1 = nn.SiLU()  # alias Swish
        self.fc2 = nn.Conv2d(bottleneck_c, expand_c, 1)
        self.ac2 = nn.Sigmoid()

    def forward(self, x: Tensor) -> Tensor:
        # Squeeze every channel to a single value, then compute its gate
        pooled = F.adaptive_avg_pool2d(x, output_size=(1, 1))
        gate = self.ac2(self.fc2(self.ac1(self.fc1(pooled))))
        return gate * x


class InvertedResidualConfig:
    """Settings of a single inverted-residual block.

    Input and output channel counts are scaled by ``width_coefficient`` and
    rounded to a multiple of 8; the expanded width is the scaled input width
    times ``expanded_ratio``.
    """
    # kernel_size, in_channel, out_channel, exp_ratio, strides, use_SE, drop_connect_rate
    def __init__(self,
                 kernel: int,          # 3 or 5
                 input_c: int,
                 out_c: int,
                 expanded_ratio: int,  # 1 or 6
                 stride: int,          # 1 or 2
                 use_se: bool,         # True
                 drop_rate: float,
                 index: str,           # 1a, 2a, 2b, ...
                 width_coefficient: float):
        scaled_input_c = self.adjust_channels(input_c, width_coefficient)
        self.input_c = scaled_input_c
        self.kernel = kernel
        self.expanded_c = scaled_input_c * expanded_ratio
        self.out_c = self.adjust_channels(out_c, width_coefficient)
        self.use_se = use_se
        self.stride = stride
        self.drop_rate = drop_rate
        self.index = index

    @staticmethod
    def adjust_channels(channels: int, width_coefficient: float):
        """Scale ``channels`` by ``width_coefficient`` and round to a multiple of 8."""
        scaled = channels * width_coefficient
        return _make_divisible(scaled, 8)


class InvertedResidual(nn.Module):
    """Inverted-residual block: optional 1x1 expand, depthwise conv, optional
    squeeze-excitation and a linear 1x1 projection.

    A shortcut is added only when the stride is 1 and the input and output
    channel counts match; DropPath is applied only on blocks with a shortcut.

    Raises:
        ValueError: If ``cnf.stride`` is neither 1 nor 2.
    """
    def __init__(self,
                 cnf: InvertedResidualConfig,
                 norm_layer: Callable[..., nn.Module]):
        super().__init__()

        if cnf.stride not in [1, 2]:
            raise ValueError("illegal stride value.")

        self.use_res_connect = (cnf.stride == 1 and cnf.input_c == cnf.out_c)

        swish = nn.SiLU  # alias Swish
        stages = OrderedDict()

        # expand (skipped when the expansion ratio leaves the width unchanged)
        if cnf.expanded_c != cnf.input_c:
            stages["expand_conv"] = ConvBNActivation(cnf.input_c,
                                                     cnf.expanded_c,
                                                     kernel_size=1,
                                                     norm_layer=norm_layer,
                                                     activation_layer=swish)

        # depthwise
        stages["dwconv"] = ConvBNActivation(cnf.expanded_c,
                                            cnf.expanded_c,
                                            kernel_size=cnf.kernel,
                                            stride=cnf.stride,
                                            groups=cnf.expanded_c,
                                            norm_layer=norm_layer,
                                            activation_layer=swish)

        if cnf.use_se:
            stages["se"] = SqueezeExcitation(cnf.input_c,
                                             cnf.expanded_c)

        # project (no non-linearity after the projection)
        stages["project_conv"] = ConvBNActivation(cnf.expanded_c,
                                                  cnf.out_c,
                                                  kernel_size=1,
                                                  norm_layer=norm_layer,
                                                  activation_layer=nn.Identity)

        self.block = nn.Sequential(stages)
        self.out_channels = cnf.out_c
        self.is_strided = cnf.stride > 1

        # The dropout layer is only used when the shortcut connection is used
        drops_paths = self.use_res_connect and cnf.drop_rate > 0
        self.dropout = DropPath(cnf.drop_rate) if drops_paths else nn.Identity()

    def forward(self, x: Tensor) -> Tensor:
        out = self.dropout(self.block(x))
        if self.use_res_connect:
            out += x

        return out


class EfficientNet(nn.Module):
    """EfficientNet classifier built from a stem conv, a stack of inverted-residual
    blocks, a 1x1 "top" conv, global average pooling and a linear classifier.

    Channel widths are scaled by ``width_coefficient`` and the per-stage
    repeat counts by ``depth_coefficient``.
    """
    def __init__(self,
                 width_coefficient: float,
                 depth_coefficient: float,
                 num_classes: int = 1000,
                 dropout_rate: float = 0.2,
                 drop_connect_rate: float = 0.2,
                 block: Optional[Callable[..., nn.Module]] = None,
                 norm_layer: Optional[Callable[..., nn.Module]] = None
                 ):
        """
        Args:
            width_coefficient: Multiplier applied to every channel count.
            depth_coefficient: Multiplier applied to every stage's repeat count
                (the product is rounded up).
            num_classes: Output size of the final linear layer.
            dropout_rate: Dropout probability before the classifier; no
                dropout layer is added when it is not positive.
            drop_connect_rate: Upper bound of the per-block drop rate, which
                grows linearly with the block's position in the network.
            block: Block constructor; defaults to ``InvertedResidual``.
            norm_layer: Normalisation constructor; defaults to
                ``nn.BatchNorm2d`` with ``eps=1e-3`` and ``momentum=0.1``.
        """
        super(EfficientNet, self).__init__()

        # kernel_size, in_channel, out_channel, exp_ratio, strides, use_SE, drop_connect_rate, repeats
        default_cnf = [[3, 32, 16, 1, 1, True, drop_connect_rate, 1],
                       [3, 16, 24, 6, 2, True, drop_connect_rate, 2],
                       [5, 24, 40, 6, 2, True, drop_connect_rate, 2],
                       [3, 40, 80, 6, 2, True, drop_connect_rate, 3],
                       [5, 80, 112, 6, 1, True, drop_connect_rate, 3],
                       [5, 112, 192, 6, 2, True, drop_connect_rate, 4],
                       [3, 192, 320, 6, 1, True, drop_connect_rate, 1]]

        def round_repeats(repeats):
            """Round number of repeats based on depth multiplier."""
            return int(math.ceil(depth_coefficient * repeats))

        if block is None:
            block = InvertedResidual

        if norm_layer is None:
            norm_layer = partial(nn.BatchNorm2d, eps=1e-3, momentum=0.1)

        adjust_channels = partial(InvertedResidualConfig.adjust_channels,
                                  width_coefficient=width_coefficient)

        # build inverted_residual_setting
        bneck_conf = partial(InvertedResidualConfig,
                             width_coefficient=width_coefficient)

        # b counts the blocks created so far; num_blocks is the total after depth scaling
        b = 0
        num_blocks = float(sum(round_repeats(i[-1]) for i in default_cnf))
        inverted_residual_setting = []
        for stage, args in enumerate(default_cnf):
            # Work on a copy so the in-place edits below never touch default_cnf
            cnf = copy.copy(args)
            # pop(-1) removes the repeat count, leaving drop_connect_rate as the last entry
            for i in range(round_repeats(cnf.pop(-1))):
                if i > 0:
                    # strides equal 1 except first cnf
                    cnf[-3] = 1  # strides
                    cnf[1] = cnf[2]  # input_channel equal output_channel

                # Drop rate increases linearly with the block position b
                cnf[-1] = args[-2] * b / num_blocks  # update dropout ratio
                index = str(stage + 1) + chr(i + 97)  # 1a, 2a, 2b, ...
                inverted_residual_setting.append(bneck_conf(*cnf, index))
                b += 1

        # create layers
        layers = OrderedDict()

        # first conv
        layers.update({"stem_conv": ConvBNActivation(in_planes=3,
                                                     out_planes=adjust_channels(32),
                                                     kernel_size=3,
                                                     stride=2,
                                                     norm_layer=norm_layer)})

        # building inverted residual blocks
        for cnf in inverted_residual_setting:
            layers.update({cnf.index: block(cnf, norm_layer)})

        # build top
        last_conv_input_c = inverted_residual_setting[-1].out_c
        last_conv_output_c = adjust_channels(1280)
        layers.update({"top": ConvBNActivation(in_planes=last_conv_input_c,
                                               out_planes=last_conv_output_c,
                                               kernel_size=1,
                                               norm_layer=norm_layer)})

        self.features = nn.Sequential(layers)
        self.avgpool = nn.AdaptiveAvgPool2d(1)

        # Classifier head: optional dropout followed by the linear layer
        classifier = []
        if dropout_rate > 0:
            classifier.append(nn.Dropout(p=dropout_rate, inplace=True))
        classifier.append(nn.Linear(last_conv_output_c, num_classes))
        self.classifier = nn.Sequential(*classifier)

        # initial weights
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out")
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def _forward_impl(self, x: Tensor) -> Tensor:
        """Run features -> global average pool -> flatten -> classifier."""
        x = self.features(x)
        x = self.avgpool(x)
        # Flatten everything except the first (batch) dimension
        x = torch.flatten(x, 1)
        x = self.classifier(x)

        return x

    def forward(self, x: Tensor) -> Tensor:
        """Return the classifier output for ``x``."""
        return self._forward_impl(x)


def efficientnet_b0(num_classes=1000):
    """Build EfficientNet-B0 (width x1.0, depth x1.0, dropout 0.2).

    Intended input image size: 224x224.
    """
    scaling = dict(width_coefficient=1.0, depth_coefficient=1.0, dropout_rate=0.2)
    return EfficientNet(num_classes=num_classes, **scaling)


def efficientnet_b1(num_classes=1000):
    """Build EfficientNet-B1 (width x1.0, depth x1.1, dropout 0.2).

    Intended input image size: 240x240.
    """
    scaling = dict(width_coefficient=1.0, depth_coefficient=1.1, dropout_rate=0.2)
    return EfficientNet(num_classes=num_classes, **scaling)


def efficientnet_b2(num_classes=1000):
    """Build EfficientNet-B2 (width x1.1, depth x1.2, dropout 0.3).

    Intended input image size: 260x260.
    """
    scaling = dict(width_coefficient=1.1, depth_coefficient=1.2, dropout_rate=0.3)
    return EfficientNet(num_classes=num_classes, **scaling)


def efficientnet_b3(num_classes=1000):
    """Build EfficientNet-B3 (width x1.2, depth x1.4, dropout 0.3).

    Intended input image size: 300x300.
    """
    scaling = dict(width_coefficient=1.2, depth_coefficient=1.4, dropout_rate=0.3)
    return EfficientNet(num_classes=num_classes, **scaling)


def efficientnet_b4(num_classes=1000):
    """Build EfficientNet-B4 (width x1.4, depth x1.8, dropout 0.4).

    Intended input image size: 380x380.
    """
    scaling = dict(width_coefficient=1.4, depth_coefficient=1.8, dropout_rate=0.4)
    return EfficientNet(num_classes=num_classes, **scaling)


def efficientnet_b5(num_classes=1000):
    """Build EfficientNet-B5 (width x1.6, depth x2.2, dropout 0.4).

    Intended input image size: 456x456.
    """
    scaling = dict(width_coefficient=1.6, depth_coefficient=2.2, dropout_rate=0.4)
    return EfficientNet(num_classes=num_classes, **scaling)


def efficientnet_b6(num_classes=1000):
    """Build EfficientNet-B6 (width x1.8, depth x2.6, dropout 0.5).

    Intended input image size: 528x528.
    """
    scaling = dict(width_coefficient=1.8, depth_coefficient=2.6, dropout_rate=0.5)
    return EfficientNet(num_classes=num_classes, **scaling)


def efficientnet_b7(num_classes=1000):
    """Build EfficientNet-B7 (width x2.0, depth x3.1, dropout 0.5).

    Intended input image size: 600x600.
    """
    scaling = dict(width_coefficient=2.0, depth_coefficient=3.1, dropout_rate=0.5)
    return EfficientNet(num_classes=num_classes, **scaling)


================================================
FILE: pytorch_classification/Test9_efficientNet/my_dataset.py
================================================
from PIL import Image
import torch
from torch.utils.data import Dataset


class MyDataSet(Dataset):
    """Custom dataset pairing image file paths with integer class labels."""

    def __init__(self, images_path: list, images_class: list, transform=None):
        self.images_path = images_path
        self.images_class = images_class
        self.transform = transform

    def __len__(self):
        return len(self.images_path)

    def __getitem__(self, item):
        """Open image ``item`` and return ``(image, label)``.

        Raises:
            ValueError: If the image is not in RGB mode.
        """
        path = self.images_path[item]
        img = Image.open(path)
        # 'RGB' is a colour image, 'L' is a greyscale image
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(path))
        label = self.images_class[item]

        if self.transform is None:
            return img, label
        return self.transform(img), label

    @staticmethod
    def collate_fn(batch):
        """Stack a list of ``(image, label)`` samples into two batch tensors."""
        # The official default_collate implementation can be found at
        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        images, labels = zip(*batch)
        return torch.stack(images, dim=0), torch.as_tensor(labels)


================================================
FILE: pytorch_classification/Test9_efficientNet/predict.py
================================================
import os
import json

import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

from model import efficientnet_b0 as create_model


def main():
    """Classify one image with a trained EfficientNet and display the result.

    Reads the image at ``img_path``, the class mapping in
    ``./class_indices.json`` and the weights at ``model_weight_path``; prints
    the probability of every class and shows the image titled with the top
    prediction.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Input resolution used for each model variant
    img_size = {"B0": 224,
                "B1": 240,
                "B2": 260,
                "B3": 300,
                "B4": 380,
                "B5": 456,
                "B6": 528,
                "B7": 600}
    num_model = "B0"
    resolution = img_size[num_model]

    data_transform = transforms.Compose([
        transforms.Resize(resolution),
        transforms.CenterCrop(resolution),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    img = Image.open(img_path)
    plt.imshow(img)
    # transform the image, then add the batch dimension -> [N, C, H, W]
    img = data_transform(img).unsqueeze(0)

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model and load its trained weights
    model = create_model(num_classes=5).to(device)
    model_weight_path = "./weights/model-29.pth"
    state_dict = torch.load(model_weight_path, map_location=device)
    model.load_state_dict(state_dict)
    model.eval()
    with torch.no_grad():
        # predict class
        logits = model(img.to(device))
        output = torch.squeeze(logits).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).numpy()

    top_name = class_indict[str(predict_cla)]
    top_prob = predict[predict_cla].numpy()
    print_res = "class: {}   prob: {:.3}".format(top_name, top_prob)
    plt.title(print_res)
    for i, prob in enumerate(predict):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  prob.numpy()))
    plt.show()


# Call main() only when this file is executed directly, not when it is imported.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/Test9_efficientNet/requirements.txt
================================================
numpy
matplotlib
tqdm==4.56.0
torch>=1.7.1
torchvision>=0.8.2


================================================
FILE: pytorch_classification/Test9_efficientNet/train.py
================================================
import os
import math
import argparse

import torch
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
import torch.optim.lr_scheduler as lr_scheduler

from model import efficientnet_b0 as create_model
from my_dataset import MyDataSet
from utils import read_split_data, train_one_epoch, evaluate


def main(args):
    """Train an EfficientNet-B0 classifier, logging and checkpointing every epoch.

    Args:
        args: parsed command-line namespace. The attributes read here are
            ``device``, ``data_path``, ``batch_size``, ``num_classes``,
            ``weights``, ``freeze_layers``, ``lr``, ``lrf`` and ``epochs``.

    Raises:
        FileNotFoundError: if ``args.weights`` is non-empty and the file
            does not exist.

    Side effects:
        Creates ``./weights`` if missing, writes TensorBoard scalars through a
        default-constructed ``SummaryWriter`` and saves
        ``./weights/model-{epoch}.pth`` after every epoch.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")

    print(args)
    print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
    tb_writer = SummaryWriter()
    if not os.path.exists("./weights"):
        os.makedirs("./weights")

    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)

    # input resolution used for each EfficientNet variant
    img_size = {"B0": 224,
                "B1": 240,
                "B2": 260,
                "B3": 300,
                "B4": 380,
                "B5": 456,
                "B6": 528,
                "B7": 600}
    num_model = "B0"

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(img_size[num_model]),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(img_size[num_model]),
                                   transforms.CenterCrop(img_size[num_model]),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # instantiate the training dataset
    train_dataset = MyDataSet(images_path=train_images_path,
                              images_class=train_images_label,
                              transform=data_transform["train"])

    # instantiate the validation dataset
    val_dataset = MyDataSet(images_path=val_images_path,
                            images_class=val_images_label,
                            transform=data_transform["val"])

    batch_size = args.batch_size
    # os.cpu_count() may return None when the CPU count cannot be determined;
    # fall back to 1 so that min() never has to compare None with an int.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_dataset.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_dataset.collate_fn)

    # load the pre-trained weights when a path is given
    model = create_model(num_classes=args.num_classes).to(device)
    if args.weights != "":
        if not os.path.exists(args.weights):
            raise FileNotFoundError("not found weights file: {}".format(args.weights))

        weights_dict = torch.load(args.weights, map_location=device)
        # Build the model's state dict once instead of once per checkpoint key.
        model_state = model.state_dict()
        # Drop tensors whose element count differs from the model's (e.g. a
        # classifier trained for another number of classes). Keys unknown to
        # the model are passed through so that load_state_dict(strict=False)
        # reports them as unexpected instead of this filter raising KeyError.
        load_weights_dict = {k: v for k, v in weights_dict.items()
                             if k not in model_state or model_state[k].numel() == v.numel()}
        print(model.load_state_dict(load_weights_dict, strict=False))

    # optionally freeze weights
    if args.freeze_layers:
        for name, para in model.named_parameters():
            # freeze everything except the last conv layer ("features.top")
            # and the fully connected classifier
            if ("features.top" not in name) and ("classifier" not in name):
                para.requires_grad_(False)
            else:
                print("training {}".format(name))

    # only parameters that still require gradients are handed to the optimizer
    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=1E-4)
    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    # multiplicative factor goes from 1 at epoch 0 down to args.lrf at epoch args.epochs
    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    for epoch in range(args.epochs):
        # train
        mean_loss = train_one_epoch(model=model,
                                    optimizer=optimizer,
                                    data_loader=train_loader,
                                    device=device,
                                    epoch=epoch)

        scheduler.step()

        # validate
        acc = evaluate(model=model,
                       data_loader=val_loader,
                       device=device)
        print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3)))
        tags = ["loss", "accuracy", "learning_rate"]
        tb_writer.add_scalar(tags[0], mean_loss, epoch)
        tb_writer.add_scalar(tags[1], acc, epoch)
        tb_writer.add_scalar(tags[2], optimizer.param_groups[0]["lr"], epoch)

        torch.save(model.state_dict(), "./weights/model-{}.pth".format(epoch))


if __name__ == '__main__':
    def _str2bool(value):
        """Parse a boolean command-line value.

        ``type=bool`` cannot be used with argparse: bool("False") is True, so
        every non-empty string would enable the option.
        """
        if isinstance(value, bool):
            return value
        text = str(value).strip().lower()
        if text in ("true", "t", "yes", "y", "1"):
            return True
        # the empty string stays falsy, as it was with type=bool
        if text in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError("boolean value expected, got '{}'".format(value))

    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--epochs', type=int, default=30)
    parser.add_argument('--batch-size', type=int, default=16)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--lrf', type=float, default=0.01)

    # root directory of the dataset
    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
    parser.add_argument('--data-path', type=str,
                        default="/data/flower_photos")

    # download model weights
    # link: https://pan.baidu.com/s/1ouX0UmjCsmSx3ZrqXbowjw  password: 090i
    parser.add_argument('--weights', type=str, default='./efficientnetb0.pth',
                        help='initial weights path')
    parser.add_argument('--freeze-layers', type=_str2bool, default=False)
    # the value is handed to torch.device() in main(), so it must be a device string
    parser.add_argument('--device', default='cuda:0', help='device id (e.g. cuda:0 or cpu)')

    opt = parser.parse_args()

    main(opt)


================================================
FILE: pytorch_classification/Test9_efficientNet/trans_weights_to_pytorch.py
================================================
import numpy as np
import torch
import tensorflow as tf

# Compare the numeric (major, minor) components: a plain string comparison is
# lexicographic and would wrongly rank e.g. "2.10.0" below "2.4.0".
assert tuple(int(part) for part in tf.version.VERSION.split(".")[:2]) >= (2, 4), \
    "version of tf must greater/equal than 2.4.0"


def main():
    """Export the weights of a Keras EfficientNetB0 as a PyTorch state dict.

    Every Keras variable (except the first three) is renamed to the key used
    by the PyTorch model, kernels are transposed where needed, and the result
    is written with ``torch.save`` to ``./efficientnetb0.pth``.

    Raises:
        KeyError: if a Keras variable name matches none of the known layers.
        AssertionError: if a variable inside a "block" has an unknown suffix.
    """
    # save pytorch weights path
    save_path = "./efficientnetb0.pth"

    # create keras model and download weights
    # EfficientNetB0, EfficientNetB1, EfficientNetB2, ...
    m = tf.keras.applications.EfficientNetB0()

    # axis permutation applied to regular convolution kernels
    conv_axes = (3, 2, 0, 1)

    # layers outside the blocks: exact keras name -> (torch key, transpose axes or None)
    plain_layers = {
        "stem_conv/kernel:0": ("features.stem_conv.0.weight", conv_axes),
        "stem_bn/gamma:0": ("features.stem_conv.1.weight", None),
        "stem_bn/beta:0": ("features.stem_conv.1.bias", None),
        "stem_bn/moving_mean:0": ("features.stem_conv.1.running_mean", None),
        "stem_bn/moving_variance:0": ("features.stem_conv.1.running_var", None),
        "top_conv/kernel:0": ("features.top.0.weight", conv_axes),
        "top_bn/gamma:0": ("features.top.1.weight", None),
        "top_bn/beta:0": ("features.top.1.bias", None),
        "top_bn/moving_mean:0": ("features.top.1.running_mean", None),
        "top_bn/moving_variance:0": ("features.top.1.running_var", None),
        "predictions/kernel:0": ("classifier.1.weight", (1, 0)),
        "predictions/bias:0": ("classifier.1.bias", None),
    }

    # layers inside a block: keras suffix -> torch suffix
    block_layers = {"expand_conv/kernel:0": "expand_conv.0.weight",
                    "expand_bn/gamma:0": "expand_conv.1.weight",
                    "expand_bn/beta:0": "expand_conv.1.bias",
                    "expand_bn/moving_mean:0": "expand_conv.1.running_mean",
                    "expand_bn/moving_variance:0": "expand_conv.1.running_var",
                    "dwconv/depthwise_kernel:0": "dwconv.0.weight",
                    "bn/gamma:0": "dwconv.1.weight",
                    "bn/beta:0": "dwconv.1.bias",
                    "bn/moving_mean:0": "dwconv.1.running_mean",
                    "bn/moving_variance:0": "dwconv.1.running_var",
                    "se_reduce/kernel:0": "se.fc1.weight",
                    "se_reduce/bias:0": "se.fc1.bias",
                    "se_expand/kernel:0": "se.fc2.weight",
                    "se_expand/bias:0": "se.fc2.bias",
                    "project_conv/kernel:0": "project_conv.0.weight",
                    "project_bn/gamma:0": "project_conv.1.weight",
                    "project_bn/beta:0": "project_conv.1.bias",
                    "project_bn/moving_mean:0": "project_conv.1.running_mean",
                    "project_bn/moving_variance:0": "project_conv.1.running_var"}

    # block kernels that must be transposed: torch suffix -> axes
    # (the depthwise kernel uses a different permutation than the others)
    block_axes = {"expand_conv.0.weight": conv_axes,
                  "se.fc1.weight": conv_axes,
                  "se.fc2.weight": conv_axes,
                  "project_conv.0.weight": conv_axes,
                  "dwconv.0.weight": (2, 3, 0, 1)}

    converted = dict()
    # the first three variables are skipped (noted as "norm weights" by the author)
    for weight in m.weights[3:]:
        keras_name = weight.name
        data = weight.numpy()

        if keras_name in plain_layers:
            torch_name, axes = plain_layers[keras_name]
        elif "block" in keras_name:
            remainder = keras_name[5:]  # delete "block" word
            block_index = remainder[:2]  # 1a, 2a, ...
            suffix = remainder[3:]  # delete block_index and "_"

            assert suffix in block_layers, "key '{}' not in trans_dict".format(suffix)
            torch_postfix = block_layers[suffix]
            torch_name = "features.{}.block.".format(block_index) + torch_postfix
            axes = block_axes.get(torch_postfix)
        else:
            raise KeyError("no match key '{}'".format(keras_name))

        if axes is not None:
            data = np.transpose(data, axes).astype(np.float32)
        converted[torch_name] = data

    # numpy arrays -> torch tensors, keeping the insertion order of the keys
    converted = {key: torch.as_tensor(value) for key, value in converted.items()}

    torch.save(converted, save_path)
    print("Conversion complete.")


# Run the weight conversion only when this file is executed directly.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/Test9_efficientNet/utils.py
================================================
import os
import sys
import json
import pickle
import random

import torch
from tqdm import tqdm

import matplotlib.pyplot as plt


def read_split_data(root: str, val_rate: float = 0.2):
    """Split an image-folder dataset into training and validation lists.

    Every sub-directory of ``root`` is treated as one class. Class names are
    sorted and numbered from 0, and the index -> name mapping is written to
    ``class_indices.json`` in the current working directory. For each class,
    ``int(len(images) * val_rate)`` images are sampled for validation; the
    remaining ones are used for training. The random generator is seeded
    with 0 so the split is reproducible.

    Args:
        root: dataset root directory containing one folder per class.
        val_rate: fraction of each class sampled into the validation set.

    Returns:
        Four lists: training image paths, training labels, validation image
        paths and validation labels.

    Raises:
        AssertionError: if ``root`` does not exist or either split is empty.
    """
    random.seed(0)  # make the random split reproducible
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # walk the root folder: one sub-folder per class
    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]
    # sort so that the order is identical on every platform
    flower_class.sort()
    # class name -> numeric index
    class_indices = dict((k, v) for v, k in enumerate(flower_class))
    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of every training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of every validation image
    every_class_num = []  # number of samples per class
    supported = [".jpg", ".JPG", ".png", ".PNG"]  # supported file extensions
    # iterate over the files of every class folder
    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # collect the paths of all files with a supported extension
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        # sort so that the order is identical on every platform
        images.sort()
        # numeric index of this class
        image_class = class_indices[cla]
        # record the number of samples of this class
        every_class_num.append(len(images))
        # Sample the validation images. A set gives O(1) membership tests in
        # the loop below (a list would make the split quadratic per class);
        # the sampled elements themselves are unchanged.
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:  # sampled -> validation set
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:  # otherwise -> training set
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.".format(sum(every_class_num)))
    print("{} images for training.".format(len(train_images_path)))
    print("{} images for validation.".format(len(val_images_path)))
    assert len(train_images_path) > 0, "number of training images must greater than 0."
    assert len(val_images_path) > 0, "number of validation images must greater than 0."

    # manual switch: set to True to show a bar chart of the class distribution
    plot_image = False
    if plot_image:
        # bar chart with the number of images per class
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # replace the x ticks 0,1,2,3,4 with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # write the count above every bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        # x axis label
        plt.xlabel('image class')
        # y axis label
        plt.ylabel('number of images')
        # chart title
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label


def plot_data_loader_image(data_loader):
    """Show the first images of every batch together with their class names.

    For each batch yielded by ``data_loader`` up to four images (fewer when
    the batch size is smaller) are de-normalized and drawn side by side; the
    class name is looked up in ``./class_indices.json``.

    Args:
        data_loader: iterable of ``(images, labels)`` batches that exposes a
            ``batch_size`` attribute.

    Raises:
        AssertionError: if ``./class_indices.json`` does not exist.
    """
    batch_size = data_loader.batch_size
    plot_num = min(batch_size, 4)

    json_path = './class_indices.json'
    assert os.path.exists(json_path), json_path + " does not exist."
    # the context manager closes the file handle (it used to be left open)
    with open(json_path, 'r') as json_file:
        class_indices = json.load(json_file)

    for data in data_loader:
        images, labels = data
        for i in range(plot_num):
            # [C, H, W] -> [H, W, C]
            img = images[i].numpy().transpose(1, 2, 0)
            # undo the Normalize transform and rescale to 0-255
            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255
            label = labels[i].item()
            plt.subplot(1, plot_num, i+1)
            plt.xlabel(class_indices[str(label)])
            plt.xticks([])  # hide the x axis ticks
            plt.yticks([])  # hide the y axis ticks
            plt.imshow(img.astype('uint8'))
        plt.show()


def write_pickle(list_info: list, file_name: str):
    """Serialize ``list_info`` with pickle into the file ``file_name``.

    The file is opened in binary write mode, so existing content is replaced.
    """
    with open(file_name, 'wb') as stream:
        pickle.Pickler(stream).dump(list_info)


def read_pickle(file_name: str) -> list:
    """Load and return the object pickled in the file ``file_name``.

    NOTE(review): unpickling executes code embedded in the file, so this
    should only be used on files from a trusted source.
    """
    with open(file_name, 'rb') as stream:
        loaded = pickle.Unpickler(stream).load()
    return loaded


def train_one_epoch(model, optimizer, data_loader, device, epoch):
    """Run one training pass over ``data_loader`` and return the mean loss.

    The model is optimized with cross-entropy loss, one optimizer step per
    batch. A running mean of the batch losses is shown in the tqdm progress
    bar. The process exits with status 1 as soon as a non-finite loss occurs.

    Args:
        model: network to train; it is switched to training mode.
        optimizer: optimizer updating the model's parameters.
        data_loader: iterable yielding ``(images, labels)`` batches.
        device: device the batches are moved to.
        epoch: epoch number, used only in the progress-bar text.

    Returns:
        The mean batch loss of the epoch as a Python float.
    """
    model.train()
    criterion = torch.nn.CrossEntropyLoss()
    running_mean = torch.zeros(1).to(device)
    optimizer.zero_grad()

    progress = tqdm(data_loader, file=sys.stdout)

    for batch_idx, batch in enumerate(progress):
        images, labels = batch

        logits = model(images.to(device))
        loss = criterion(logits, labels.to(device))
        loss.backward()

        # incremental mean over the batches seen so far
        running_mean = (running_mean * batch_idx + loss.detach()) / (batch_idx + 1)
        progress.desc = "[epoch {}] mean loss {}".format(epoch, round(running_mean.item(), 3))

        # abort before the optimizer can apply a NaN/inf update
        if not torch.isfinite(loss):
            print('WARNING: non-finite loss, ending training ', loss)
            sys.exit(1)

        optimizer.step()
        optimizer.zero_grad()

    return running_mean.item()


@torch.no_grad()
def evaluate(model, data_loader, device):
    """Return the classification accuracy of ``model`` on ``data_loader``.

    The model is switched to evaluation mode and the whole function runs
    without gradient tracking.

    Args:
        model: network to evaluate.
        data_loader: loader yielding ``(images, labels)`` batches; its
            ``dataset`` attribute provides the total sample count.
        device: device the batches are moved to.

    Returns:
        Number of correct predictions divided by ``len(data_loader.dataset)``.
    """
    model.eval()

    # total number of validation samples
    sample_count = len(data_loader.dataset)

    # accumulates the number of correctly predicted samples
    correct = torch.zeros(1).to(device)

    progress = tqdm(data_loader, file=sys.stdout)

    for _, batch in enumerate(progress):
        images, labels = batch
        scores = model(images.to(device))
        # index of the highest score per sample
        _, predicted = torch.max(scores, dim=1)
        correct += torch.eq(predicted, labels.to(device)).sum()

    return correct.item() / sample_count


================================================
FILE: pytorch_classification/analyze_weights_featuremap/alexnet_model.py
================================================
import torch.nn as nn
import torch


class AlexNet(nn.Module):
    """AlexNet variant used for feature-map inspection.

    The classifier head is constructed (so that checkpoints of the full
    network load), but ``forward`` only runs the convolutional part and
    returns intermediate feature maps instead of class scores.
    """

    def __init__(self, num_classes=1000, init_weights=False):
        """Build the layers; optionally apply the custom weight initialization."""
        super().__init__()
        conv_layers = [
            nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2),  # input[3, 224, 224]  output[48, 55, 55]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[48, 27, 27]
            nn.Conv2d(48, 128, kernel_size=5, padding=2),           # output[128, 27, 27]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[128, 13, 13]
            nn.Conv2d(128, 192, kernel_size=3, padding=1),          # output[192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, padding=1),          # output[192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 128, kernel_size=3, padding=1),          # output[128, 13, 13]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[128, 6, 6]
        ]
        self.features = nn.Sequential(*conv_layers)

        head_layers = [
            nn.Dropout(p=0.5),
            nn.Linear(128 * 6 * 6, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Return the outputs of feature layers "0", "3" and "6" as a list.

        Each of these layers is followed by an in-place ReLU, so the stored
        tensors end up holding the rectified activations.
        """
        tapped_layers = ("0", "3", "6")
        feature_maps = []
        for layer_name, layer in self.features.named_children():
            x = layer(x)
            if layer_name in tapped_layers:
                feature_maps.append(x)

        return feature_maps

    def _initialize_weights(self):
        """Kaiming-normal for conv weights, N(0, 0.01) for linear weights, zero biases."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
                continue
            if isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.constant_(module.bias, 0)


================================================
FILE: pytorch_classification/analyze_weights_featuremap/analyze_feature_map.py
================================================
import torch
from alexnet_model import AlexNet
from resnet_model import resnet34
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from torchvision import transforms

# Preprocessing for the AlexNet weights: resize to 224x224 and normalize
# every channel with mean 0.5 and std 0.5.
data_transform = transforms.Compose(
    [transforms.Resize((224, 224)),
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Alternative preprocessing, to be enabled together with the resnet34 model below:
# data_transform = transforms.Compose(
#     [transforms.Resize(256),
#      transforms.CenterCrop(224),
#      transforms.ToTensor(),
#      transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

# create model
model = AlexNet(num_classes=5)
# model = resnet34(num_classes=5)
# load model weights
model_weight_path = "./AlexNet.pth"  # "./resNet34.pth"
# map_location lets weights that were saved on a GPU load on a CPU-only machine
model.load_state_dict(torch.load(model_weight_path, map_location="cpu"))
# inference mode: BatchNorm layers (resnet34 option) must use their running
# statistics instead of the statistics of this single image
model.eval()
print(model)

# load image
img = Image.open("../tulip.jpg")
# [C, H, W]
img = data_transform(img)
# expand batch dimension: [C, H, W] -> [N, C, H, W]
img = torch.unsqueeze(img, dim=0)

# forward: the model returns a list of intermediate feature maps
out_put = model(img)
for feature_map in out_put:
    # [N, C, H, W] -> [C, H, W]
    im = np.squeeze(feature_map.detach().numpy())
    # [C, H, W] -> [H, W, C]
    im = np.transpose(im, [1, 2, 0])

    # show top 12 feature maps
    plt.figure()
    for i in range(12):
        ax = plt.subplot(3, 4, i+1)
        # [H, W, C]
        plt.imshow(im[:, :, i], cmap='gray')
    plt.show()


================================================
FILE: pytorch_classification/analyze_weights_featuremap/analyze_kernel_weight.py
================================================
import torch
from alexnet_model import AlexNet
from resnet_model import resnet34
import matplotlib.pyplot as plt
import numpy as np


# create model
model = AlexNet(num_classes=5)
# model = resnet34(num_classes=5)
# load model weights
model_weight_path = "./AlexNet.pth"  # "resNet34.pth"
# map_location lets weights that were saved on a GPU load on a CPU-only machine
model.load_state_dict(torch.load(model_weight_path, map_location="cpu"))
print(model)

# Print summary statistics and show a histogram for every entry of the state dict.
# The state dict is built once instead of once per key.
state_dict = model.state_dict()
for key, tensor in state_dict.items():
    # remove num_batches_tracked para(in bn)
    if "num_batches_tracked" in key:
        continue
    # for conv weights: [kernel_number, kernel_channel, kernel_height, kernel_width]
    weight_t = tensor.numpy()

    # read a kernel information
    # k = weight_t[0, :, :, :]

    # calculate mean, std, min, max
    weight_mean = weight_t.mean()
    weight_std = weight_t.std(ddof=1)
    weight_min = weight_t.min()
    weight_max = weight_t.max()
    # argument order must follow the labels (min and max used to be swapped)
    print("mean is {}, std is {}, min is {}, max is {}".format(weight_mean,
                                                               weight_std,
                                                               weight_min,
                                                               weight_max))

    # plot hist image
    plt.close()
    weight_vec = np.reshape(weight_t, [-1])
    plt.hist(weight_vec, bins=50)
    plt.title(key)
    plt.show()


================================================
FILE: pytorch_classification/analyze_weights_featuremap/resnet_model.py
================================================
import torch.nn as nn
import torch


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions, each followed by BatchNorm.

    ``expansion`` is the ratio between the block's output channels and
    ``out_channel``; it is 1 for this block.
    """
    expansion = 1

    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        """Create the layers.

        Args:
            in_channel: channels of the input tensor.
            out_channel: channels produced by both convolutions.
            stride: stride of the first convolution.
            downsample: optional module applied to the input to form the
                shortcut branch; the raw input is used when it is None.
        """
        super().__init__()
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample

    def forward(self, x):
        """Return relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))."""
        # shortcut branch
        shortcut = x if self.downsample is None else self.downsample(x)

        # main branch
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        out += shortcut
        return self.relu(out)


class Bottleneck(nn.Module):
    """Residual block with a 1x1 -> 3x3 -> 1x1 convolution sequence.

    The last 1x1 convolution outputs ``out_channel * expansion`` channels,
    with ``expansion`` equal to 4.
    """
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        """Create the layers.

        Args:
            in_channel: channels of the input tensor.
            out_channel: channels of the two inner convolutions.
            stride: stride of the 3x3 convolution.
            downsample: optional module applied to the input to form the
                shortcut branch; the raw input is used when it is None.
        """
        super().__init__()
        expanded = out_channel * self.expansion

        # 1x1: squeeze channels
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        # 3x3: the only convolution that uses the given stride
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3,
                               stride=stride, bias=False, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channel)
        # 1x1: unsqueeze channels
        self.conv3 = nn.Conv2d(out_channel, expanded, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(expanded)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Run the three conv/bn stages, add the shortcut and apply ReLU."""
        # shortcut branch
        shortcut = x if self.downsample is None else self.downsample(x)

        # main branch
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out += shortcut
        return self.relu(out)


class ResNet(nn.Module):
    """ResNet backbone adapted for feature-map inspection.

    All four stages (and, with ``include_top``, the pooling and fully
    connected head) are constructed, but ``forward`` stops after the first
    stage and returns intermediate feature maps instead of class scores.
    """

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True):
        """Build the network.

        Args:
            block: residual block class; it must provide an ``expansion``
                attribute and accept ``(in_channel, out_channel, stride=...,
                downsample=...)``.
            blocks_num: number of blocks in each of the four stages.
            num_classes: output size of the fully connected layer.
            include_top: whether to create the avgpool + fc head.
        """
        super().__init__()
        self.include_top = include_top
        # channel count fed into the next block; updated by _make_layer
        self.in_channel = 64

        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, blocks_num[0])
        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)
        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)
        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)
        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Kaiming initialization for every convolution in the network
        conv_modules = [m for m in self.modules() if isinstance(m, nn.Conv2d)]
        for conv in conv_modules:
            nn.init.kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Stack ``block_num`` blocks into one stage and update ``self.in_channel``.

        Only the first block receives ``stride`` and, when the stride or the
        channel count changes, a 1x1 conv + BatchNorm shortcut.
        """
        out_channel = channel * block.expansion

        shortcut = None
        if stride != 1 or self.in_channel != out_channel:
            shortcut = nn.Sequential(
                nn.Conv2d(self.in_channel, out_channel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channel))

        stage = [block(self.in_channel, channel, downsample=shortcut, stride=stride)]
        self.in_channel = out_channel

        # the remaining blocks keep resolution and channel count
        for _ in range(block_num - 1):
            stage.append(block(self.in_channel, channel))

        return nn.Sequential(*stage)

    def forward(self, x):
        """Return ``[conv1 output, layer1 output]``.

        layer2-layer4 and the classification head are intentionally not
        executed here.
        """
        stem = self.conv1(x)
        x = self.maxpool(self.relu(self.bn1(stem)))
        stage1 = self.layer1(x)

        return [stem, stage1]


def resnet34(num_classes=1000, include_top=True):
    """Build a ResNet from BasicBlock with stage depths [3, 4, 6, 3]."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths,
                  num_classes=num_classes,
                  include_top=include_top)


def resnet101(num_classes=1000, include_top=True):
    """Build a ResNet from Bottleneck with stage depths [3, 4, 23, 3]."""
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths,
                  num_classes=num_classes,
                  include_top=include_top)


================================================
FILE: pytorch_classification/custom_dataset/main.py
================================================
import os

import torch
from torchvision import transforms

from my_dataset import MyDataSet
from utils import read_split_data, plot_data_loader_image

# https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
root = "/home/wz/my_github/data_set/flower_data/flower_photos"  # 数据集所在根目录


def main():
    """Demonstrate the custom dataset: split the data and iterate the training loader.

    Reads the dataset found under the module-level ``root``, wraps the
    training split in ``MyDataSet`` with the training transforms and loops
    once over the resulting ``DataLoader``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(root)

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    train_data_set = MyDataSet(images_path=train_images_path,
                               images_class=train_images_label,
                               transform=data_transform["train"])

    batch_size = 8
    # os.cpu_count() may return None when the CPU count cannot be determined;
    # fall back to 1 so that min() never has to compare None with an int.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_data_set,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=nw,
                                               collate_fn=train_data_set.collate_fn)

    # plot_data_loader_image(train_loader)

    # iterate once over the loader; every batch is an (images, labels) pair
    for step, data in enumerate(train_loader):
        images, labels = data


# Run the demo only when this file is executed directly.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/custom_dataset/my_dataset.py
================================================
from PIL import Image
import torch
from torch.utils.data import Dataset


class MyDataSet(Dataset):
    """Custom dataset built from a list of image paths and a parallel list of labels."""

    def __init__(self, images_path: list, images_class: list, transform=None):
        """Store the image paths, their class indices and the optional transform."""
        self.images_path = images_path
        self.images_class = images_class
        self.transform = transform

    def __len__(self):
        """Return the number of image paths."""
        return len(self.images_path)

    def __getitem__(self, item):
        """Open image ``item`` and return ``(image, label)``.

        Raises:
            ValueError: if the image is not in RGB mode.
        """
        path = self.images_path[item]
        img = Image.open(path)
        # 'RGB' means a colour image, 'L' a grayscale image
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(path))
        label = self.images_class[item]

        if self.transform is None:
            return img, label
        return self.transform(img), label

    @staticmethod
    def collate_fn(batch):
        """Merge a list of ``(image, label)`` samples into two batched tensors.

        The official default_collate implementation can be found at
        https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        """
        images, labels = zip(*batch)

        stacked_images = torch.stack(images, dim=0)
        label_tensor = torch.as_tensor(labels)
        return stacked_images, label_tensor


================================================
FILE: pytorch_classification/custom_dataset/utils.py
================================================
import os
import json
import pickle
import random

import matplotlib.pyplot as plt


def read_split_data(root: str, val_rate: float = 0.2, plot_image: bool = False):
    """Split an image-folder dataset into training and validation lists.

    Every sub-directory of ``root`` is treated as one class. For each class,
    ``int(n_images * val_rate)`` images are sampled for validation and the
    remaining images are used for training.

    Side effects:
        * re-seeds the global ``random`` module with 0;
        * writes the ``index -> class name`` mapping to ``class_indices.json``
          in the current working directory;
        * prints the dataset / training / validation image counts.

    Args:
        root: dataset root directory containing one folder per class.
        val_rate: fraction of each class that goes to the validation set.
        plot_image: when True, show a bar chart of the per-class image counts.

    Returns:
        ``(train_images_path, train_images_label,
        val_images_path, val_images_label)`` as four lists.
    """
    random.seed(0)  # fixed seed so that the random split is reproducible
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # One folder per class; sorted so that class indices are stable.
    flower_class = sorted(cla for cla in os.listdir(root)
                          if os.path.isdir(os.path.join(root, cla)))
    # Map class name -> numeric index and persist the inverse mapping.
    class_indices = {name: index for index, name in enumerate(flower_class)}
    json_str = json.dumps({index: name for name, index in class_indices.items()}, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of each training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of each validation image
    every_class_num = []  # number of images found for each class
    supported = [".jpg", ".JPG", ".png", ".PNG"]  # accepted file extensions

    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # os.listdir returns entries in arbitrary order, so sort them:
        # otherwise the seeded sample below would still pick different
        # files depending on the platform / filesystem.
        images = sorted(os.path.join(cla_path, name) for name in os.listdir(cla_path)
                        if os.path.splitext(name)[-1] in supported)
        image_class = class_indices[cla]
        every_class_num.append(len(images))
        # Sample the validation images; a set gives O(1) membership tests.
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:  # sampled -> validation set
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:  # everything else -> training set
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.".format(sum(every_class_num)))
    print("{} images for training.".format(len(train_images_path)))
    print("{} images for validation.".format(len(val_images_path)))

    if plot_image:
        # Bar chart with one bar per class.
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # Replace the numeric x ticks 0,1,2,... with the class names.
        plt.xticks(range(len(flower_class)), flower_class)
        # Write the image count above each bar.
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        plt.xlabel('image class')
        plt.ylabel('number of images')
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label


def plot_data_loader_image(data_loader):
    """Show up to four images of every batch of ``data_loader`` with class names.

    The class names are read from ``./class_indices.json`` (index -> name),
    which must exist in the current working directory. One matplotlib figure
    is shown per batch.

    Args:
        data_loader: iterable yielding ``(images, labels)`` batches and
            exposing a ``batch_size`` attribute. Each image is converted with
            ``.numpy()`` and transposed from [C, H, W] to [H, W, C].
    """
    batch_size = data_loader.batch_size
    plot_num = min(batch_size, 4)

    json_path = './class_indices.json'
    assert os.path.exists(json_path), json_path + " does not exist."
    # Context manager so the file handle is closed right after parsing.
    with open(json_path, 'r') as json_file:
        class_indices = json.load(json_file)

    for data in data_loader:
        images, labels = data
        # The last batch may hold fewer samples than batch_size; never index
        # past what the batch actually contains.
        shown = min(plot_num, len(images))
        for i in range(shown):
            # [C, H, W] -> [H, W, C]
            img = images[i].numpy().transpose(1, 2, 0)
            # Undo the normalisation (assumes the loader normalised with
            # exactly these per-channel mean/std values) and rescale to 0-255.
            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255
            label = labels[i].item()
            plt.subplot(1, shown, i + 1)
            plt.xlabel(class_indices[str(label)])
            plt.xticks([])  # hide the x-axis ticks
            plt.yticks([])  # hide the y-axis ticks
            plt.imshow(img.astype('uint8'))
        plt.show()


def write_pickle(list_info: list, file_name: str):
    """Pickle ``list_info`` into the binary file ``file_name``, overwriting it."""
    with open(file_name, mode='wb') as stream:
        pickle.dump(list_info, stream)


def read_pickle(file_name: str) -> list:
    """Load and return the object pickled in the binary file ``file_name``.

    NOTE: unpickling can execute arbitrary code; only read files from a
    trusted source.
    """
    with open(file_name, mode='rb') as stream:
        return pickle.load(stream)


================================================
FILE: pytorch_classification/grad_cam/README.md
================================================
## Grad-CAM
- Original Impl: [https://github.com/jacobgil/pytorch-grad-cam](https://github.com/jacobgil/pytorch-grad-cam)
- Grad-CAM简介: [https://b23.tv/1kccjmb](https://b23.tv/1kccjmb)
- 使用Pytorch实现Grad-CAM并绘制热力图: [https://b23.tv/n1e60vN](https://b23.tv/n1e60vN)

## 使用流程(替换成自己的网络)
1. 将创建模型部分代码替换成自己创建模型的代码，并载入自己训练好的权重
2. 根据自己网络设置合适的`target_layers`
3. 根据自己的网络设置合适的预处理方法
4. 将要预测的图片路径赋值给`img_path`
5. 将感兴趣的类别id赋值给`target_category`


================================================
FILE: pytorch_classification/grad_cam/imagenet1k_classes.txt
================================================
tench, Tinca tinca
goldfish, Carassius auratus
great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias
tiger shark, Galeocerdo cuvieri
hammerhead, hammerhead shark
electric ray, crampfish, numbfish, torpedo
stingray
cock
hen
ostrich, Struthio camelus
brambling, Fringilla montifringilla
goldfinch, Carduelis carduelis
house finch, linnet, Carpodacus mexicanus
junco, snowbird
indigo bunting, indigo finch, indigo bird, Passerina cyanea
robin, American robin, Turdus migratorius
bulbul
jay
magpie
chickadee
water ouzel, dipper
kite
bald eagle, American eagle, Haliaeetus leucocephalus
vulture
great grey owl, great gray owl, Strix nebulosa
European fire salamander, Salamandra salamandra
common newt, Triturus vulgaris
eft
spotted salamander, Ambystoma maculatum
axolotl, mud puppy, Ambystoma mexicanum
bullfrog, Rana catesbeiana
tree frog, tree-frog
tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui
loggerhead, loggerhead turtle, Caretta caretta
leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea
mud turtle
terrapin
box turtle, box tortoise
banded gecko
common iguana, iguana, Iguana iguana
American chameleon, anole, Anolis carolinensis
whiptail, whiptail lizard
agama
frilled lizard, Chlamydosaurus kingi
alligator lizard
Gila monster, Heloderma suspectum
green lizard, Lacerta viridis
African chameleon, Chamaeleo chamaeleon
Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis
African crocodile, Nile crocodile, Crocodylus niloticus
American alligator, Alligator mississipiensis
triceratops
thunder snake, worm snake, Carphophis amoenus
ringneck snake, ring-necked snake, ring snake
hognose snake, puff adder, sand viper
green snake, grass snake
king snake, kingsnake
garter snake, grass snake
water snake
vine snake
night snake, Hypsiglena torquata
boa constrictor, Constrictor constrictor
rock python, rock snake, Python sebae
Indian cobra, Naja naja
green mamba
sea snake
horned viper, cerastes, sand viper, horned asp, Cerastes cornutus
diamondback, diamondback rattlesnake, Crotalus adamanteus
sidewinder, horned rattlesnake, Crotalus cerastes
trilobite
harvestman, daddy longlegs, Phalangium opilio
scorpion
black and gold garden spider, Argiope aurantia
barn spider, Araneus cavaticus
garden spider, Aranea diademata
black widow, Latrodectus mactans
tarantula
wolf spider, hunting spider
tick
centipede
black grouse
ptarmigan
ruffed grouse, partridge, Bonasa umbellus
prairie chicken, prairie grouse, prairie fowl
peacock
quail
partridge
African grey, African gray, Psittacus erithacus
macaw
sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita
lorikeet
coucal
bee eater
hornbill
hummingbird
jacamar
toucan
drake
red-breasted merganser, Mergus serrator
goose
black swan, Cygnus atratus
tusker
echidna, spiny anteater, anteater
platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus
wallaby, brush kangaroo
koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus
wombat
jellyfish
sea anemone, anemone
brain coral
flatworm, platyhelminth
nematode, nematode worm, roundworm
conch
snail
slug
sea slug, nudibranch
chiton, coat-of-mail shell, sea cradle, polyplacophore
chambered nautilus, pearly nautilus, nautilus
Dungeness crab, Cancer magister
rock crab, Cancer irroratus
fiddler crab
king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica
American lobster, Northern lobster, Maine lobster, Homarus americanus
spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish
crayfish, crawfish, crawdad, crawdaddy
hermit crab
isopod
white stork, Ciconia ciconia
black stork, Ciconia nigra
spoonbill
flamingo
little blue heron, Egretta caerulea
American egret, great white heron, Egretta albus
bittern
crane
limpkin, Aramus pictus
European gallinule, Porphyrio porphyrio
American coot, marsh hen, mud hen, water hen, Fulica americana
bustard
ruddy turnstone, Arenaria interpres
red-backed sandpiper, dunlin, Erolia alpina
redshank, Tringa totanus
dowitcher
oystercatcher, oyster catcher
pelican
king penguin, Aptenodytes patagonica
albatross, mollymawk
grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus
killer whale, killer, orca, grampus, sea wolf, Orcinus orca
dugong, Dugong dugon
sea lion
Chihuahua
Japanese spaniel
Maltese dog, Maltese terrier, Maltese
Pekinese, Pekingese, Peke
Shih-Tzu
Blenheim spaniel
papillon
toy terrier
Rhodesian ridgeback
Afghan hound, Afghan
basset, basset hound
beagle
bloodhound, sleuthhound
bluetick
black-and-tan coonhound
Walker hound, Walker foxhound
English foxhound
redbone
borzoi, Russian wolfhound
Irish wolfhound
Italian greyhound
whippet
Ibizan hound, Ibizan Podenco
Norwegian elkhound, elkhound
otterhound, otter hound
Saluki, gazelle hound
Scottish deerhound, deerhound
Weimaraner
Staffordshire bullterrier, Staffordshire bull terrier
American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier
Bedlington terrier
Border terrier
Kerry blue terrier
Irish terrier
Norfolk terrier
Norwich terrier
Yorkshire terrier
wire-haired fox terrier
Lakeland terrier
Sealyham terrier, Sealyham
Airedale, Airedale terrier
cairn, cairn terrier
Australian terrier
Dandie Dinmont, Dandie Dinmont terrier
Boston bull, Boston terrier
miniature schnauzer
giant schnauzer
standard schnauzer
Scotch terrier, Scottish terrier, Scottie
Tibetan terrier, chrysanthemum dog
silky terrier, Sydney silky
soft-coated wheaten terrier
West Highland white terrier
Lhasa, Lhasa apso
flat-coated retriever
curly-coated retriever
golden retriever
Labrador retriever
Chesapeake Bay retriever
German short-haired pointer
vizsla, Hungarian pointer
English setter
Irish setter, red setter
Gordon setter
Brittany spaniel
clumber, clumber spaniel
English springer, English springer spaniel
Welsh springer spaniel
cocker spaniel, English cocker spaniel, cocker
Sussex spaniel
Irish water spaniel
kuvasz
schipperke
groenendael
malinois
briard
kelpie
komondor
Old English sheepdog, bobtail
Shetland sheepdog, Shetland sheep dog, Shetland
collie
Border collie
Bouvier des Flandres, Bouviers des Flandres
Rottweiler
German shepherd, German shepherd dog, German police dog, alsatian
Doberman, Doberman pinscher
miniature pinscher
Greater Swiss Mountain dog
Bernese mountain dog
Appenzeller
EntleBucher
boxer
bull mastiff
Tibetan mastiff
French bulldog
Great Dane
Saint Bernard, St Bernard
Eskimo dog, husky
malamute, malemute, Alaskan malamute
Siberian husky
dalmatian, coach dog, carriage dog
affenpinscher, monkey pinscher, monkey dog
basenji
pug, pug-dog
Leonberg
Newfoundland, Newfoundland dog
Great Pyrenees
Samoyed, Samoyede
Pomeranian
chow, chow chow
keeshond
Brabancon griffon
Pembroke, Pembroke Welsh corgi
Cardigan, Cardigan Welsh corgi
toy poodle
miniature poodle
standard poodle
Mexican hairless
timber wolf, grey wolf, gray wolf, Canis lupus
white wolf, Arctic wolf, Canis lupus tundrarum
red wolf, maned wolf, Canis rufus, Canis niger
coyote, prairie wolf, brush wolf, Canis latrans
dingo, warrigal, warragal, Canis dingo
dhole, Cuon alpinus
African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus
hyena, hyaena
red fox, Vulpes vulpes
kit fox, Vulpes macrotis
Arctic fox, white fox, Alopex lagopus
grey fox, gray fox, Urocyon cinereoargenteus
tabby, tabby cat
tiger cat
Persian cat
Siamese cat, Siamese
Egyptian cat
cougar, puma, catamount, mountain lion, painter, panther, Felis concolor
lynx, catamount
leopard, Panthera pardus
snow leopard, ounce, Panthera uncia
jaguar, panther, Panthera onca, Felis onca
lion, king of beasts, Panthera leo
tiger, Panthera tigris
cheetah, chetah, Acinonyx jubatus
brown bear, bruin, Ursus arctos
American black bear, black bear, Ursus americanus, Euarctos americanus
ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus
sloth bear, Melursus ursinus, Ursus ursinus
mongoose
meerkat, mierkat
tiger beetle
ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle
ground beetle, carabid beetle
long-horned beetle, longicorn, longicorn beetle
leaf beetle, chrysomelid
dung beetle
rhinoceros beetle
weevil
fly
bee
ant, emmet, pismire
grasshopper, hopper
cricket
walking stick, walkingstick, stick insect
cockroach, roach
mantis, mantid
cicada, cicala
leafhopper
lacewing, lacewing fly
dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk
damselfly
admiral
ringlet, ringlet butterfly
monarch, monarch butterfly, milkweed butterfly, Danaus plexippus
cabbage butterfly
sulphur butterfly, sulfur butterfly
lycaenid, lycaenid butterfly
starfish, sea star
sea urchin
sea cucumber, holothurian
wood rabbit, cottontail, cottontail rabbit
hare
Angora, Angora rabbit
hamster
porcupine, hedgehog
fox squirrel, eastern fox squirrel, Sciurus niger
marmot
beaver
guinea pig, Cavia cobaya
sorrel
zebra
hog, pig, grunter, squealer, Sus scrofa
wild boar, boar, Sus scrofa
warthog
hippopotamus, hippo, river horse, Hippopotamus amphibius
ox
water buffalo, water ox, Asiatic buffalo, Bubalus bubalis
bison
ram, tup
bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis
ibex, Capra ibex
hartebeest
impala, Aepyceros melampus
gazelle
Arabian camel, dromedary, Camelus dromedarius
llama
weasel
mink
polecat, fitch, foulmart, foumart, Mustela putorius
black-footed ferret, ferret, Mustela nigripes
otter
skunk, polecat, wood pussy
badger
armadillo
three-toed sloth, ai, Bradypus tridactylus
orangutan, orang, orangutang, Pongo pygmaeus
gorilla, Gorilla gorilla
chimpanzee, chimp, Pan troglodytes
gibbon, Hylobates lar
siamang, Hylobates syndactylus, Symphalangus syndactylus
guenon, guenon monkey
patas, hussar monkey, Erythrocebus patas
baboon
macaque
langur
colobus, colobus monkey
proboscis monkey, Nasalis larvatus
marmoset
capuchin, ringtail, Cebus capucinus
howler monkey, howler
titi, titi monkey
spider monkey, Ateles geoffroyi
squirrel monkey, Saimiri sciureus
Madagascar cat, ring-tailed lemur, Lemur catta
indri, indris, Indri indri, Indri brevicaudatus
Indian elephant, Elephas maximus
African elephant, Loxodonta africana
lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens
giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca
barracouta, snoek
eel
coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch
rock beauty, Holocanthus tricolor
anemone fish
sturgeon
gar, garfish, garpike, billfish, Lepisosteus osseus
lionfish
puffer, pufferfish, blowfish, globefish
abacus
abaya
academic gown, academic robe, judge's robe
accordion, piano accordion, squeeze box
acoustic guitar
aircraft carrier, carrier, flattop, attack aircraft carrier
airliner
airship, dirigible
altar
ambulance
amphibian, amphibious vehicle
analog clock
apiary, bee house
apron
ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin
assault rifle, assault gun
backpack, back pack, knapsack, packsack, rucksack, haversack
bakery, bakeshop, bakehouse
balance beam, beam
balloon
ballpoint, ballpoint pen, ballpen, Biro
Band Aid
banjo
bannister, banister, balustrade, balusters, handrail
barbell
barber chair
barbershop
barn
barometer
barrel, cask
barrow, garden cart, lawn cart, wheelbarrow
baseball
basketball
bassinet
bassoon
bathing cap, swimming cap
bath towel
bathtub, bathing tub, bath, tub
beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon
beacon, lighthouse, beacon light, pharos
beaker
bearskin, busby, shako
beer bottle
beer glass
bell cote, bell cot
bib
bicycle-built-for-two, tandem bicycle, tandem
bikini, two-piece
binder, ring-binder
binoculars, field glasses, opera glasses
birdhouse
boathouse
bobsled, bobsleigh, bob
bolo tie, bolo, bola tie, bola
bonnet, poke bonnet
bookcase
bookshop, bookstore, bookstall
bottlecap
bow
bow tie, bow-tie, bowtie
brass, memorial tablet, plaque
brassiere, bra, bandeau
breakwater, groin, groyne, mole, bulwark, seawall, jetty
breastplate, aegis, egis
broom
bucket, pail
buckle
bulletproof vest
bullet train, bullet
butcher shop, meat market
cab, hack, taxi, taxicab
caldron, cauldron
candle, taper, wax light
cannon
canoe
can opener, tin opener
cardigan
car mirror
carousel, carrousel, merry-go-round, roundabout, whirligig
carpenter's kit, tool kit
carton
car wheel
cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM
cassette
cassette player
castle
catamaran
CD player
cello, violoncello
cellular telephone, cellular phone, cellphone, cell, mobile phone
chain
chainlink fence
chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour
chain saw, chainsaw
chest
chiffonier, commode
chime, bell, gong
china cabinet, china closet
Christmas stocking
church, church building
cinema, movie theater, movie theatre, movie house, picture palace
cleaver, meat cleaver, chopper
cliff dwelling
cloak
clog, geta, patten, sabot
cocktail shaker
coffee mug
coffeepot
coil, spiral, volute, whorl, helix
combination lock
computer keyboard, keypad
confectionery, confectionary, candy store
container ship, containership, container vessel
convertible
corkscrew, bottle screw
cornet, horn, trumpet, trump
cowboy boot
cowboy hat, ten-gallon hat
cradle
crane
crash helmet
crate
crib, cot
Crock Pot
croquet ball
crutch
cuirass
dam, dike, dyke
desk
desktop computer
dial telephone, dial phone
diaper, nappy, napkin
digital clock
digital watch
dining table, board
dishrag, dishcloth
dishwasher, dish washer, dishwashing machine
disk brake, disc brake
dock, dockage, docking facility
dogsled, dog sled, dog sleigh
dome
doormat, welcome mat
drilling platform, offshore rig
drum, membranophone, tympan
drumstick
dumbbell
Dutch oven
electric fan, blower
electric guitar
electric locomotive
entertainment center
envelope
espresso maker
face powder
feather boa, boa
file, file cabinet, filing cabinet
fireboat
fire engine, fire truck
fire screen, fireguard
flagpole, flagstaff
flute, transverse flute
folding chair
football helmet
forklift
fountain
fountain pen
four-poster
freight car
French horn, horn
frying pan, frypan, skillet
fur coat
garbage truck, dustcart
gasmask, respirator, gas helmet
gas pump, gasoline pump, petrol pump, island dispenser
goblet
go-kart
golf ball
golfcart, golf cart
gondola
gong, tam-tam
gown
grand piano, grand
greenhouse, nursery, glasshouse
grille, radiator grille
grocery store, grocery, food market, market
guillotine
hair slide
hair spray
half track
hammer
hamper
hand blower, blow dryer, blow drier, hair dryer, hair drier
hand-held computer, hand-held microcomputer
handkerchief, hankie, hanky, hankey
hard disc, hard disk, fixed disk
harmonica, mouth organ, harp, mouth harp
harp
harvester, reaper
hatchet
holster
home theater, home theatre
honeycomb
hook, claw
hoopskirt, crinoline
horizontal bar, high bar
horse cart, horse-cart
hourglass
iPod
iron, smoothing iron
jack-o'-lantern
jean, blue jean, denim
jeep, landrover
jersey, T-shirt, tee shirt
jigsaw puzzle
jinrikisha, ricksha, rickshaw
joystick
kimono
knee pad
knot
lab coat, laboratory coat
ladle
lampshade, lamp shade
laptop, laptop computer
lawn mower, mower
lens cap, lens cover
letter opener, paper knife, paperknife
library
lifeboat
lighter, light, igniter, ignitor
limousine, limo
liner, ocean liner
lipstick, lip rouge
Loafer
lotion
loudspeaker, speaker, speaker unit, loudspeaker system, speaker system
loupe, jeweler's loupe
lumbermill, sawmill
magnetic compass
mailbag, postbag
mailbox, letter box
maillot
maillot, tank suit
manhole cover
maraca
marimba, xylophone
mask
matchstick
maypole
maze, labyrinth
measuring cup
medicine chest, medicine cabinet
megalith, megalithic structure
microphone, mike
microwave, microwave oven
military uniform
milk can
minibus
miniskirt, mini
minivan
missile
mitten
mixing bowl
mobile home, manufactured home
Model T
modem
monastery
monitor
moped
mortar
mortarboard
mosque
mosquito net
motor scooter, scooter
mountain bike, all-terrain bike, off-roader
mountain tent
mouse, computer mouse
mousetrap
moving van
muzzle
nail
neck brace
necklace
nipple
notebook, notebook computer
obelisk
oboe, hautboy, hautbois
ocarina, sweet potato
odometer, hodometer, mileometer, milometer
oil filter
organ, pipe organ
oscilloscope, scope, cathode-ray oscilloscope, CRO
overskirt
oxcart
oxygen mask
packet
paddle, boat paddle
paddlewheel, paddle wheel
padlock
paintbrush
pajama, pyjama, pj's, jammies
palace
panpipe, pandean pipe, syrinx
paper towel
parachute, chute
parallel bars, bars
park bench
parking meter
passenger car, coach, carriage
patio, terrace
pay-phone, pay-station
pedestal, plinth, footstall
pencil box, pencil case
pencil sharpener
perfume, essence
Petri dish
photocopier
pick, plectrum, plectron
pickelhaube
picket fence, paling
pickup, pickup truck
pier
piggy bank, penny bank
pill bottle
pillow
ping-pong ball
pinwheel
pirate, pirate ship
pitcher, ewer
plane, carpenter's plane, woodworking plane
planetarium
plastic bag
plate rack
plow, plough
plunger, plumber's helper
Polaroid camera, Polaroid Land camera
pole
police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria
poncho
pool table, billiard table, snooker table
pop bottle, soda bottle
pot, flowerpot
potter's wheel
power drill
prayer rug, prayer mat
printer
prison, prison house
projectile, missile
projector
puck, hockey puck
punching bag, punch bag, punching ball, punchball
purse
quill, quill pen
quilt, comforter, comfort, puff
racer, race car, racing car
racket, racquet
radiator
radio, wireless
radio telescope, radio reflector
rain barrel
recreational vehicle, RV, R.V.
reel
reflex camera
refrigerator, icebox
remote control, remote
restaurant, eating house, eating place, eatery
revolver, six-gun, six-shooter
rifle
rocking chair, rocker
rotisserie
rubber eraser, rubber, pencil eraser
rugby ball
rule, ruler
running shoe
safe
safety pin
saltshaker, salt shaker
sandal
sarong
sax, saxophone
scabbard
scale, weighing machine
school bus
schooner
scoreboard
screen, CRT screen
screw
screwdriver
seat belt, seatbelt
sewing machine
shield, buckler
shoe shop, shoe-shop, shoe store
shoji
shopping basket
shopping cart
shovel
shower cap
shower curtain
ski
ski mask
sleeping bag
slide rule, slipstick
sliding door
slot, one-armed bandit
snorkel
snowmobile
snowplow, snowplough
soap dispenser
soccer ball
sock
solar dish, solar collector, solar furnace
sombrero
soup bowl
space bar
space heater
space shuttle
spatula
speedboat
spider web, spider's web
spindle
sports car, sport car
spotlight, spot
stage
steam locomotive
steel arch bridge
steel drum
stethoscope
stole
stone wall
stopwatch, stop watch
stove
strainer
streetcar, tram, tramcar, trolley, trolley car
stretcher
studio couch, day bed
stupa, tope
submarine, pigboat, sub, U-boat
suit, suit of clothes
sundial
sunglass
sunglasses, dark glasses, shades
sunscreen, sunblock, sun blocker
suspension bridge
swab, swob, mop
sweatshirt
swimming trunks, bathing trunks
swing
switch, electric switch, electrical switch
syringe
table lamp
tank, army tank, armored combat vehicle, armoured combat vehicle
tape player
teapot
teddy, teddy bear
television, television system
tennis ball
thatch, thatched roof
theater curtain, theatre curtain
thimble
thresher, thrasher, threshing machine
throne
tile roof
toaster
tobacco shop, tobacconist shop, tobacconist
toilet seat
torch
totem pole
tow truck, tow car, wrecker
toyshop
tractor
trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi
tray
trench coat
tricycle, trike, velocipede
trimaran
tripod
triumphal arch
trolleybus, trolley coach, trackless trolley
trombone
tub, vat
turnstile
typewriter keyboard
umbrella
unicycle, monocycle
upright, upright piano
vacuum, vacuum cleaner
vase
vault
velvet
vending machine
vestment
viaduct
violin, fiddle
volleyball
waffle iron
wall clock
wallet, billfold, notecase, pocketbook
wardrobe, closet, press
warplane, military plane
washbasin, handbasin, washbowl, lavabo, wash-hand basin
washer, automatic washer, washing machine
water bottle
water jug
water tower
whiskey jug
whistle
wig
window screen
window shade
Windsor tie
wine bottle
wing
wok
wooden spoon
wool, woolen, woollen
worm fence, snake fence, snake-rail fence, Virginia fence
wreck
yawl
yurt
web site, website, internet site, site
comic book
crossword puzzle, crossword
street sign
traffic light, traffic signal, stoplight
book jacket, dust cover, dust jacket, dust wrapper
menu
plate
guacamole
consomme
hot pot, hotpot
trifle
ice cream, icecream
ice lolly, lolly, lollipop, popsicle
French loaf
bagel, beigel
pretzel
cheeseburger
hotdog, hot dog, red hot
mashed potato
head cabbage
broccoli
cauliflower
zucchini, courgette
spaghetti squash
acorn squash
butternut squash
cucumber, cuke
artichoke, globe artichoke
bell pepper
cardoon
mushroom
Granny Smith
strawberry
orange
lemon
fig
pineapple, ananas
banana
jackfruit, jak, jack
custard apple
pomegranate
hay
carbonara
chocolate sauce, chocolate syrup
dough
meat loaf, meatloaf
pizza, pizza pie
potpie
burrito
red wine
espresso
cup
eggnog
alp
bubble
cliff, drop, drop-off
coral reef
geyser
lakeside, lakeshore
promontory, headland, head, foreland
sandbar, sand bar
seashore, coast, seacoast, sea-coast
valley, vale
volcano
ballplayer, baseball player
groom, bridegroom
scuba diver
rapeseed
daisy
yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum
corn
acorn
hip, rose hip, rosehip
buckeye, horse chestnut, conker
coral fungus
agaric
gyromitra
stinkhorn, carrion fungus
earthstar
hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa
bolete
ear, spike, capitulum
toilet tissue, toilet paper, bathroom tissue

================================================
FILE: pytorch_classification/grad_cam/imagenet21k_classes.txt
================================================
organism, being
benthos
heterotroph
cell
person, individual, someone, somebody, mortal, soul
animal, animate_being, beast, brute, creature, fauna
plant, flora, plant_life
food, nutrient
artifact, artefact
hop
check-in
dressage
curvet, vaulting
piaffe
funambulism, tightrope_walking
rock_climbing
contact_sport
outdoor_sport, field_sport
gymnastics, gymnastic_exercise
acrobatics, tumbling
track_and_field
track, running
jumping
broad_jump, long_jump
high_jump
Fosbury_flop
skiing
cross-country_skiing
ski_jumping
water_sport, aquatics
swimming, swim
bathe
dip, plunge
dive, diving
floating, natation
dead-man's_float, prone_float
belly_flop, belly_flopper, belly_whop, belly_whopper
cliff_diving
flip
gainer, full_gainer
half_gainer
jackknife
swan_dive, swallow_dive
skin_diving, skin-dive
scuba_diving
snorkeling, snorkel_diving
surfing, surfboarding, surfriding
water-skiing
rowing, row
sculling
boxing, pugilism, fisticuffs
professional_boxing
in-fighting
fight
rope-a-dope
spar, sparring
archery
sledding
tobogganing
luging
bobsledding
wrestling, rassling, grappling
Greco-Roman_wrestling
professional_wrestling
sumo
skating
ice_skating
figure_skating
rollerblading
roller_skating
skateboarding
speed_skating
racing
auto_racing, car_racing
boat_racing
hydroplane_racing
camel_racing
greyhound_racing
horse_racing
riding, horseback_riding, equitation
equestrian_sport
pony-trekking
showjumping, stadium_jumping
cross-country_riding, cross-country_jumping
cycling
bicycling
motorcycling
dune_cycling
blood_sport
bullfighting, tauromachy
cockfighting
hunt, hunting
battue
beagling
coursing
deer_hunting, deer_hunt
ducking, duck_hunting
fox_hunting, foxhunt
pigsticking
fishing, sportfishing
angling
fly-fishing
troll, trolling
casting, cast
bait_casting
fly_casting
overcast
surf_casting, surf_fishing
day_game
athletic_game
ice_hockey, hockey, hockey_game
tetherball
water_polo
outdoor_game
golf, golf_game
professional_golf
round_of_golf, round
medal_play, stroke_play
match_play
miniature_golf
croquet
quoits, horseshoes
shuffleboard, shovelboard
field_game
field_hockey, hockey
shinny, shinney
football, football_game
American_football, American_football_game
professional_football
touch_football
hurling
rugby, rugby_football, rugger
ball_game, ballgame
baseball, baseball_game
ball
professional_baseball
hardball
perfect_game
no-hit_game, no-hitter
one-hitter, 1-hitter
two-hitter, 2-hitter
three-hitter, 3-hitter
four-hitter, 4-hitter
five-hitter, 5-hitter
softball, softball_game
rounders
stickball, stickball_game
cricket
lacrosse
polo
pushball
soccer, association_football
court_game
handball
racquetball
fives
squash, squash_racquets, squash_rackets
volleyball, volleyball_game
jai_alai, pelota
badminton
battledore, battledore_and_shuttlecock
basketball, basketball_game, hoops
professional_basketball
deck_tennis
netball
tennis, lawn_tennis
professional_tennis
singles
singles
doubles
doubles
royal_tennis, real_tennis, court_tennis
pallone
sport, athletics
clasp, clench, clutch, clutches, grasp, grip, hold
judo
team_sport
Last_Supper, Lord's_Supper
Seder, Passover_supper
camping, encampment, bivouacking, tenting
pest
critter
creepy-crawly
darter
peeper
homeotherm, homoiotherm, homotherm
poikilotherm, ectotherm
range_animal
scavenger
bottom-feeder, bottom-dweller
bottom-feeder
work_animal
beast_of_burden, jument
draft_animal
pack_animal, sumpter
domestic_animal, domesticated_animal
feeder
feeder
stocker
hatchling
head
migrator
molter, moulter
pet
stayer
stunt
marine_animal, marine_creature, sea_animal, sea_creature
by-catch, bycatch
female
hen
male
adult
young, offspring
orphan
young_mammal
baby
pup, whelp
wolf_pup, wolf_cub
puppy
cub, young_carnivore
lion_cub
bear_cub
tiger_cub
kit
suckling
sire
dam
thoroughbred, purebred, pureblood
giant
mutant
carnivore
herbivore
insectivore
acrodont
pleurodont
microorganism, micro-organism
monohybrid
arbovirus, arborvirus
adenovirus
arenavirus
Marburg_virus
Arenaviridae
vesiculovirus
Reoviridae
variola_major, variola_major_virus
viroid, virusoid
coliphage
paramyxovirus
poliovirus
herpes, herpes_virus
herpes_simplex_1, HS1, HSV-1, HSV-I
herpes_zoster, herpes_zoster_virus
herpes_varicella_zoster, herpes_varicella_zoster_virus
cytomegalovirus, CMV
varicella_zoster_virus
polyoma, polyoma_virus
lyssavirus
reovirus
rotavirus
moneran, moneron
archaebacteria, archaebacterium, archaeobacteria, archeobacteria
bacteroid
Bacillus_anthracis, anthrax_bacillus
Yersinia_pestis
Brucella
spirillum, spirilla
botulinus, botulinum, Clostridium_botulinum
clostridium_perfringens
cyanobacteria, blue-green_algae
trichodesmium
nitric_bacteria, nitrobacteria
spirillum
Francisella, genus_Francisella
gonococcus, Neisseria_gonorrhoeae
Corynebacterium_diphtheriae, C._diphtheriae, Klebs-Loeffler_bacillus
enteric_bacteria, enterobacteria, enterics, entric
klebsiella
Salmonella_typhimurium
typhoid_bacillus, Salmonella_typhosa, Salmonella_typhi
nitrate_bacterium, nitric_bacterium
nitrite_bacterium, nitrous_bacterium
actinomycete
streptomyces
Streptomyces_erythreus
Streptomyces_griseus
tubercle_bacillus, Mycobacterium_tuberculosis
pus-forming_bacteria
streptobacillus
myxobacteria, myxobacterium, myxobacter, gliding_bacteria, slime_bacteria
staphylococcus, staphylococci, staph
diplococcus
pneumococcus, Diplococcus_pneumoniae
streptococcus, streptococci, strep
spirochete, spirochaete
planktonic_algae
zooplankton
parasite
endoparasite, entoparasite, entozoan, entozoon, endozoan
ectoparasite, ectozoan, ectozoon, epizoan, epizoon
pathogen
commensal
myrmecophile
protoctist
protozoan, protozoon
sarcodinian, sarcodine
heliozoan
endameba
ameba, amoeba
globigerina
testacean
arcella
difflugia
ciliate, ciliated_protozoan, ciliophoran
paramecium, paramecia
stentor
alga, algae
arame
seagrass
golden_algae
yellow-green_algae
brown_algae
kelp
fucoid, fucoid_algae
fucoid
fucus
bladderwrack, Ascophyllum_nodosum
green_algae, chlorophyte
pond_scum
chlorella
stonewort
desmid
sea_moss
eukaryote, eucaryote
prokaryote, procaryote
zooid
Leishmania, genus_Leishmania
zoomastigote, zooflagellate
polymastigote
costia, Costia_necatrix
giardia
cryptomonad, cryptophyte
sporozoan
sporozoite
trophozoite
merozoite
coccidium, eimeria
gregarine
plasmodium, Plasmodium_vivax, malaria_parasite
leucocytozoan, leucocytozoon
microsporidian
Ostariophysi, order_Ostariophysi
cypriniform_fish
loach
cyprinid, cyprinid_fish
carp
domestic_carp, Cyprinus_carpio
leather_carp
mirror_carp
European_bream, Abramis_brama
tench, Tinca_tinca
dace, Leuciscus_leuciscus
chub, Leuciscus_cephalus
shiner
common_shiner, silversides, Notropis_cornutus
roach, Rutilus_rutilus
rudd, Scardinius_erythrophthalmus
minnow, Phoxinus_phoxinus
gudgeon, Gobio_gobio
goldfish, Carassius_auratus
crucian_carp, Carassius_carassius, Carassius_vulgaris
electric_eel, Electrophorus_electric
catostomid
buffalo_fish, buffalofish
black_buffalo, Ictiobus_niger
hog_sucker, hog_molly, Hypentelium_nigricans
redhorse, redhorse_sucker
cyprinodont
killifish
mummichog, Fundulus_heteroclitus
striped_killifish, mayfish, may_fish, Fundulus_majalis
rivulus
flagfish, American_flagfish, Jordanella_floridae
swordtail, helleri, topminnow, Xyphophorus_helleri
guppy, rainbow_fish, Lebistes_reticulatus
topminnow, poeciliid_fish, poeciliid, live-bearer
mosquitofish, Gambusia_affinis
platy, Platypoecilus_maculatus
mollie, molly
squirrelfish
reef_squirrelfish, Holocentrus_coruscus
deepwater_squirrelfish, Holocentrus_bullisi
Holocentrus_ascensionis
soldierfish, soldier-fish
anomalops, flashlight_fish
flashlight_fish, Photoblepharon_palpebratus
John_Dory, Zeus_faber
boarfish, Capros_aper
boarfish
cornetfish
stickleback, prickleback
three-spined_stickleback, Gasterosteus_aculeatus
ten-spined_stickleback, Gasterosteus_pungitius
pipefish, needlefish
dwarf_pipefish, Syngnathus_hildebrandi
deepwater_pipefish, Cosmocampus_profundus
seahorse, sea_horse
snipefish, bellows_fish
shrimpfish, shrimp-fish
trumpetfish, Aulostomus_maculatus
pellicle
embryo, conceptus, fertilized_egg
fetus, foetus
abortus
spawn
blastula, blastosphere
blastocyst, blastodermic_vessicle
gastrula
morula
yolk, vitellus
chordate
cephalochordate
lancelet, amphioxus
tunicate, urochordate, urochord
ascidian
sea_squirt
salp, salpa
doliolum
larvacean
appendicularia
ascidian_tadpole
vertebrate, craniate
Amniota
amniote
aquatic_vertebrate
jawless_vertebrate, jawless_fish, agnathan
ostracoderm
heterostracan
anaspid
conodont
cyclostome
lamprey, lamprey_eel, lamper_eel
sea_lamprey, Petromyzon_marinus
hagfish, hag, slime_eels
Myxine_glutinosa
eptatretus
gnathostome
placoderm
cartilaginous_fish, chondrichthian
holocephalan, holocephalian
chimaera
rabbitfish, Chimaera_monstrosa
elasmobranch, selachian
shark
cow_shark, six-gilled_shark, Hexanchus_griseus
mackerel_shark
porbeagle, Lamna_nasus
mako, mako_shark
shortfin_mako, Isurus_oxyrhincus
longfin_mako, Isurus_paucus
bonito_shark, blue_pointed, Isurus_glaucus
great_white_shark, white_shark, man-eater, man-eating_shark, Carcharodon_carcharias
basking_shark, Cetorhinus_maximus
thresher, thrasher, thresher_shark, fox_shark, Alopius_vulpinus
carpet_shark, Orectolobus_barbatus
nurse_shark, Ginglymostoma_cirratum
sand_tiger, sand_shark, Carcharias_taurus, Odontaspis_taurus
whale_shark, Rhincodon_typus
requiem_shark
bull_shark, cub_shark, Carcharhinus_leucas
sandbar_shark, Carcharhinus_plumbeus
blacktip_shark, sandbar_shark, Carcharhinus_limbatus
whitetip_shark, oceanic_whitetip_shark, white-tipped_shark, Carcharinus_longimanus
dusky_shark, Carcharhinus_obscurus
lemon_shark, Negaprion_brevirostris
blue_shark, great_blue_shark, Prionace_glauca
tiger_shark, Galeocerdo_cuvieri
soupfin_shark, soupfin, soup-fin, Galeorhinus_zyopterus
dogfish
smooth_dogfish
smoothhound, smoothhound_shark, Mustelus_mustelus
American_smooth_dogfish, Mustelus_canis
Florida_smoothhound, Mustelus_norrisi
whitetip_shark, reef_whitetip_shark, Triaenodon_obseus
spiny_dogfish
Atlantic_spiny_dogfish, Squalus_acanthias
Pacific_spiny_dogfish, Squalus_suckleyi
hammerhead, hammerhead_shark
smooth_hammerhead, Sphyrna_zygaena
smalleye_hammerhead, Sphyrna_tudes
shovelhead, bonnethead, bonnet_shark, Sphyrna_tiburo
angel_shark, angelfish, Squatina_squatina, monkfish
ray
electric_ray, crampfish, numbfish, torpedo
sawfish
smalltooth_sawfish, Pristis_pectinatus
guitarfish
stingray
roughtail_stingray, Dasyatis_centroura
butterfly_ray
eagle_ray
spotted_eagle_ray, spotted_ray, Aetobatus_narinari
cownose_ray, cow-nosed_ray, Rhinoptera_bonasus
manta, manta_ray, devilfish
Atlantic_manta, Manta_birostris
devil_ray, Mobula_hypostoma
skate
grey_skate, gray_skate, Raja_batis
little_skate, Raja_erinacea
thorny_skate, Raja_radiata
barndoor_skate, Raja_laevis
bird
dickeybird, dickey-bird, dickybird, dicky-bird
fledgling, fledgeling
nestling, baby_bird
cock
gamecock, fighting_cock
hen
nester
night_bird
night_raven
bird_of_passage
archaeopteryx, archeopteryx, Archaeopteryx_lithographica
archaeornis
ratite, ratite_bird, flightless_bird
carinate, carinate_bird, flying_bird
ostrich, Struthio_camelus
cassowary
emu, Dromaius_novaehollandiae, Emu_novaehollandiae
kiwi, apteryx
rhea, Rhea_americana
rhea, nandu, Pterocnemia_pennata
elephant_bird, aepyornis
moa
passerine, passeriform_bird
nonpasserine_bird
oscine, oscine_bird
songbird, songster
honey_eater, honeysucker
accentor
hedge_sparrow, sparrow, dunnock, Prunella_modularis
lark
skylark, Alauda_arvensis
wagtail
pipit, titlark, lark
meadow_pipit, Anthus_pratensis
finch
chaffinch, Fringilla_coelebs
brambling, Fringilla_montifringilla
goldfinch, Carduelis_carduelis
linnet, lintwhite, Carduelis_cannabina
siskin, Carduelis_spinus
red_siskin, Carduelis_cucullata
redpoll, Carduelis_flammea
redpoll, Carduelis_hornemanni
New_World_goldfinch, goldfinch, yellowbird, Spinus_tristis
pine_siskin, pine_finch, Spinus_pinus
house_finch, linnet, Carpodacus_mexicanus
purple_finch, Carpodacus_purpureus
canary, canary_bird
common_canary, Serinus_canaria
serin
crossbill, Loxia_curvirostra
bullfinch, Pyrrhula_pyrrhula
junco, snowbird
dark-eyed_junco, slate-colored_junco, Junco_hyemalis
New_World_sparrow
vesper_sparrow, grass_finch, Pooecetes_gramineus
white-throated_sparrow, whitethroat, Zonotrichia_albicollis
white-crowned_sparrow, Zonotrichia_leucophrys
chipping_sparrow, Spizella_passerina
field_sparrow, Spizella_pusilla
tree_sparrow, Spizella_arborea
song_sparrow, Melospiza_melodia
swamp_sparrow, Melospiza_georgiana
bunting
indigo_bunting, indigo_finch, indigo_bird, Passerina_cyanea
ortolan, ortolan_bunting, Emberiza_hortulana
reed_bunting, Emberiza_schoeniclus
yellowhammer, yellow_bunting, Emberiza_citrinella
yellow-breasted_bunting, Emberiza_aureola
snow_bunting, snowbird, snowflake, Plectrophenax_nivalis
honeycreeper
banana_quit
sparrow, true_sparrow
English_sparrow, house_sparrow, Passer_domesticus
tree_sparrow, Passer_montanus
grosbeak, grossbeak
evening_grosbeak, Hesperiphona_vespertina
hawfinch, Coccothraustes_coccothraustes
pine_grosbeak, Pinicola_enucleator
cardinal, cardinal_grosbeak, Richmondena_Cardinalis, Cardinalis_cardinalis, redbird
pyrrhuloxia, Pyrrhuloxia_sinuata
towhee
chewink, cheewink, Pipilo_erythrophthalmus
green-tailed_towhee, Chlorura_chlorura
weaver, weaverbird, weaver_finch
baya, Ploceus_philippinus
whydah, whidah, widow_bird
Java_sparrow, Java_finch, ricebird, Padda_oryzivora
avadavat, amadavat
grassfinch, grass_finch
zebra_finch, Poephila_castanotis
honeycreeper, Hawaiian_honeycreeper
lyrebird
scrubbird, scrub-bird, scrub_bird
broadbill
tyrannid
New_World_flycatcher, flycatcher, tyrant_flycatcher, tyrant_bird
kingbird, Tyrannus_tyrannus
Arkansas_kingbird, western_kingbird
Cassin's_kingbird, Tyrannus_vociferans
eastern_kingbird
grey_kingbird, gray_kingbird, petchary, Tyrannus_domenicensis_domenicensis
pewee, peewee, peewit, pewit, wood_pewee, Contopus_virens
western_wood_pewee, Contopus_sordidulus
phoebe, phoebe_bird, Sayornis_phoebe
vermillion_flycatcher, firebird, Pyrocephalus_rubinus_mexicanus
cotinga, chatterer
cock_of_the_rock, Rupicola_rupicola
cock_of_the_rock, Rupicola_peruviana
manakin
bellbird
umbrella_bird, Cephalopterus_ornatus
ovenbird
antbird, ant_bird
ant_thrush
ant_shrike
spotted_antbird, Hylophylax_naevioides
woodhewer, woodcreeper, wood-creeper, tree_creeper
pitta
scissortail, scissortailed_flycatcher, Muscivora-forficata
Old_World_flycatcher, true_flycatcher, flycatcher
spotted_flycatcher, Muscicapa_striata, Muscicapa_grisola
thickhead, whistler
thrush
missel_thrush, mistle_thrush, mistletoe_thrush, Turdus_viscivorus
song_thrush, mavis, throstle, Turdus_philomelos
fieldfare, snowbird, Turdus_pilaris
redwing, Turdus_iliacus
blackbird, merl, merle, ouzel, ousel, European_blackbird, Turdus_merula
ring_ouzel, ring_blackbird, ring_thrush, Turdus_torquatus
robin, American_robin, Turdus_migratorius
clay-colored_robin, Turdus_greyi
hermit_thrush, Hylocichla_guttata
veery, Wilson's_thrush, Hylocichla_fuscescens
wood_thrush, Hylocichla_mustelina
nightingale, Luscinia_megarhynchos
thrush_nightingale, Luscinia_luscinia
bulbul
Old_World_chat, chat
stonechat, Saxicola_torquata
whinchat, Saxicola_rubetra
solitaire
redstart, redtail
wheatear
bluebird
robin, redbreast, robin_redbreast, Old_World_robin, Erithacus_rubecola
bluethroat, Erithacus_svecicus
warbler
gnatcatcher
kinglet
goldcrest, golden-crested_kinglet, Regulus_regulus
gold-crowned_kinglet, Regulus_satrata
ruby-crowned_kinglet, ruby-crowned_wren, Regulus_calendula
Old_World_warbler, true_warbler
blackcap, Silvia_atricapilla
greater_whitethroat, whitethroat, Sylvia_communis
lesser_whitethroat, whitethroat, Sylvia_curruca
wood_warbler, Phylloscopus_sibilatrix
sedge_warbler, sedge_bird, sedge_wren, reedbird, Acrocephalus_schoenobaenus
wren_warbler
tailorbird, Orthotomus_sutorius
babbler, cackler
New_World_warbler, wood_warbler
parula_warbler, northern_parula, Parula_americana
Wilson's_warbler, Wilson's_blackcap, Wilsonia_pusilla
flycatching_warbler
American_redstart, redstart, Setophaga_ruticilla
Cape_May_warbler, Dendroica_tigrina
yellow_warbler, golden_warbler, yellowbird, Dendroica_petechia
Blackburn, Blackburnian_warbler, Dendroica_fusca
Audubon's_warbler, Audubon_warbler, Dendroica_auduboni
myrtle_warbler, myrtle_bird, Dendroica_coronata
blackpoll, Dendroica_striate
New_World_chat, chat
yellow-breasted_chat, Icteria_virens
ovenbird, Seiurus_aurocapillus
water_thrush
yellowthroat
common_yellowthroat, Maryland_yellowthroat, Geothlypis_trichas
riflebird, Ptloris_paradisea
New_World_oriole, American_oriole, oriole
northern_oriole, Icterus_galbula
Baltimore_oriole, Baltimore_bird, hangbird, firebird, Icterus_galbula_galbula
Bullock's_oriole, Icterus_galbula_bullockii
orchard_oriole, Icterus_spurius
meadowlark, lark
eastern_meadowlark, Sturnella_magna
western_meadowlark, Sturnella_neglecta
cacique, cazique
bobolink, ricebird, reedbird, Dolichonyx_oryzivorus
New_World_blackbird, blackbird
grackle, crow_blackbird
purple_grackle, Quiscalus_quiscula
rusty_blackbird, rusty_grackle, Euphagus_carilonus
cowbird
red-winged_blackbird, redwing, Agelaius_phoeniceus
Old_World_oriole, oriole
golden_oriole, Oriolus_oriolus
fig-bird
starling
common_starling, Sturnus_vulgaris
rose-colored_starling, rose-colored_pastor, Pastor_sturnus, Pastor_roseus
myna, mynah, mina, minah, myna_bird, mynah_bird
crested_myna, Acridotheres_tristis
hill_myna, Indian_grackle, grackle, Gracula_religiosa
corvine_bird
crow
American_crow, Corvus_brachyrhyncos
raven, Corvus_corax
rook, Corvus_frugilegus
jackdaw, daw, Corvus_monedula
chough
jay
Old_World_jay
common_European_jay, Garullus_garullus
New_World_jay
blue_jay, jaybird, Cyanocitta_cristata
Canada_jay, grey_jay, gray_jay, camp_robber, whisker_jack, Perisoreus_canadensis
Rocky_Mountain_jay, Perisoreus_canadensis_capitalis
nutcracker
common_nutcracker, Nucifraga_caryocatactes
Clark's_nutcracker, Nucifraga_columbiana
magpie
European_magpie, Pica_pica
American_magpie, Pica_pica_hudsonia
Australian_magpie
butcherbird
currawong, bell_magpie
piping_crow, piping_crow-shrike, Gymnorhina_tibicen
wren, jenny_wren
winter_wren, Troglodytes_troglodytes
house_wren, Troglodytes_aedon
marsh_wren
long-billed_marsh_wren, Cistothorus_palustris
sedge_wren, short-billed_marsh_wren, Cistothorus_platensis
rock_wren, Salpinctes_obsoletus
Carolina_wren, Thryothorus_ludovicianus
cactus_wren
mockingbird, mocker, Mimus_polyglotktos
blue_mockingbird, Melanotis_caerulescens
catbird, grey_catbird, gray_catbird, Dumetella_carolinensis
thrasher, mocking_thrush
brown_thrasher, brown_thrush, Toxostoma_rufums
New_Zealand_wren
rock_wren, Xenicus_gilviventris
rifleman_bird, Acanthisitta_chloris
creeper, tree_creeper
brown_creeper, American_creeper, Certhia_americana
European_creeper, Certhia_familiaris
wall_creeper, tichodrome, Tichodroma_muriaria
European_nuthatch, Sitta_europaea
red-breasted_nuthatch, Sitta_canadensis
white-breasted_nuthatch, Sitta_carolinensis
titmouse, tit
chickadee
black-capped_chickadee, blackcap, Parus_atricapillus
tufted_titmouse, Parus_bicolor
Carolina_chickadee, Parus_carolinensis
blue_tit, tomtit, Parus_caeruleus
bushtit, bush_tit
wren-tit, Chamaea_fasciata
verdin, Auriparus_flaviceps
fairy_bluebird, bluebird
swallow
barn_swallow, chimney_swallow, Hirundo_rustica
cliff_swallow, Hirundo_pyrrhonota
tree_swallow, tree_martin, Hirundo_nigricans
white-bellied_swallow, tree_swallow, Iridoprocne_bicolor
martin
house_martin, Delichon_urbica
bank_martin, bank_swallow, sand_martin, Riparia_riparia
purple_martin, Progne_subis
wood_swallow, swallow_shrike
tanager
scarlet_tanager, Piranga_olivacea, redbird, firebird
western_tanager, Piranga_ludoviciana
summer_tanager, summer_redbird, Piranga_rubra
hepatic_tanager, Piranga_flava_hepatica
shrike
butcherbird
European_shrike, Lanius_excubitor
northern_shrike, Lanius_borealis
white-rumped_shrike, Lanius_ludovicianus_excubitorides
loggerhead_shrike, Lanius_lucovicianus
migrant_shrike, Lanius_ludovicianus_migrans
bush_shrike
black-fronted_bush_shrike, Chlorophoneus_nigrifrons
bowerbird, catbird
satin_bowerbird, satin_bird, Ptilonorhynchus_violaceus
great_bowerbird, Chlamydera_nuchalis
water_ouzel, dipper
European_water_ouzel, Cinclus_aquaticus
American_water_ouzel, Cinclus_mexicanus
vireo
red-eyed_vireo, Vireo_olivaceous
solitary_vireo, Vireo_solitarius
blue-headed_vireo, Vireo_solitarius_solitarius
waxwing
cedar_waxwing, cedarbird, Bombycilla_cedrorun
Bohemian_waxwing, Bombycilla_garrulus
bird_of_prey, raptor, raptorial_bird
Accipitriformes, order_Accipitriformes
hawk
eyas
tiercel, tercel, tercelet
goshawk, Accipiter_gentilis
sparrow_hawk, Accipiter_nisus
Cooper's_hawk, blue_darter, Accipiter_cooperii
chicken_hawk, hen_hawk
buteonine
redtail, red-tailed_hawk, Buteo_jamaicensis
rough-legged_hawk, roughleg, Buteo_lagopus
red-shouldered_hawk, Buteo_lineatus
buzzard, Buteo_buteo
honey_buzzard, Pernis_apivorus
kite
black_kite, Milvus_migrans
swallow-tailed_kite, swallow-tailed_hawk, Elanoides_forficatus
white-tailed_kite, Elanus_leucurus
harrier
marsh_harrier, Circus_Aeruginosus
Montagu's_harrier, Circus_pygargus
marsh_hawk, northern_harrier, hen_harrier, Circus_cyaneus
harrier_eagle, short-toed_eagle
falcon
peregrine, peregrine_falcon, Falco_peregrinus
falcon-gentle, falcon-gentil
gyrfalcon, gerfalcon, Falco_rusticolus
kestrel, Falco_tinnunculus
sparrow_hawk, American_kestrel, kestrel, Falco_sparverius
pigeon_hawk, merlin, Falco_columbarius
hobby, Falco_subbuteo
caracara
Audubon's_caracara, Polyborus_cheriway_audubonii
carancha, Polyborus_plancus
eagle, bird_of_Jove
young_bird
eaglet
harpy, harpy_eagle, Harpia_harpyja
golden_eagle, Aquila_chrysaetos
tawny_eagle, Aquila_rapax
bald_eagle, American_eagle, Haliaeetus_leucocephalus
sea_eagle
Kamchatkan_sea_eagle, Stellar's_sea_eagle, Haliaeetus_pelagicus
ern, erne, grey_sea_eagle, gray_sea_eagle, European_sea_eagle, white-tailed_sea_eagle, Haliatus_albicilla
fishing_eagle, Haliaeetus_leucorhyphus
osprey, fish_hawk, fish_eagle, sea_eagle, Pandion_haliaetus
vulture
Aegypiidae, family_Aegypiidae
Old_World_vulture
griffon_vulture, griffon, Gyps_fulvus
bearded_vulture, lammergeier, lammergeyer, Gypaetus_barbatus
Egyptian_vulture, Pharaoh's_chicken, Neophron_percnopterus
black_vulture, Aegypius_monachus
secretary_bird, Sagittarius_serpentarius
New_World_vulture, cathartid
buzzard, turkey_buzzard, turkey_vulture, Cathartes_aura
condor
Andean_condor, Vultur_gryphus
California_condor, Gymnogyps_californianus
black_vulture, carrion_crow, Coragyps_atratus
king_vulture, Sarcorhamphus_papa
owl, bird_of_Minerva, bird_of_night, hooter
owlet
little_owl, Athene_noctua
horned_owl
great_horned_owl, Bubo_virginianus
great_grey_owl, great_gray_owl, Strix_nebulosa
tawny_owl, Strix_aluco
barred_owl, Strix_varia
screech_owl, Otus_asio
screech_owl
scops_owl
spotted_owl, Strix_occidentalis
Old_World_scops_owl, Otus_scops
Oriental_scops_owl, Otus_sunia
hoot_owl
hawk_owl, Surnia_ulula
long-eared_owl, Asio_otus
laughing_owl, laughing_jackass, Sceloglaux_albifacies
barn_owl, Tyto_alba
amphibian
Ichyostega
urodele, caudate
salamander
European_fire_salamander, Salamandra_salamandra
spotted_salamander, fire_salamander, Salamandra_maculosa
alpine_salamander, Salamandra_atra
newt, triton
common_newt, Triturus_vulgaris
red_eft, Notophthalmus_viridescens
Pacific_newt
rough-skinned_newt, Taricha_granulosa
California_newt, Taricha_torosa
eft
ambystomid, ambystomid_salamander
mole_salamander, Ambystoma_talpoideum
spotted_salamander, Ambystoma_maculatum
tiger_salamander, Ambystoma_tigrinum
axolotl, mud_puppy, Ambystoma_mexicanum
waterdog
hellbender, mud_puppy, Cryptobranchus_alleganiensis
giant_salamander, Megalobatrachus_maximus
olm, Proteus_anguinus
mud_puppy, Necturus_maculosus
dicamptodon, dicamptodontid
Pacific_giant_salamander, Dicamptodon_ensatus
olympic_salamander, Rhyacotriton_olympicus
lungless_salamander, plethodont
eastern_red-backed_salamander, Plethodon_cinereus
western_red-backed_salamander, Plethodon_vehiculum
dusky_salamander
climbing_salamander
arboreal_salamander, Aneides_lugubris
slender_salamander, worm_salamander
web-toed_salamander
Shasta_salamander, Hydromantes_shastae
limestone_salamander, Hydromantes_brunus
amphiuma, congo_snake, congo_eel, blind_eel
siren
frog, toad, toad_frog, anuran, batrachian, salientian
true_frog, ranid
wood-frog, wood_frog, Rana_sylvatica
leopard_frog, spring_frog, Rana_pipiens
bullfrog, Rana_catesbeiana
green_frog, spring_frog, Rana_clamitans
cascades_frog, Rana_cascadae
goliath_frog, Rana_goliath
pickerel_frog, Rana_palustris
tarahumara_frog, Rana_tarahumarae
grass_frog, Rana_temporaria
leptodactylid_frog, leptodactylid
robber_frog
barking_frog, robber_frog, Hylactophryne_augusti
crapaud, South_American_bullfrog, Leptodactylus_pentadactylus
tree_frog, tree-frog
tailed_frog, bell_toad, ribbed_toad, tailed_toad, Ascaphus_trui
Liopelma_hamiltoni
true_toad
bufo
agua, agua_toad, Bufo_marinus
European_toad, Bufo_bufo
natterjack, Bufo_calamita
American_toad, Bufo_americanus
Eurasian_green_toad, Bufo_viridis
American_green_toad, Bufo_debilis
Yosemite_toad, Bufo_canorus
Texas_toad, Bufo_speciosus
southwestern_toad, Bufo_microscaphus
western_toad, Bufo_boreas
obstetrical_toad, midwife_toad, Alytes_obstetricans
midwife_toad, Alytes_cisternasi
fire-bellied_toad, Bombina_bombina
spadefoot, spadefoot_toad
western_spadefoot, Scaphiopus_hammondii
southern_spadefoot, Scaphiopus_multiplicatus
plains_spadefoot, Scaphiopus_bombifrons
tree_toad, tree_frog, tree-frog
spring_peeper, Hyla_crucifer
Pacific_tree_toad, Hyla_regilla
canyon_treefrog, Hyla_arenicolor
chameleon_tree_frog
cricket_frog
northern_cricket_frog, Acris_crepitans
eastern_cricket_frog, Acris_gryllus
chorus_frog
lowland_burrowing_treefrog, northern_casque-headed_frog, Pternohyla_fodiens
western_narrow-mouthed_toad, Gastrophryne_olivacea
eastern_narrow-mouthed_toad, Gastrophryne_carolinensis
sheep_frog
tongueless_frog
Surinam_toad, Pipa_pipa, Pipa_americana
African_clawed_frog, Xenopus_laevis
South_American_poison_toad
caecilian, blindworm
reptile, reptilian
anapsid, anapsid_reptile
diapsid, diapsid_reptile
Diapsida, subclass_Diapsida
chelonian, chelonian_reptile
turtle
sea_turtle, marine_turtle
green_turtle, Chelonia_mydas
loggerhead, loggerhead_turtle, Caretta_caretta
ridley
Atlantic_ridley, bastard_ridley, bastard_turtle, Lepidochelys_kempii
Pacific_ridley, olive_ridley, Lepidochelys_olivacea
hawksbill_turtle, hawksbill, hawkbill, tortoiseshell_turtle, Eretmochelys_imbricata
leatherback_turtle, leatherback, leathery_turtle, Dermochelys_coriacea
snapping_turtle
common_snapping_turtle, snapper, Chelydra_serpentina
alligator_snapping_turtle, alligator_snapper, Macroclemys_temmincki
mud_turtle
musk_turtle, stinkpot
terrapin
diamondback_terrapin, Malaclemys_centrata
red-bellied_terrapin, red-bellied_turtle, redbelly, Pseudemys_rubriventris
slider, yellow-bellied_terrapin, Pseudemys_scripta
cooter, river_cooter, Pseudemys_concinna
box_turtle, box_tortoise
Western_box_turtle, Terrapene_ornata
painted_turtle, painted_terrapin, painted_tortoise, Chrysemys_picta
tortoise
European_tortoise, Testudo_graeca
giant_tortoise
gopher_tortoise, gopher_turtle, gopher, Gopherus_polypemus
desert_tortoise, Gopherus_agassizii
Texas_tortoise
soft-shelled_turtle, pancake_turtle
spiny_softshell, Trionyx_spiniferus
smooth_softshell, Trionyx_muticus
tuatara, Sphenodon_punctatum
saurian
lizard
gecko
flying_gecko, fringed_gecko, Ptychozoon_homalocephalum
banded_gecko
iguanid, iguanid_lizard
common_iguana, iguana, Iguana_iguana
marine_iguana, Amblyrhynchus_cristatus
desert_iguana, Dipsosaurus_dorsalis
chuckwalla, Sauromalus_obesus
zebra-tailed_lizard, gridiron-tailed_lizard, Callisaurus_draconoides
fringe-toed_lizard, Uma_notata
earless_lizard
collared_lizard
leopard_lizard
spiny_lizard
fence_lizard
western_fence_lizard, swift, blue-belly, Sceloporus_occidentalis
eastern_fence_lizard, pine_lizard, Sceloporus_undulatus
sagebrush_lizard, Sceloporus_graciosus
side-blotched_lizard, sand_lizard, Uta_stansburiana
tree_lizard, Urosaurus_ornatus
horned_lizard, horned_toad, horny_frog
Texas_horned_lizard, Phrynosoma_cornutum
basilisk
American_chameleon, anole, Anolis_carolinensis
worm_lizard
night_lizard
skink, scincid, scincid_lizard
western_skink, Eumeces_skiltonianus
mountain_skink, Eumeces_callicephalus
teiid_lizard, teiid
whiptail, whiptail_lizard
racerunner, race_runner, six-lined_racerunner, Cnemidophorus_sexlineatus
plateau_striped_whiptail, Cnemidophorus_velox
Chihuahuan_spotted_whiptail, Cnemidophorus_exsanguis
western_whiptail, Cnemidophorus_tigris
checkered_whiptail, Cnemidophorus_tesselatus
teju
caiman_lizard
agamid, agamid_lizard
agama
frilled_lizard, Chlamydosaurus_kingi
moloch
mountain_devil, spiny_lizard, Moloch_horridus
anguid_lizard
alligator_lizard
blindworm, slowworm, Anguis_fragilis
glass_lizard, glass_snake, joint_snake
legless_lizard
Lanthanotus_borneensis
venomous_lizard
Gila_monster, Heloderma_suspectum
beaded_lizard, Mexican_beaded_lizard, Heloderma_horridum
lacertid_lizard, lacertid
sand_lizard, Lacerta_agilis
green_lizard, Lacerta_viridis
chameleon, chamaeleon
African_chameleon, Chamaeleo_chamaeleon
horned_chameleon, Chamaeleo_oweni
monitor, monitor_lizard, varan
African_monitor, Varanus_niloticus
Komodo_dragon, Komodo_lizard, dragon_lizard, giant_lizard, Varanus_komodoensis
crocodilian_reptile, crocodilian
crocodile
African_crocodile, Nile_crocodile, Crocodylus_niloticus
Asian_crocodile, Crocodylus_porosus
Morlett's_crocodile
false_gavial, Tomistoma_schlegeli
alligator, gator
American_alligator, Alligator_mississipiensis
Chinese_alligator, Alligator_sinensis
caiman, cayman
spectacled_caiman, Caiman_sclerops
gavial, Gavialis_gangeticus
armored_dinosaur
stegosaur, stegosaurus, Stegosaur_stenops
ankylosaur, ankylosaurus
Edmontonia
bone-headed_dinosaur
pachycephalosaur, pachycephalosaurus
ceratopsian, horned_dinosaur
protoceratops
triceratops
styracosaur, styracosaurus
psittacosaur, psittacosaurus
ornithopod, ornithopod_dinosaur
hadrosaur, hadrosaurus, duck-billed_dinosaur
trachodon, trachodont
saurischian, saurischian_dinosaur
sauropod, sauropod_dinosaur
apatosaur, apatosaurus, brontosaur, brontosaurus, thunder_lizard, Apatosaurus_excelsus
barosaur, barosaurus
diplodocus
argentinosaur
theropod, theropod_dinosaur, bird-footed_dinosaur
ceratosaur, ceratosaurus
coelophysis
tyrannosaur, tyrannosaurus, Tyrannosaurus_rex
allosaur, allosaurus
ornithomimid
maniraptor
oviraptorid
velociraptor
deinonychus
utahraptor, superslasher
synapsid, synapsid_reptile
dicynodont
pelycosaur
dimetrodon
pterosaur, flying_reptile
pterodactyl
ichthyosaur
ichthyosaurus
stenopterygius, Stenopterygius_quadrisicissus
plesiosaur, plesiosaurus
nothosaur
snake, serpent, ophidian
colubrid_snake, colubrid
hoop_snake
thunder_snake, worm_snake, Carphophis_amoenus
ringneck_snake, ring-necked_snake, ring_snake
hognose_snake, puff_adder, sand_viper
leaf-nosed_snake
green_snake, grass_snake
smooth_green_snake, Opheodrys_vernalis
rough_green_snake, Opheodrys_aestivus
green_snake
racer
blacksnake, black_racer, Coluber_constrictor
blue_racer, Coluber_constrictor_flaviventris
horseshoe_whipsnake, Coluber_hippocrepis
whip-snake, whip_snake, whipsnake
coachwhip, coachwhip_snake, Masticophis_flagellum
California_whipsnake, striped_racer, Masticophis_lateralis
Sonoran_whipsnake, Masticophis_bilineatus
rat_snake
corn_snake, red_rat_snake, Elaphe_guttata
black_rat_snake, blacksnake, pilot_blacksnake, mountain_blacksnake, Elaphe_obsoleta
chicken_snake
Indian_rat_snake, Ptyas_mucosus
glossy_snake, Arizona_elegans
bull_snake, bull-snake
gopher_snake, Pituophis_melanoleucus
pine_snake
king_snake, kingsnake
common_kingsnake, Lampropeltis_getulus
milk_snake, house_snake, milk_adder, checkered_adder, Lampropeltis_triangulum
garter_snake, grass_snake
common_garter_snake, Thamnophis_sirtalis
ribbon_snake, Thamnophis_sauritus
Western_ribbon_snake, Thamnophis_proximus
lined_snake, Tropidoclonion_lineatum
ground_snake, Sonora_semiannulata
eastern_ground_snake, Potamophis_striatula, Haldea_striatula
water_snake
common_water_snake, banded_water_snake, Natrix_sipedon, Nerodia_sipedon
water_moccasin
grass_snake, ring_snake, ringed_snake, Natrix_natrix
viperine_grass_snake, Natrix_maura
red-bellied_snake, Storeria_occipitamaculata
sand_snake
banded_sand_snake, Chilomeniscus_cinctus
black-headed_snake
vine_snake
lyre_snake
Sonoran_lyre_snake, Trimorphodon_lambda
night_snake, Hypsiglena_torquata
blind_snake, worm_snake
western_blind_snake, Leptotyphlops_humilis
indigo_snake, gopher_snake, Drymarchon_corais
eastern_indigo_snake, Drymarchon_corais_couperi
constrictor
boa
boa_constrictor, Constrictor_constrictor
rubber_boa, tow-headed_snake, Charina_bottae
rosy_boa, Lichanura_trivirgata
anaconda, Eunectes_murinus
python
carpet_snake, Python_variegatus, Morelia_spilotes_variegatus
reticulated_python, Python_reticulatus
Indian_python, Python_molurus
rock_python, rock_snake, Python_sebae
amethystine_python
elapid, elapid_snake
coral_snake, harlequin-snake, New_World_coral_snake
eastern_coral_snake, Micrurus_fulvius
western_coral_snake, Micruroides_euryxanthus
coral_snake, Old_World_coral_snake
African_coral_snake, Aspidelaps_lubricus
Australian_coral_snake, Rhynchoelaps_australis
copperhead, Denisonia_superba
cobra
Indian_cobra, Naja_naja
asp, Egyptian_cobra, Naja_haje
black-necked_cobra, spitting_cobra, Naja_nigricollis
hamadryad, king_cobra, Ophiophagus_hannah, Naja_hannah
ringhals, rinkhals, spitting_snake, Hemachatus_haemachatus
mamba
black_mamba, Dendroaspis_augusticeps
green_mamba
death_adder, Acanthophis_antarcticus
tiger_snake, Notechis_scutatus
Australian_blacksnake, Pseudechis_porphyriacus
krait
banded_krait, banded_adder, Bungarus_fasciatus
taipan, Oxyuranus_scutellatus
sea_snake
viper
adder, common_viper, Vipera_berus
asp, asp_viper, Vipera_aspis
puff_adder, Bitis_arietans
gaboon_viper, Bitis_gabonica
horned_viper, cerastes, sand_viper, horned_asp, Cerastes_cornutus
pit_viper
copperhead, Agkistrodon_contortrix
water_moccasin, cottonmouth, cottonmouth_moccasin, Agkistrodon_piscivorus
rattlesnake, rattler
diamondback, diamondback_rattlesnake, Crotalus_adamanteus
timber_rattlesnake, banded_rattlesnake, Crotalus_horridus_horridus
canebrake_rattlesnake, canebrake_rattler, Crotalus_horridus_atricaudatus
prairie_rattlesnake, prairie_rattler, Western_rattlesnake, Crotalus_viridis
sidewinder, horned_rattlesnake, Crotalus_cerastes
Western_diamondback, Western_diamondback_rattlesnake, Crotalus_atrox
rock_rattlesnake, Crotalus_lepidus
tiger_rattlesnake, Crotalus_tigris
Mojave_rattlesnake, Crotalus_scutulatus
speckled_rattlesnake, Crotalus_mitchellii
massasauga, massasauga_rattler, Sistrurus_catenatus
ground_rattler, massasauga, Sistrurus_miliaris
fer-de-lance, Bothrops_atrops
carcase, carcass
carrion
arthropod
trilobite
arachnid, arachnoid
harvestman, daddy_longlegs, Phalangium_opilio
scorpion
false_scorpion, pseudoscorpion
book_scorpion, Chelifer_cancroides
whip-scorpion, whip_scorpion
vinegarroon, Mastigoproctus_giganteus
spider
orb-weaving_spider
black_and_gold_garden_spider, Argiope_aurantia
barn_spider, Araneus_cavaticus
garden_spider, Aranea_diademata
comb-footed_spider, theridiid
black_widow, Latrodectus_mactans
tarantula
wolf_spider, hunting_spider
European_wolf_spider, tarantula, Lycosa_tarentula
trap-door_spider
acarine
tick
hard_tick, ixodid
Ixodes_dammini, deer_tick
Ixodes_neotomae
Ixodes_pacificus, western_black-legged_tick
Ixodes_scapularis, black-legged_tick
sheep-tick, sheep_tick, Ixodes_ricinus
Ixodes_persulcatus
Ixodes_dentatus
Ixodes_spinipalpis
wood_tick, American_dog_tick, Dermacentor_variabilis
soft_tick, argasid
mite
web-spinning_mite
acarid
trombidiid
trombiculid
harvest_mite, chigger, jigger, redbug
acarus, genus_Acarus
itch_mite, sarcoptid
rust_mite
spider_mite, tetranychid
red_spider, red_spider_mite, Panonychus_ulmi
myriapod
garden_centipede, garden_symphilid, symphilid, Scutigerella_immaculata
tardigrade
centipede
house_centipede, Scutigera_coleoptrata
millipede, millepede, milliped
sea_spider, pycnogonid
Merostomata, class_Merostomata
horseshoe_crab, king_crab, Limulus_polyphemus, Xiphosurus_polyphemus
Asian_horseshoe_crab
eurypterid
tongue_worm, pentastomid
gallinaceous_bird, gallinacean
domestic_fowl, fowl, poultry
Dorking
Plymouth_Rock
Cornish, Cornish_fowl
Rock_Cornish
game_fowl
cochin, cochin_china
jungle_fowl, gallina
jungle_cock
jungle_hen
red_jungle_fowl, Gallus_gallus
chicken, Gallus_gallus
bantam
chick, biddy
cock, rooster
cockerel
capon
hen, biddy
cackler
brood_hen, broody, broody_hen, setting_hen, sitter
mother_hen
layer
pullet
spring_chicken
Rhode_Island_red
Dominique, Dominick
Orpington
turkey, Meleagris_gallopavo
turkey_cock, gobbler, tom, tom_turkey
ocellated_turkey, Agriocharis_ocellata
grouse
black_grouse
European_black_grouse, heathfowl, Lyrurus_tetrix
Asian_black_grouse, Lyrurus_mlokosiewiczi
blackcock, black_cock
greyhen, grayhen, grey_hen, gray_hen, heath_hen
ptarmigan
red_grouse, moorfowl, moorbird, moor-bird, moorgame, Lagopus_scoticus
moorhen
capercaillie, capercailzie, horse_of_the_wood, Tetrao_urogallus
spruce_grouse, Canachites_canadensis
sage_grouse, sage_hen, Centrocercus_urophasianus
ruffed_grouse, partridge, Bonasa_umbellus
sharp-tailed_grouse, sprigtail, sprig_tail, Pedioecetes_phasianellus
prairie_chicken, prairie_grouse, prairie_fowl
greater_prairie_chicken, Tympanuchus_cupido
lesser_prairie_chicken, Tympanuchus_pallidicinctus
heath_hen, Tympanuchus_cupido_cupido
guan
curassow
piping_guan
chachalaca
Texas_chachalaca, Ortilis_vetula_macalli
megapode, mound_bird, mound-bird, mound_builder, scrub_fowl
mallee_fowl, leipoa, lowan, Leipoa_ocellata
mallee_hen
brush_turkey, Alectura_lathami
maleo, Macrocephalon_maleo
phasianid
pheasant
ring-necked_pheasant, Phasianus_colchicus
afropavo, Congo_peafowl, Afropavo_congensis
argus, argus_pheasant
golden_pheasant, Chrysolophus_pictus
bobwhite, bobwhite_quail, partridge
northern_bobwhite, Colinus_virginianus
Old_World_quail
migratory_quail, Coturnix_coturnix, Coturnix_communis
monal, monaul
peafowl, bird_of_Juno
peachick, pea-chick
peacock
peahen
blue_peafowl, Pavo_cristatus
green_peafowl, Pavo_muticus
quail
California_quail, Lofortyx_californicus
tragopan
partridge
Hungarian_partridge, grey_partridge, gray_partridge, Perdix_perdix
red-legged_partridge, Alectoris_ruffa
Greek_partridge, rock_partridge, Alectoris_graeca
mountain_quail, mountain_partridge, Oreortyx_picta_palmeri
guinea_fowl, guinea, Numida_meleagris
guinea_hen
hoatzin, hoactzin, stinkbird, Opisthocomus_hoazin
tinamou, partridge
columbiform_bird
dodo, Raphus_cucullatus
pigeon
pouter_pigeon, pouter
dove
rock_dove, rock_pigeon, Columba_livia
band-tailed_pigeon, band-tail_pigeon, bandtail, Columba_fasciata
wood_pigeon, ringdove, cushat, Columba_palumbus
turtledove
Streptopelia_turtur
ringdove, Streptopelia_risoria
Australian_turtledove, turtledove, Stictopelia_cuneata
mourning_dove, Zenaidura_macroura
domestic_pigeon
squab
fairy_swallow
roller, tumbler, tumbler_pigeon
homing_pigeon, homer
carrier_pigeon
passenger_pigeon, Ectopistes_migratorius
sandgrouse, sand_grouse
painted_sandgrouse, Pterocles_indicus
pin-tailed_sandgrouse, pin-tailed_grouse, Pterocles_alchata
pallas's_sandgrouse, Syrrhaptes_paradoxus
parrot
popinjay
poll, poll_parrot
African_grey, African_gray, Psittacus_erithacus
amazon
macaw
kea, Nestor_notabilis
cockatoo
sulphur-crested_cockatoo, Kakatoe_galerita, Cacatua_galerita
pink_cockatoo, Kakatoe_leadbeateri
cockateel, cockatiel, cockatoo_parrot, Nymphicus_hollandicus
lovebird
lory
lorikeet
varied_Lorikeet, Glossopsitta_versicolor
rainbow_lorikeet, Trichoglossus_moluccanus
parakeet, parrakeet, parroket, paraquet, paroquet, parroquet
Carolina_parakeet, Conuropsis_carolinensis
budgerigar, budgereegah, budgerygah, budgie, grass_parakeet, lovebird, shell_parakeet, Melopsittacus_undulatus
ring-necked_parakeet, Psittacula_krameri
cuculiform_bird
cuckoo
European_cuckoo, Cuculus_canorus
black-billed_cuckoo, Coccyzus_erythropthalmus
roadrunner, chaparral_cock, Geococcyx_californianus
ani
coucal
crow_pheasant, Centropus_sinensis
touraco, turaco, turacou, turakoo
coraciiform_bird
roller
European_roller, Coracias_garrulus
ground_roller
kingfisher
Eurasian_kingfisher, Alcedo_atthis
belted_kingfisher, Ceryle_alcyon
kookaburra, laughing_jackass, Dacelo_gigas
bee_eater
hornbill
hoopoe, hoopoo
Euopean_hoopoe, Upupa_epops
wood_hoopoe
motmot, momot
tody
apodiform_bird
swift
European_swift, Apus_apus
chimney_swift, chimney_swallow, Chateura_pelagica
swiftlet, Collocalia_inexpectata
tree_swift, crested_swift
hummingbird
Archilochus_colubris
thornbill
goatsucker, nightjar, caprimulgid
European_goatsucker, European_nightjar, Caprimulgus_europaeus
chuck-will's-widow, Caprimulgus_carolinensis
whippoorwill, Caprimulgus_vociferus
poorwill, Phalaenoptilus_nuttallii
frogmouth
oilbird, guacharo, Steatornis_caripensis
piciform_bird
woodpecker, peckerwood, pecker
green_woodpecker, Picus_viridis
downy_woodpecker
flicker
yellow-shafted_flicker, Colaptes_auratus, yellowhammer
gilded_flicker, Colaptes_chrysoides
red-shafted_flicker, Colaptes_caper_collaris
ivorybill, ivory-billed_woodpecker, Campephilus_principalis
redheaded_woodpecker, redhead, Melanerpes_erythrocephalus
sapsucker
yellow-bellied_sapsucker, Sphyrapicus_varius
red-breasted_sapsucker, Sphyrapicus_varius_ruber
wryneck
piculet
barbet
puffbird
honey_guide
jacamar
toucan
toucanet
trogon
quetzal, quetzal_bird
resplendent_quetzel, resplendent_trogon, Pharomacrus_mocino
aquatic_bird
waterfowl, water_bird, waterbird
anseriform_bird
duck
drake
quack-quack
duckling
diving_duck
dabbling_duck, dabbler
mallard, Anas_platyrhynchos
black_duck, Anas_rubripes
teal
greenwing, green-winged_teal, Anas_crecca
bluewing, blue-winged_teal, Anas_discors
garganey, Anas_querquedula
widgeon, wigeon, Anas_penelope
American_widgeon, baldpate, Anas_americana
shoveler, shoveller, broadbill, Anas_clypeata
pintail, pin-tailed_duck, Anas_acuta
sheldrake
shelduck
ruddy_duck, Oxyura_jamaicensis
bufflehead, butterball, dipper, Bucephela_albeola
goldeneye, whistler, Bucephela_clangula
Barrow's_goldeneye, Bucephala_islandica
canvasback, canvasback_duck, Aythya_valisineria
pochard, Aythya_ferina
redhead, Aythya_americana
scaup, scaup_duck, bluebill, broadbill
greater_scaup, Aythya_marila
lesser_scaup, lesser_scaup_duck, lake_duck, Aythya_affinis
wild_duck
wood_duck, summer_duck, wood_widgeon, Aix_sponsa
wood_drake
mandarin_duck, Aix_galericulata
muscovy_duck, musk_duck, Cairina_moschata
sea_duck
eider, eider_duck
scoter, scooter
common_scoter, Melanitta_nigra
old_squaw, oldwife, Clangula_hyemalis
merganser, fish_duck, sawbill, sheldrake
goosander, Mergus_merganser
American_merganser, Mergus_merganser_americanus
red-breasted_merganser, Mergus_serrator
smew, Mergus_albellus
hooded_merganser, hooded_sheldrake, Lophodytes_cucullatus
goose
gosling
gander
Chinese_goose, Anser_cygnoides
greylag, graylag, greylag_goose, graylag_goose, Anser_anser
blue_goose, Chen_caerulescens
snow_goose
brant, brant_goose, brent, brent_goose
common_brant_goose, Branta_bernicla
honker, Canada_goose, Canadian_goose, Branta_canadensis
barnacle_goose, barnacle, Branta_leucopsis
coscoroba
swan
cob
pen
cygnet
mute_swan, Cygnus_olor
whooper, whooper_swan, Cygnus_cygnus
tundra_swan, Cygnus_columbianus
whistling_swan, Cygnus_columbianus_columbianus
Bewick's_swan, Cygnus_columbianus_bewickii
trumpeter, trumpeter_swan, Cygnus_buccinator
black_swan, Cygnus_atratus
screamer
horned_screamer, Anhima_cornuta
crested_screamer
chaja, Chauna_torquata
mammal, mammalian
female_mammal
tusker
prototherian
monotreme, egg-laying_mammal
echidna, spiny_anteater, anteater
echidna, spiny_anteater, anteater
platypus, duckbill, duckbilled_platypus, duck-billed_platypus, Ornithorhynchus_anatinus
marsupial, pouched_mammal
opossum, possum
common_opossum, Didelphis_virginiana, Didelphis_marsupialis
crab-eating_opossum
opossum_rat
bandicoot
rabbit-eared_bandicoot, rabbit_bandicoot, bilby, Macrotis_lagotis
kangaroo
giant_kangaroo, great_grey_kangaroo, Macropus_giganteus
wallaby, brush_kangaroo
common_wallaby, Macropus_agiles
hare_wallaby, kangaroo_hare
nail-tailed_wallaby, nail-tailed_kangaroo
rock_wallaby, rock_kangaroo
pademelon, paddymelon
tree_wallaby, tree_kangaroo
musk_kangaroo, Hypsiprymnodon_moschatus
rat_kangaroo, kangaroo_rat
potoroo
bettong
jerboa_kangaroo, kangaroo_jerboa
phalanger, opossum, possum
cuscus
brush-tailed_phalanger, Trichosurus_vulpecula
flying_phalanger, flying_opossum, flying_squirrel
koala, koala_bear, kangaroo_bear, native_bear, Phascolarctos_cinereus
wombat
dasyurid_marsupial, dasyurid
dasyure
eastern_dasyure, Dasyurus_quoll
native_cat, Dasyurus_viverrinus
thylacine, Tasmanian_wolf, Tasmanian_tiger, Thylacinus_cynocephalus
Tasmanian_devil, ursine_dasyure, Sarcophilus_hariisi
pouched_mouse, marsupial_mouse, marsupial_rat
numbat, banded_anteater, anteater, Myrmecobius_fasciatus
pouched_mole, marsupial_mole, Notoryctus_typhlops
placental, placental_mammal, eutherian, eutherian_mammal
livestock, stock, farm_animal
bull
cow
calf
calf
yearling
buck
doe
insectivore
mole
starnose_mole, star-nosed_mole, Condylura_cristata
brewer's_mole, hair-tailed_mole, Parascalops_breweri
golden_mole
shrew_mole
Asiatic_shrew_mole, Uropsilus_soricipes
American_shrew_mole, Neurotrichus_gibbsii
shrew, shrewmouse
common_shrew, Sorex_araneus
masked_shrew, Sorex_cinereus
short-tailed_shrew, Blarina_brevicauda
water_shrew
American_water_shrew, Sorex_palustris
European_water_shrew, Neomys_fodiens
Mediterranean_water_shrew, Neomys_anomalus
least_shrew, Cryptotis_parva
hedgehog, Erinaceus_europaeus, Erinaceus_europeaeus
tenrec, tendrac
tailless_tenrec, Tenrec_ecaudatus
otter_shrew, potamogale, Potamogale_velox
eiderdown
aftershaft
sickle_feather
contour_feather
bastard_wing, alula, spurious_wing
saddle_hackle, saddle_feather
encolure
hair
squama
scute
sclerite
plastron
scallop_shell
oyster_shell
theca
invertebrate
sponge, poriferan, parazoan
choanocyte, collar_cell
glass_sponge
Venus's_flower_basket
metazoan
coelenterate, cnidarian
planula
polyp
medusa, medusoid, medusan
jellyfish
scyphozoan
Chrysaora_quinquecirrha
hydrozoan, hydroid
hydra
siphonophore
nanomia
Portuguese_man-of-war, man-of-war, jellyfish
praya
apolemia
anthozoan, actinozoan
sea_anemone, anemone
actinia, actinian, actiniarian
sea_pen
coral
gorgonian, gorgonian_coral
sea_feather
sea_fan
red_coral
stony_coral, madrepore, madriporian_coral
brain_coral
staghorn_coral, stag's-horn_coral
mushroom_coral
ctenophore, comb_jelly
beroe
platyctenean
sea_gooseberry
Venus's_girdle, Cestum_veneris
worm
helminth, parasitic_worm
woodworm
woodborer, borer
acanthocephalan, spiny-headed_worm
arrowworm, chaetognath
bladder_worm
flatworm, platyhelminth
planarian, planaria
fluke, trematode, trematode_worm
cercaria
liver_fluke, Fasciola_hepatica
Fasciolopsis_buski
schistosome, blood_fluke
tapeworm, cestode
echinococcus
taenia
ribbon_worm, nemertean, nemertine, proboscis_worm
beard_worm, pogonophoran
rotifer
nematode, nematode_worm, roundworm
common_roundworm, Ascaris_lumbricoides
chicken_roundworm, Ascaridia_galli
pinworm, threadworm, Enterobius_vermicularis
eelworm
vinegar_eel, vinegar_worm, Anguillula_aceti, Turbatrix_aceti
trichina, Trichinella_spiralis
hookworm
filaria
Guinea_worm, Dracunculus_medinensis
annelid, annelid_worm, segmented_worm
archiannelid
oligochaete, oligochaete_worm
earthworm, angleworm, fishworm, fishing_worm, wiggler, nightwalker, nightcrawler, crawler, dew_worm, red_worm
polychaete, polychete, polychaete_worm, polychete_worm
lugworm, lug, lobworm
sea_mouse
bloodworm
leech, bloodsucker, hirudinean
medicinal_leech, Hirudo_medicinalis
horseleech
mollusk, mollusc, shellfish
scaphopod
tooth_shell, tusk_shell
gastropod, univalve
abalone, ear-shell
ormer, sea-ear, Haliotis_tuberculata
scorpion_shell
conch
giant_conch, Strombus_gigas
snail
edible_snail, Helix_pomatia
garden_snail
brown_snail, Helix_aspersa
Helix_hortensis
slug
seasnail
neritid, neritid_gastropod
nerita
bleeding_tooth, Nerita_peloronta
neritina
whelk
moon_shell, moonshell
periwinkle, winkle
limpet
common_limpet, Patella_vulgata
keyhole_limpet, Fissurella_apertura, Diodora_apertura
river_limpet, freshwater_limpet, Ancylus_fluviatilis
sea_slug, nudibranch
sea_hare, Aplysia_punctata
Hermissenda_crassicornis
bubble_shell
physa
cowrie, cowry
money_cowrie, Cypraea_moneta
tiger_cowrie, Cypraea_tigris
solenogaster, aplacophoran
chiton, coat-of-mail_shell, sea_cradle, polyplacophore
bivalve, pelecypod, lamellibranch
spat
clam
seashell
soft-shell_clam, steamer, steamer_clam, long-neck_clam, Mya_arenaria
quahog, quahaug, hard-shell_clam, hard_clam, round_clam, Venus_mercenaria, Mercenaria_mercenaria
littleneck, littleneck_clam
cherrystone, cherrystone_clam
geoduck
razor_clam, jackknife_clam, knife-handle
giant_clam, Tridacna_gigas
cockle
edible_cockle, Cardium_edule
oyster
Japanese_oyster, Ostrea_gigas
Virginia_oyster
pearl_oyster, Pinctada_margaritifera
saddle_oyster, Anomia_ephippium
window_oyster, windowpane_oyster, capiz, Placuna_placenta
ark_shell
blood_clam
mussel
marine_mussel, mytilid
edible_mussel, Mytilus_edulis
freshwater_mussel, freshwater_clam
pearly-shelled_mussel
thin-shelled_mussel
zebra_mussel, Dreissena_polymorpha
scallop, scollop, escallop
bay_scallop, Pecten_irradians
sea_scallop, giant_scallop, Pecten_magellanicus
shipworm, teredinid
teredo
piddock
cephalopod, cephalopod_mollusk
chambered_nautilus, pearly_nautilus, nautilus
octopod
octopus, devilfish
paper_nautilus, nautilus, Argonaut, Argonauta_argo
decapod
squid
loligo
ommastrephes
architeuthis, giant_squid
cuttlefish, cuttle
spirula, Spirula_peronii
crustacean
malacostracan_crustacean
decapod_crustacean, decapod
brachyuran
crab
stone_crab, Menippe_mercenaria
hard-shell_crab
soft-shell_crab, soft-shelled_crab
Dungeness_crab, Cancer_magister
rock_crab, Cancer_irroratus
Jonah_crab, Cancer_borealis
swimming_crab
English_lady_crab, Portunus_puber
American_lady_crab, lady_crab, calico_crab, Ovalipes_ocellatus
blue_crab, Callinectes_sapidus
fiddler_crab
pea_crab
king_crab, Alaska_crab, Alaskan_king_crab, Alaska_king_crab, Paralithodes_camtschatica
spider_crab
European_spider_crab, king_crab, Maja_squinado
giant_crab, Macrocheira_kaempferi
lobster
true_lobster
American_lobster, Northern_lobster, Maine_lobster, Homarus_americanus
European_lobster, Homarus_vulgaris
Cape_lobster, Homarus_capensis
Norway_lobster, Nephrops_norvegicus
spiny_lobster, langouste, rock_lobster, crawfish, crayfish, sea_crawfish
crayfish, crawfish, crawdad, crawdaddy
Old_World_crayfish, ecrevisse
American_crayfish
hermit_crab
shrimp
snapping_shrimp, pistol_shrimp
prawn
long-clawed_prawn, river_prawn, Palaemon_australis
tropical_prawn
krill
Euphausia_pacifica
opossum_shrimp
stomatopod, stomatopod_crustacean
mantis_shrimp, mantis_crab
squilla, mantis_prawn
isopod
woodlouse, slater
pill_bug
sow_bug
sea_louse, sea_slater
amphipod
skeleton_shrimp
whale_louse
daphnia, water_flea
fairy_shrimp
brine_shrimp, Artemia_salina
tadpole_shrimp
copepod, copepod_crustacean
cyclops, water_flea
seed_shrimp, mussel_shrimp, ostracod
barnacle, cirriped, cirripede
acorn_barnacle, rock_barnacle, Balanus_balanoides
goose_barnacle, gooseneck_barnacle, Lepas_fascicularis
onychophoran, velvet_worm, peripatus
wading_bird, wader
stork
white_stork, Ciconia_ciconia
black_stork, Ciconia_nigra
adjutant_bird, adjutant, adjutant_stork, Leptoptilus_dubius
marabou, marabout, marabou_stork, Leptoptilus_crumeniferus
openbill
jabiru, Jabiru_mycteria
saddlebill, jabiru, Ephippiorhynchus_senegalensis
policeman_bird, black-necked_stork, jabiru, Xenorhyncus_asiaticus
wood_ibis, wood_stork, flinthead, Mycteria_americana
shoebill, shoebird, Balaeniceps_rex
ibis
wood_ibis, wood_stork, Ibis_ibis
sacred_ibis, Threskiornis_aethiopica
spoonbill
common_spoonbill, Platalea_leucorodia
roseate_spoonbill, Ajaia_ajaja
flamingo
heron
great_blue_heron, Ardea_herodius
great_white_heron, Ardea_occidentalis
egret
little_blue_heron, Egretta_caerulea
snowy_egret, snowy_heron, Egretta_thula
little_egret, Egretta_garzetta
great_white_heron, Casmerodius_albus
American_egret, great_white_heron, Egretta_albus
cattle_egret, Bubulcus_ibis
night_heron, night_raven
black-crowned_night_heron, Nycticorax_nycticorax
yellow-crowned_night_heron, Nyctanassa_violacea
boatbill, boat-billed_heron, broadbill, Cochlearius_cochlearius
bittern
American_bittern, stake_driver, Botaurus_lentiginosus
European_bittern, Botaurus_stellaris
least_bittern, Ixobrychus_exilis
crane
whooping_crane, whooper, Grus_americana
courlan, Aramus_guarauna
limpkin, Aramus_pictus
crested_cariama, seriema, Cariama_cristata
chunga, seriema, Chunga_burmeisteri
rail
weka, maori_hen, wood_hen
crake
corncrake, land_rail, Crex_crex
spotted_crake, Porzana_porzana
gallinule, marsh_hen, water_hen, swamphen
Florida_gallinule, Gallinula_chloropus_cachinnans
moorhen, Gallinula_chloropus
purple_gallinule
European_gallinule, Porphyrio_porphyrio
American_gallinule, Porphyrula_martinica
notornis, takahe, Notornis_mantelli
coot
American_coot, marsh_hen, mud_hen, water_hen, Fulica_americana
Old_World_coot, Fulica_atra
bustard
great_bustard, Otis_tarda
plain_turkey, Choriotis_australis
button_quail, button-quail, bustard_quail, hemipode
striped_button_quail, Turnix_sylvatica
plain_wanderer, Pedionomus_torquatus
trumpeter
Brazilian_trumpeter, Psophia_crepitans
seabird, sea_bird, seafowl
shorebird, shore_bird, limicoline_bird
plover
piping_plover, Charadrius_melodus
killdeer, kildeer, killdeer_plover, Charadrius_vociferus
dotterel, dotrel, Charadrius_morinellus, Eudromias_morinellus
golden_plover
lapwing, green_plover, peewit, pewit
turnstone
ruddy_turnstone, Arenaria_interpres
black_turnstone, Arenaria-Melanocephala
sandpiper
surfbird, Aphriza_virgata
European_sandpiper, Actitis_hypoleucos
spotted_sandpiper, Actitis_macularia
least_sandpiper, stint, Erolia_minutilla
red-backed_sandpiper, dunlin, Erolia_alpina
greenshank, Tringa_nebularia
redshank, Tringa_totanus
yellowlegs
greater_yellowlegs, Tringa_melanoleuca
lesser_yellowlegs, Tringa_flavipes
pectoral_sandpiper, jacksnipe, Calidris_melanotos
knot, greyback, grayback, Calidris_canutus
curlew_sandpiper, Calidris_Ferruginea
sanderling, Crocethia_alba
upland_sandpiper, upland_plover, Bartramian_sandpiper, Bartramia_longicauda
ruff, Philomachus_pugnax
reeve
tattler
Polynesian_tattler, Heteroscelus_incanus
willet, Catoptrophorus_semipalmatus
woodcock
Eurasian_woodcock, Scolopax_rusticola
American_woodcock, woodcock_snipe, Philohela_minor
snipe
whole_snipe, Gallinago_gallinago
Wilson's_snipe, Gallinago_gallinago_delicata
great_snipe, woodcock_snipe, Gallinago_media
jacksnipe, half_snipe, Limnocryptes_minima
dowitcher
greyback, grayback, Limnodromus_griseus
red-breasted_snipe, Limnodromus_scolopaceus
curlew
European_curlew, Numenius_arquata
Eskimo_curlew, Numenius_borealis
godwit
Hudsonian_godwit, Limosa_haemastica
stilt, stiltbird, longlegs, long-legs, stilt_plover, Himantopus_stilt
black-necked_stilt, Himantopus_mexicanus
black-winged_stilt, Himantopus_himantopus
white-headed_stilt, Himantopus_himantopus_leucocephalus
kaki, Himantopus_novae-zelandiae
stilt, Australian_stilt
banded_stilt, Cladorhyncus_leucocephalum
avocet
oystercatcher, oyster_catcher
phalarope
red_phalarope, Phalaropus_fulicarius
northern_phalarope, Lobipes_lobatus
Wilson's_phalarope, Steganopus_tricolor
pratincole, glareole
courser
cream-colored_courser, Cursorius_cursor
crocodile_bird, Pluvianus_aegyptius
stone_curlew, thick-knee, Burhinus_oedicnemus
coastal_diving_bird
larid
gull, seagull, sea_gull
mew, mew_gull, sea_mew, Larus_canus
black-backed_gull, great_black-backed_gull, cob, Larus_marinus
herring_gull, Larus_argentatus
laughing_gull, blackcap, pewit, pewit_gull, Larus_ridibundus
ivory_gull, Pagophila_eburnea
kittiwake
tern
sea_swallow, Sterna_hirundo
skimmer
jaeger
parasitic_jaeger, arctic_skua, Stercorarius_parasiticus
skua, bonxie
great_skua, Catharacta_skua
auk
auklet
razorbill, razor-billed_auk, Alca_torda
little_auk, dovekie, Plautus_alle
guillemot
black_guillemot, Cepphus_grylle
pigeon_guillemot, Cepphus_columba
murre
common_murre, Uria_aalge
thick-billed_murre, Uria_lomvia
puffin
Atlantic_puffin, Fratercula_arctica
horned_puffin, Fratercula_corniculata
tufted_puffin, Lunda_cirrhata
gaviiform_seabird
loon, diver
podicipitiform_seabird
grebe
great_crested_grebe, Podiceps_cristatus
red-necked_grebe, Podiceps_grisegena
black-necked_grebe, eared_grebe, Podiceps_nigricollis
dabchick, little_grebe, Podiceps_ruficollis
pied-billed_grebe, Podilymbus_podiceps
pelecaniform_seabird
pelican
white_pelican, Pelecanus_erythrorhynchos
Old_world_white_pelican, Pelecanus_onocrotalus
frigate_bird, man-of-war_bird
gannet
solan, solan_goose, solant_goose, Sula_bassana
booby
cormorant, Phalacrocorax_carbo
snakebird, anhinga, darter
water_turkey, Anhinga_anhinga
tropic_bird, tropicbird, boatswain_bird
sphenisciform_seabird
penguin
Adelie, Adelie_penguin, Pygoscelis_adeliae
king_penguin, Aptenodytes_patagonica
emperor_penguin, Aptenodytes_forsteri
jackass_penguin, Spheniscus_demersus
rock_hopper, crested_penguin
pelagic_bird, oceanic_bird
procellariiform_seabird
albatross, mollymawk
wandering_albatross, Diomedea_exulans
black-footed_albatross, gooney, gooney_bird, goonie, goony, Diomedea_nigripes
petrel
white-chinned_petrel, Procellaria_aequinoctialis
giant_petrel, giant_fulmar, Macronectes_giganteus
fulmar, fulmar_petrel, Fulmarus_glacialis
shearwater
Manx_shearwater, Puffinus_puffinus
storm_petrel
stormy_petrel, northern_storm_petrel, Hydrobates_pelagicus
Mother_Carey's_chicken, Mother_Carey's_hen, Oceanites_oceanicus
diving_petrel
aquatic_mammal
cetacean, cetacean_mammal, blower
whale
baleen_whale, whalebone_whale
right_whale
bowhead, bowhead_whale, Greenland_whale, Balaena_mysticetus
rorqual, razorback
blue_whale, sulfur_bottom, Balaenoptera_musculus
finback, finback_whale, fin_whale, common_rorqual, Balaenoptera_physalus
sei_whale, Balaenoptera_borealis
lesser_rorqual, piked_whale, minke_whale, Balaenoptera_acutorostrata
humpback, humpback_whale, Megaptera_novaeangliae
grey_whale, gray_whale, devilfish, Eschrichtius_gibbosus, Eschrichtius_robustus
toothed_whale
sperm_whale, cachalot, black_whale, Physeter_catodon
pygmy_sperm_whale, Kogia_breviceps
dwarf_sperm_whale, Kogia_simus
beaked_whale
bottle-nosed_whale, bottlenose_whale, bottlenose, Hyperoodon_ampullatus
dolphin
common_dolphin, Delphinus_delphis
bottlenose_dolphin, bottle-nosed_dolphin, bottlenose
Atlantic_bottlenose_dolphin, Tursiops_truncatus
Pacific_bottlenose_dolphin, Tursiops_gilli
porpoise
harbor_porpoise, herring_hog, Phocoena_phocoena
vaquita, Phocoena_sinus
grampus, Grampus_griseus
killer_whale, killer, orca, grampus, sea_wolf, Orcinus_orca
pilot_whale, black_whale, common_blackfish, blackfish, Globicephala_melaena
river_dolphin
narwhal, narwal, narwhale, Monodon_monoceros
white_whale, beluga, Delphinapterus_leucas
sea_cow, sirenian_mammal, sirenian
manatee, Trichechus_manatus
dugong, Dugong_dugon
Steller's_sea_cow, Hydrodamalis_gigas
carnivore
omnivore
pinniped_mammal, pinniped, pinnatiped
seal
crabeater_seal, crab-eating_seal
eared_seal
fur_seal
guadalupe_fur_seal, Arctocephalus_philippi
fur_seal
Alaska_fur_seal, Callorhinus_ursinus
sea_lion
South_American_sea_lion, Otaria_Byronia
California_sea_lion, Zalophus_californianus, Zalophus_californicus
Australian_sea_lion, Zalophus_lobatus
Steller_sea_lion, Steller's_sea_lion, Eumetopias_jubatus
earless_seal, true_seal, hair_seal
harbor_seal, common_seal, Phoca_vitulina
harp_seal, Pagophilus_groenlandicus
elephant_seal, sea_elephant
bearded_seal, squareflipper_square_flipper, Erignathus_barbatus
hooded_seal, bladdernose, Cystophora_cristata
walrus, seahorse, sea_horse
Atlantic_walrus, Odobenus_rosmarus
Pacific_walrus, Odobenus_divergens
Fissipedia
fissiped_mammal, fissiped
aardvark, ant_bear, anteater, Orycteropus_afer
canine, canid
bitch
brood_bitch
dog, domestic_dog, Canis_familiaris
pooch, doggie, doggy, barker, bow-wow
cur, mongrel, mutt
feist, fice
pariah_dog, pye-dog, pie-dog
lapdog
toy_dog, toy
Chihuahua
Japanese_spaniel
Maltese_dog, Maltese_terrier, Maltese
Pekinese, Pekingese, Peke
Shih-Tzu
toy_spaniel
English_toy_spaniel
Blenheim_spaniel
King_Charles_spaniel
papillon
toy_terrier
hunting_dog
courser
Rhodesian_ridgeback
hound, hound_dog
Afghan_hound, Afghan
basset, basset_hound
beagle
bloodhound, sleuthhound
bluetick
boarhound
coonhound
coondog
black-and-tan_coonhound
dachshund, dachsie, badger_dog
sausage_dog, sausage_hound
foxhound
American_foxhound
Walker_hound, Walker_foxhound
English_foxhound
harrier
Plott_hound
redbone
wolfhound
borzoi, Russian_wolfhound
Irish_wolfhound
greyhound
Italian_greyhound
whippet
Ibizan_hound, Ibizan_Podenco
Norwegian_elkhound, elkhound
otterhound, otter_hound
Saluki, gazelle_hound
Scottish_deerhound, deerhound
staghound
Weimaraner
terrier
bullterrier, bull_terrier
Staffordshire_bullterrier, Staffordshire_bull_terrier
American_Staffordshire_terrier, Staffordshire_terrier, American_pit_bull_terrier, pit_bull_terrier
Bedlington_terrier
Border_terrier
Kerry_blue_terrier
Irish_terrier
Norfolk_terrier
Norwich_terrier
Yorkshire_terrier
rat_terrier, ratter
Manchester_terrier, black-and-tan_terrier
toy_Manchester, toy_Manchester_terrier
fox_terrier
smooth-haired_fox_terrier
wire-haired_fox_terrier
wirehair, wirehaired_terrier, wire-haired_terrier
Lakeland_terrier
Welsh_terrier
Sealyham_terrier, Sealyham
Airedale, Airedale_terrier
cairn, cairn_terrier
Australian_terrier
Dandie_Dinmont, Dandie_Dinmont_terrier
Boston_bull, Boston_terrier
schnauzer
miniature_schnauzer
giant_schnauzer
standard_schnauzer
Scotch_terrier, Scottish_terrier, Scottie
Tibetan_terrier, chrysanthemum_dog
silky_terrier, Sydney_silky
Skye_terrier
Clydesdale_terrier
soft-coated_wheaten_terrier
West_Highland_white_terrier
Lhasa, Lhasa_apso
sporting_dog, gun_dog
bird_dog
water_dog
retriever
flat-coated_retriever
curly-coated_retriever
golden_retriever
Labrador_retriever
Chesapeake_Bay_retriever
pointer, Spanish_pointer
German_short-haired_pointer
setter
vizsla, Hungarian_pointer
English_setter
Irish_setter, red_setter
Gordon_setter
spaniel
Brittany_spaniel
clumber, clumber_spaniel
field_spaniel
springer_spaniel, springer
English_springer, English_springer_spaniel
Welsh_springer_spaniel
cocker_spaniel, English_cocker_spaniel, cocker
Sussex_spaniel
water_spaniel
American_water_spaniel
Irish_water_spaniel
griffon, wire-haired_pointing_griffon
working_dog
watchdog, guard_dog
kuvasz
attack_dog
housedog
schipperke
shepherd_dog, sheepdog, sheep_dog
Belgian_sheepdog, Belgian_shepherd
groenendael
malinois
briard
kelpie
komondor
Old_English_sheepdog, bobtail
Shetland_sheepdog, Shetland_sheep_dog, Shetland
collie
Border_collie
Bouvier_des_Flandres, Bouviers_des_Flandres
Rottweiler
German_shepherd, German_shepherd_dog, German_police_dog, alsatian
police_dog
pinscher
Doberman, Doberman_pinscher
miniature_pinscher
Sennenhunde
Greater_Swiss_Mountain_dog
Bernese_mountain_dog
Appenzeller
EntleBucher
boxer
mastiff
bull_mastiff
Tibetan_mastiff
bulldog, English_bulldog
French_bulldog
Great_Dane
guide_dog
Seeing_Eye_dog
hearing_dog
Saint_Bernard, St_Bernard
seizure-alert_dog
sled_dog, sledge_dog
Eskimo_dog, husky
malamute, malemute, Alaskan_malamute
Siberian_husky
dalmatian, coach_dog, carriage_dog
liver-spotted_dalmatian
affenpinscher, monkey_pinscher, monkey_dog
basenji
pug, pug-dog
Leonberg
Newfoundland, Newfoundland_dog
Great_Pyrenees
spitz
Samoyed, Samoyede
Pomeranian
chow, chow_chow
keeshond
griffon, Brussels_griffon, Belgian_griffon
Brabancon_griffon
corgi, Welsh_corgi
Pembroke, Pembroke_Welsh_corgi
Cardigan, Cardigan_Welsh_corgi
poodle, poodle_dog
toy_poodle
miniature_poodle
standard_poodle
large_poodle
Mexican_hairless
wolf
timber_wolf, grey_wolf, gray_wolf, Canis_lupus
white_wolf, Arctic_wolf, Canis_lupus_tundrarum
red_wolf, maned_wolf, Canis_rufus, Canis_niger
coyote, prairie_wolf, brush_wolf, Canis_latrans
coydog
jackal, Canis_aureus
wild_dog
dingo, warrigal, warragal, Canis_dingo
dhole, Cuon_alpinus
crab-eating_dog, crab-eating_fox, Dusicyon_cancrivorus
raccoon_dog, Nyctereutes_procyonides
African_hunting_dog, hyena_dog, Cape_hunting_dog, Lycaon_pictus
hyena, hyaena
striped_hyena, Hyaena_hyaena
brown_hyena, strand_wolf, Hyaena_brunnea
spotted_hyena, laughing_hyena, Crocuta_crocuta
aardwolf, Proteles_cristata
fox
vixen
Reynard
red_fox, Vulpes_vulpes
black_fox
silver_fox
red_fox, Vulpes_fulva
kit_fox, prairie_fox, Vulpes_velox
kit_fox, Vulpes_macrotis
Arctic_fox, white_fox, Alopex_lagopus
blue_fox
grey_fox, gray_fox, Urocyon_cinereoargenteus
feline, felid
cat, true_cat
domestic_cat, house_cat, Felis_domesticus, Felis_catus
kitty, kitty-cat, puss, pussy, pussycat
mouser
alley_cat
stray
tom, tomcat
gib
tabby, queen
kitten, kitty
tabby, tabby_cat
tiger_cat
tortoiseshell, tortoiseshell-cat, calico_cat
Persian_cat
Angora, Angora_cat
Siamese_cat, Siamese
blue_point_Siamese
Burmese_cat
Egyptian_cat
Maltese, Maltese_cat
Abyssinian, Abyssinian_cat
Manx, Manx_cat
wildcat
sand_cat
European_wildcat, catamountain, Felis_silvestris
cougar, puma, catamount, mountain_lion, painter, panther, Felis_concolor
ocelot, panther_cat, Felis_pardalis
jaguarundi, jaguarundi_cat, jaguarondi, eyra, Felis_yagouaroundi
kaffir_cat, caffer_cat, Felis_ocreata
jungle_cat, Felis_chaus
serval, Felis_serval
leopard_cat, Felis_bengalensis
margay, margay_cat, Felis_wiedi
manul, Pallas's_cat, Felis_manul
lynx, catamount
common_lynx, Lynx_lynx
Canada_lynx, Lynx_canadensis
bobcat, bay_lynx, Lynx_rufus
spotted_lynx, Lynx_pardina
caracal, desert_lynx, Lynx_caracal
big_cat, cat
leopard, Panthera_pardus
leopardess
panther
snow_leopard, ounce, Panthera_uncia
jaguar, panther, Panthera_onca, Felis_onca
lion, king_of_beasts, Panthera_leo
lioness
lionet
tiger, Panthera_tigris
Bengal_tiger
tigress
liger
tiglon, tigon
cheetah, chetah, Acinonyx_jubatus
saber-toothed_tiger, sabertooth
Smiledon_californicus
bear
brown_bear, bruin, Ursus_arctos
bruin
Syrian_bear, Ursus_arctos_syriacus
grizzly, grizzly_bear, silvertip, silver-tip, Ursus_horribilis, Ursus_arctos_horribilis
Alaskan_brown_bear, Kodiak_bear, Kodiak, Ursus_middendorffi, Ursus_arctos_middendorffi
American_black_bear, black_bear, Ursus_americanus, Euarctos_americanus
cinnamon_bear
Asiatic_black_bear, black_bear, Ursus_thibetanus, Selenarctos_thibetanus
ice_bear, polar_bear, Ursus_Maritimus, Thalarctos_maritimus
sloth_bear, Melursus_ursinus, Ursus_ursinus
viverrine, viverrine_mammal
civet, civet_cat
large_civet, Viverra_zibetha
small_civet, Viverricula_indica, Viverricula_malaccensis
binturong, bearcat, Arctictis_bintourong
Cryptoprocta, genus_Cryptoprocta
fossa, fossa_cat, Cryptoprocta_ferox
fanaloka, Fossa_fossa
genet, Genetta_genetta
banded_palm_civet, Hemigalus_hardwickii
mongoose
Indian_mongoose, Herpestes_nyula
ichneumon, Herpestes_ichneumon
palm_cat, palm_civet
meerkat, mierkat
slender-tailed_meerkat, Suricata_suricatta
suricate, Suricata_tetradactyla
bat, chiropteran
fruit_bat, megabat
flying_fox
Pteropus_capestratus
Pteropus_hypomelanus
harpy, harpy_bat, tube-nosed_bat, tube-nosed_fruit_bat
Cynopterus_sphinx
carnivorous_bat, microbat
mouse-eared_bat
leafnose_bat, leaf-nosed_bat
macrotus, Macrotus_californicus
spearnose_bat
Phyllostomus_hastatus
hognose_bat, Choeronycteris_mexicana
horseshoe_bat
horseshoe_bat
orange_bat, orange_horseshoe_bat, Rhinonicteris_aurantius
false_vampire, false_vampire_bat
big-eared_bat, Megaderma_lyra
vespertilian_bat, vespertilionid
frosted_bat, Vespertilio_murinus
red_bat, Lasiurus_borealis
brown_bat
little_brown_bat, little_brown_myotis, Myotis_leucifugus
cave_myotis, Myotis_velifer
big_brown_bat, Eptesicus_fuscus
serotine, European_brown_bat, Eptesicus_serotinus
pallid_bat, cave_bat, Antrozous_pallidus
pipistrelle, pipistrel, Pipistrellus_pipistrellus
eastern_pipistrel, Pipistrellus_subflavus
jackass_bat, spotted_bat, Euderma_maculata
long-eared_bat
western_big-eared_bat, Plecotus_townsendi
freetail, free-tailed_bat, freetailed_bat
guano_bat, Mexican_freetail_bat, Tadarida_brasiliensis
pocketed_bat, pocketed_freetail_bat, Tadirida_femorosacca
mastiff_bat
vampire_bat, true_vampire_bat
Desmodus_rotundus
hairy-legged_vampire_bat, Diphylla_ecaudata
predator, predatory_animal
prey, quarry
game
big_game
game_bird
fossorial_mammal
tetrapod
quadruped
hexapod
biped
insect
social_insect
holometabola, metabola
defoliator
pollinator
gallfly
scorpion_fly
hanging_fly
collembolan, springtail
beetle
tiger_beetle
ladybug, ladybeetle, lady_beetle, ladybird, ladybird_beetle
two-spotted_ladybug, Adalia_bipunctata
Mexican_bean_beetle, bean_beetle, Epilachna_varivestis
Hippodamia_convergens
vedalia, Rodolia_cardinalis
ground_beetle, carabid_beetle
bombardier_beetle
calosoma
searcher, searcher_beetle, Calosoma_scrutator
firefly, lightning_bug
glowworm
long-horned_beetle, longicorn, longicorn_beetle
sawyer, sawyer_beetle
pine_sawyer
leaf_beetle, chrysomelid
flea_beetle
Colorado_potato_beetle, Colorado_beetle, potato_bug, potato_beetle, Leptinotarsa_decemlineata
carpet_beetle, carpet_bug
buffalo_carpet_beetle, Anthrenus_scrophulariae
black_carpet_beetle
clerid_beetle, clerid
bee_beetle
lamellicorn_beetle
scarabaeid_beetle, scarabaeid, scarabaean
dung_beetle
scarab, scarabaeus, Scarabaeus_sacer
tumblebug
dorbeetle
June_beetle, June_bug, May_bug, May_beetle
green_June_beetle, figeater
Japanese_beetle, Popillia_japonica
Oriental_beetle, Asiatic_beetle, Anomala_orientalis
rhinoceros_beetle
melolonthid_beetle
cockchafer, May_bug, May_beetle, Melolontha_melolontha
rose_chafer, rose_bug, Macrodactylus_subspinosus
rose_chafer, rose_beetle, Cetonia_aurata
stag_beetle
elaterid_beetle, elater, elaterid
click_beetle, skipjack, snapping_beetle
firefly, fire_beetle, Pyrophorus_noctiluca
wireworm
water_beetle
whirligig_beetle
deathwatch_beetle, deathwatch, Xestobium_rufovillosum
weevil
snout_beetle
boll_weevil, Anthonomus_grandis
blister_beetle, meloid
oil_beetle
Spanish_fly
Dutch-elm_beetle, Scolytus_multistriatus
bark_beetle
spruce_bark_beetle, Dendroctonus_rufipennis
rove_beetle
darkling_beetle, darkling_groung_beetle, tenebrionid
mealworm
flour_beetle, flour_weevil
seed_beetle, seed_weevil
pea_weevil, Bruchus_pisorum
bean_weevil, Acanthoscelides_obtectus
rice_weevil, black_weevil, Sitophylus_oryzae
Asian_longhorned_beetle, Anoplophora_glabripennis
web_spinner
louse, sucking_louse
common_louse, Pediculus_humanus
head_louse, Pediculus_capitis
body_louse, cootie, Pediculus_corporis
crab_louse, pubic_louse, crab, Phthirius_pubis
bird_louse, biting_louse, louse
flea
Pulex_irritans
dog_flea, Ctenocephalides_canis
cat_flea, Ctenocephalides_felis
chigoe, chigger, chigoe_flea, Tunga_penetrans
sticktight, sticktight_flea, Echidnophaga_gallinacea
dipterous_insect, two-winged_insects, dipteran, dipteron
gall_midge, gallfly, gall_gnat
Hessian_fly, Mayetiola_destructor
fly
housefly, house_fly, Musca_domestica
tsetse_fly, tsetse, tzetze_fly, tzetze, glossina
blowfly, blow_fly
bluebottle, Calliphora_vicina
greenbottle, greenbottle_fly
flesh_fly, Sarcophaga_carnaria
tachina_fly
gadfly
botfly
human_botfly, Dermatobia_hominis
sheep_botfly, sheep_gadfly, Oestrus_ovis
warble_fly
horsefly, cleg, clegg, horse_fly
bee_fly
robber_fly, bee_killer
fruit_fly, pomace_fly
apple_maggot, railroad_worm, Rhagoletis_pomonella
Mediterranean_fruit_fly, medfly, Ceratitis_capitata
drosophila, Drosophila_melanogaster
vinegar_fly
leaf_miner, leaf-miner
louse_fly, hippoboscid
horse_tick, horsefly, Hippobosca_equina
sheep_ked, sheep-tick, sheep_tick, Melophagus_Ovinus
horn_fly, Haematobia_irritans
mosquito
wiggler, wriggler
gnat
yellow-fever_mosquito, Aedes_aegypti
Asian_tiger_mosquito, Aedes_albopictus
anopheline
malarial_mosquito, malaria_mosquito
common_mosquito, Culex_pipiens
Culex_quinquefasciatus, Culex_fatigans
gnat
punkie, punky, punkey, no-see-um, biting_midge
midge
fungus_gnat
psychodid
sand_fly, sandfly, Phlebotomus_papatasii
fungus_gnat, sciara, sciarid
armyworm
crane_fly, daddy_longlegs
blackfly, black_fly, buffalo_gnat
hymenopterous_insect, hymenopteran, hymenopteron, hymenopter
bee
drone
queen_bee
worker
soldier
worker_bee
honeybee, Apis_mellifera
Africanized_bee, Africanized_honey_bee, killer_bee, Apis_mellifera_scutellata, Apis_mellifera_adansonii
black_bee, German_bee
Carniolan_bee
Italian_bee
carpenter_bee
bumblebee, humblebee
cuckoo-bumblebee
andrena, andrenid, mining_bee
Nomia_melanderi, alkali_bee
leaf-cutting_bee, leaf-cutter, leaf-cutter_bee
mason_bee
potter_bee
wasp
vespid, vespid_wasp
paper_wasp
hornet
giant_hornet, Vespa_crabro
common_wasp, Vespula_vulgaris
bald-faced_hornet, white-faced_hornet, Vespula_maculata
yellow_jacket, yellow_hornet, Vespula_maculifrons
Polistes_annularis
mason_wasp
potter_wasp
Mutillidae, family_Mutillidae
velvet_ant
sphecoid_wasp, sphecoid
mason_wasp
digger_wasp
cicada_killer, Sphecius_speciosis
mud_dauber
gall_wasp, gallfly, cynipid_wasp, cynipid_gall_wasp
chalcid_fly, chalcidfly, chalcid, chalcid_wasp
strawworm, jointworm
chalcis_fly
ichneumon_fly
sawfly
birch_leaf_miner, Fenusa_pusilla
ant, emmet, pismire
pharaoh_ant, pharaoh's_ant, Monomorium_pharaonis
little_black_ant, Monomorium_minimum
army_ant, driver_ant, legionary_ant
carpenter_ant
fire_ant
wood_ant, Formica_rufa
slave_ant
Formica_fusca
slave-making_ant, slave-maker
sanguinary_ant, Formica_sanguinea
bulldog_ant
Amazon_ant, Polyergus_rufescens
termite, white_ant
dry-wood_termite
Reticulitermes_lucifugus
Mastotermes_darwiniensis
Mastotermes_electrodominicus
powder-post_termite, Cryptotermes_brevis
orthopterous_insect, orthopteron, orthopteran
grasshopper, hopper
short-horned_grasshopper, acridid
locust
migratory_locust, Locusta_migratoria
migratory_grasshopper
long-horned_grasshopper, tettigoniid
katydid
mormon_cricket, Anabrus_simplex
sand_cricket, Jerusalem_cricket, Stenopelmatus_fuscus
cricket
mole_cricket
European_house_cricket, Acheta_domestica
field_cricket, Acheta_assimilis
tree_cricket
snowy_tree_cricket, Oecanthus_fultoni
phasmid, phasmid_insect
walking_stick, walkingstick, stick_insect
diapheromera, Diapheromera_femorata
walking_leaf, leaf_insect
cockroach, roach
oriental_cockroach, oriental_roach, Asiatic_cockroach, blackbeetle, Blatta_orientalis
American_cockroach, Periplaneta_americana
Australian_cockroach, Periplaneta_australasiae
German_cockroach, Croton_bug, crotonbug, water_bug, Blattella_germanica
giant_cockroach
mantis, mantid
praying_mantis, praying_mantid, Mantis_religioso
bug
hemipterous_insect, bug, hemipteran, hemipteron
leaf_bug, plant_bug
mirid_bug, mirid, capsid
four-lined_plant_bug, four-lined_leaf_bug, Poecilocapsus_lineatus
lygus_bug
tarnished_plant_bug, Lygus_lineolaris
lace_bug
lygaeid, lygaeid_bug
chinch_bug, Blissus_leucopterus
coreid_bug, coreid
squash_bug, Anasa_tristis
leaf-footed_bug, leaf-foot_bug
bedbug, bed_bug, chinch, Cimex_lectularius
backswimmer, Notonecta_undulata
true_bug
heteropterous_insect
water_bug
giant_water_bug
water_scorpion
water_boatman, boat_bug
water_strider, pond-skater, water_skater
common_pond-skater, Gerris_lacustris
assassin_bug, reduviid
conenose, cone-nosed_bug, conenose_bug, big_bedbug, kissing_bug
wheel_bug, Arilus_cristatus
firebug
cotton_stainer
homopterous_insect, homopteran
whitefly
citrus_whitefly, Dialeurodes_citri
greenhouse_whitefly, Trialeurodes_vaporariorum
sweet-potato_whitefly
superbug, Bemisia_tabaci, poinsettia_strain
cotton_strain
coccid_insect
scale_insect
soft_scale
brown_soft_scale, Coccus_hesperidum
armored_scale
San_Jose_scale, Aspidiotus_perniciosus
cochineal_insect, cochineal, Dactylopius_coccus
mealybug, mealy_bug
citrophilous_mealybug, citrophilus_mealybug, Pseudococcus_fragilis
Comstock_mealybug, Comstock's_mealybug, Pseudococcus_comstocki
citrus_mealybug, Planococcus_citri
plant_louse, louse
aphid
apple_aphid, green_apple_aphid, Aphis_pomi
blackfly, bean_aphid, Aphis_fabae
greenfly
green_peach_aphid
ant_cow
woolly_aphid, woolly_plant_louse
woolly_apple_aphid, American_blight, Eriosoma_lanigerum
woolly_alder_aphid, Prociphilus_tessellatus
adelgid
balsam_woolly_aphid, Adelges_piceae
spruce_gall_aphid, Adelges_abietis
woolly_adelgid
jumping_plant_louse, psylla, psyllid
cicada, cicala
dog-day_cicada, harvest_fly
seventeen-year_locust, periodical_cicada, Magicicada_septendecim
spittle_insect, spittlebug
froghopper
meadow_spittlebug, Philaenus_spumarius
pine_spittlebug
Saratoga_spittlebug, Aphrophora_saratogensis
leafhopper
plant_hopper, planthopper
treehopper
lantern_fly, lantern-fly
psocopterous_insect
psocid
bark-louse, bark_louse
booklouse, book_louse, deathwatch, Liposcelis_divinatorius
common_booklouse, Trogium_pulsatorium
ephemerid, ephemeropteran
mayfly, dayfly, shadfly
stonefly, stone_fly, plecopteran
neuropteron, neuropteran, neuropterous_insect
ant_lion, antlion, antlion_fly
doodlebug, ant_lion, antlion
lacewing, lacewing_fly
aphid_lion, aphis_lion
green_lacewing, chrysopid, stink_fly
brown_lacewing, hemerobiid, hemerobiid_fly
dobson, dobsonfly, dobson_fly, Corydalus_cornutus
hellgrammiate, dobson
fish_fly, fish-fly
alderfly, alder_fly, Sialis_lutaria
snakefly
mantispid
odonate
dragonfly, darning_needle, devil's_darning_needle, sewing_needle, snake_feeder, snake_doctor, mosquito_hawk, skeeter_hawk
damselfly
trichopterous_insect, trichopteran, trichopteron
caddis_fly, caddis-fly, caddice_fly, caddice-fly
caseworm
caddisworm, strawworm
thysanuran_insect, thysanuron
bristletail
silverfish, Lepisma_saccharina
firebrat, Thermobia_domestica
jumping_bristletail, machilid
thysanopter, thysanopteron, thysanopterous_insect
thrips, thrip, thripid
tobacco_thrips, Frankliniella_fusca
onion_thrips, onion_louse, Thrips_tobaci
earwig
common_European_earwig, Forficula_auricularia
lepidopterous_insect, lepidopteron, lepidopteran
butterfly
nymphalid, nymphalid_butterfly, brush-footed_butterfly, four-footed_butterfly
mourning_cloak, mourning_cloak_butterfly, Camberwell_beauty, Nymphalis_antiopa
tortoiseshell, tortoiseshell_butterfly
painted_beauty, Vanessa_virginiensis
admiral
red_admiral, Vanessa_atalanta
white_admiral, Limenitis_camilla
banded_purple, white_admiral, Limenitis_arthemis
red-spotted_purple, Limenitis_astyanax
viceroy, Limenitis_archippus
anglewing
ringlet, ringlet_butterfly
comma, comma_butterfly, Polygonia_comma
fritillary
silverspot
emperor_butterfly, emperor
purple_emperor, Apatura_iris
peacock, peacock_butterfly, Inachis_io
danaid, danaid_butterfly
monarch, monarch_butterfly, milkweed_butterfly, Danaus_plexippus
pierid, pierid_butterfly
cabbage_butterfly
small_white, Pieris_rapae
large_white, Pieris_brassicae
southern_cabbage_butterfly, Pieris_protodice
sulphur_butterfly, sulfur_butterfly
lycaenid, lycaenid_butterfly
blue
copper
American_copper, Lycaena_hypophlaeas
hairstreak, hairstreak_butterfly
Strymon_melinus
moth
moth_miller, miller
tortricid, tortricid_moth
leaf_roller, leaf-roller
tea_tortrix, tortrix, Homona_coffearia
orange_tortrix, tortrix, Argyrotaenia_citrana
codling_moth, codlin_moth, Carpocapsa_pomonella
lymantriid, tussock_moth
tussock_caterpillar
gypsy_moth, gipsy_moth, Lymantria_dispar
browntail, brown-tail_moth, Euproctis_phaeorrhoea
gold-tail_moth, Euproctis_chrysorrhoea
geometrid, geometrid_moth
Paleacrita_vernata
Alsophila_pometaria
cankerworm
spring_cankerworm
fall_cankerworm
measuring_worm, inchworm, looper
pyralid, pyralid_moth
bee_moth, wax_moth, Galleria_mellonella
corn_borer, European_corn_borer_moth, corn_borer_moth, Pyrausta_nubilalis
Mediterranean_flour_moth, Anagasta_kuehniella
tobacco_moth, cacao_moth, Ephestia_elutella
almond_moth, fig_moth, Cadra_cautella
raisin_moth, Cadra_figulilella
tineoid, tineoid_moth
tineid, tineid_moth
clothes_moth
casemaking_clothes_moth, Tinea_pellionella
webbing_clothes_moth, webbing_moth, Tineola_bisselliella
carpet_moth, tapestry_moth, Trichophaga_tapetzella
gelechiid, gelechiid_moth
grain_moth
angoumois_moth, angoumois_grain_moth, Sitotroga_cerealella
potato_moth, potato_tuber_moth, splitworm, Phthorimaea_operculella
potato_tuberworm, Phthorimaea_operculella
noctuid_moth, noctuid, owlet_moth
cutworm
underwing
red_underwing, Catocala_nupta
antler_moth, Cerapteryx_graminis
heliothis_moth, Heliothis_zia
army_cutworm, Chorizagrotis_auxiliaris
armyworm, Pseudaletia_unipuncta
armyworm, army_worm, Pseudaletia_unipuncta
Spodoptera_exigua
beet_armyworm, Spodoptera_exigua
Spodoptera_frugiperda
fall_armyworm, Spodoptera_frugiperda
hawkmoth, hawk_moth, sphingid, sphinx_moth, hummingbird_moth
Manduca_sexta
tobacco_hornworm, tomato_worm, Manduca_sexta
Manduca_quinquemaculata
tomato_hornworm, potato_worm, Manduca_quinquemaculata
death's-head_moth, Acherontia_atropos
bombycid, bombycid_moth, silkworm_moth
domestic_silkworm_moth, domesticated_silkworm_moth, Bombyx_mori
silkworm
saturniid, saturniid_moth
emperor, emperor_moth, Saturnia_pavonia
imperial_moth, Eacles_imperialis
giant_silkworm_moth, silkworm_moth
silkworm, giant_silkworm, wild_wilkworm
luna_moth, Actias_luna
cecropia, cecropia_moth, Hyalophora_cecropia
cynthia_moth, Samia_cynthia, Samia_walkeri
ailanthus_silkworm, Samia_cynthia
io_moth, Automeris_io
polyphemus_moth, Antheraea_polyphemus
pernyi_moth, Antheraea_pernyi
tussah, tusseh, tussur, tussore, tusser, Antheraea_mylitta
atlas_moth, Atticus_atlas
arctiid, arctiid_moth
tiger_moth
cinnabar, cinnabar_moth, Callimorpha_jacobeae
lasiocampid, lasiocampid_moth
eggar, egger
tent-caterpillar_moth, Malacosoma_americana
tent_caterpillar
tent-caterpillar_moth, Malacosoma_disstria
forest_tent_caterpillar, Malacosoma_disstria
lappet, lappet_moth
lappet_caterpillar
webworm
webworm_moth
Hyphantria_cunea
fall_webworm, Hyphantria_cunea
garden_webworm, Loxostege_similalis
instar
caterpillar
corn_borer, Pyrausta_nubilalis
bollworm
pink_bollworm, Gelechia_gossypiella
corn_earworm, cotton_bollworm, tomato_fruitworm, tobacco_budworm, vetchworm, Heliothis_zia
cabbageworm, Pieris_rapae
woolly_bear, woolly_bear_caterpillar
woolly_bear_moth
larva
nymph
leptocephalus
grub
maggot
leatherjacket
pupa
chrysalis
imago
queen
phoronid
bryozoan, polyzoan, sea_mat, sea_moss, moss_animal
brachiopod, lamp_shell, lampshell
peanut_worm, sipunculid
echinoderm
starfish, sea_star
brittle_star, brittle-star, serpent_star
basket_star, basket_fish
Astrophyton_muricatum
sea_urchin
edible_sea_urchin, Echinus_esculentus
sand_dollar
heart_urchin
crinoid
sea_lily
feather_star, comatulid
sea_cucumber, holothurian
trepang, Holothuria_edulis
Duplicidentata
lagomorph, gnawing_mammal
leporid, leporid_mammal
rabbit, coney, cony
rabbit_ears
lapin
bunny, bunny_rabbit
European_rabbit, Old_World_rabbit, Oryctolagus_cuniculus
wood_rabbit, cottontail, cottontail_rabbit
eastern_cottontail, Sylvilagus_floridanus
swamp_rabbit, canecutter, swamp_hare, Sylvilagus_aquaticus
marsh_hare, swamp_rabbit, Sylvilagus_palustris
hare
leveret
European_hare, Lepus_europaeus
jackrabbit
white-tailed_jackrabbit, whitetail_jackrabbit, Lepus_townsendi
blacktail_jackrabbit, Lepus_californicus
polar_hare, Arctic_hare, Lepus_arcticus
snowshoe_hare, snowshoe_rabbit, varying_hare, Lepus_americanus
Belgian_hare, leporide
Angora, Angora_rabbit
pika, mouse_hare, rock_rabbit, coney, cony
little_chief_hare, Ochotona_princeps
collared_pika, Ochotona_collaris
rodent, gnawer
mouse
rat
pocket_rat
murine
house_mouse, Mus_musculus
harvest_mouse, Micromyx_minutus
field_mouse, fieldmouse
nude_mouse
European_wood_mouse, Apodemus_sylvaticus
brown_rat, Norway_rat, Rattus_norvegicus
wharf_rat
sewer_rat
black_rat, roof_rat, Rattus_rattus
bandicoot_rat, mole_rat
jerboa_rat
kangaroo_mouse
water_rat
beaver_rat
New_World_mouse
American_harvest_mouse, harvest_mouse
wood_mouse
white-footed_mouse, vesper_mouse, Peromyscus_leucopus
deer_mouse, Peromyscus_maniculatus
cactus_mouse, Peromyscus_eremicus
cotton_mouse, Peromyscus_gossypinus
pygmy_mouse, Baiomys_taylori
grasshopper_mouse
muskrat, musquash, Ondatra_zibethica
round-tailed_muskrat, Florida_water_rat, Neofiber_alleni
cotton_rat, Sigmodon_hispidus
wood_rat, wood-rat
dusky-footed_wood_rat
vole, field_mouse
packrat, pack_rat, trade_rat, bushytail_woodrat, Neotoma_cinerea
dusky-footed_woodrat, Neotoma_fuscipes
eastern_woodrat, Neotoma_floridana
rice_rat, Oryzomys_palustris
pine_vole, pine_mouse, Pitymys_pinetorum
meadow_vole, meadow_mouse, Microtus_pennsylvaticus
water_vole, Richardson_vole, Microtus_richardsoni
prairie_vole, Microtus_ochrogaster
water_vole, water_rat, Arvicola_amphibius
red-backed_mouse, redback_vole
phenacomys
hamster
Eurasian_hamster, Cricetus_cricetus
golden_hamster, Syrian_hamster, Mesocricetus_auratus
gerbil, gerbille
jird
tamarisk_gerbil, Meriones_unguiculatus
sand_rat, Meriones_longifrons
lemming
European_lemming, Lemmus_lemmus
brown_lemming, Lemmus_trimucronatus
grey_lemming, gray_lemming, red-backed_lemming
pied_lemming
Hudson_bay_collared_lemming, Dicrostonyx_hudsonius
southern_bog_lemming, Synaptomys_cooperi
northern_bog_lemming, Synaptomys_borealis
porcupine, hedgehog
Old_World_porcupine
brush-tailed_porcupine, brush-tail_porcupine
long-tailed_porcupine, Trichys_lipura
New_World_porcupine
Canada_porcupine, Erethizon_dorsatum
pocket_mouse
silky_pocket_mouse, Perognathus_flavus
plains_pocket_mouse, Perognathus_flavescens
hispid_pocket_mouse, Perognathus_hispidus
Mexican_pocket_mouse, Liomys_irroratus
kangaroo_rat, desert_rat, Dipodomys_phillipsii
Ord_kangaroo_rat, Dipodomys_ordi
kangaroo_mouse, dwarf_pocket_rat
jumping_mouse
meadow_jumping_mouse, Zapus_hudsonius
jerboa
typical_jerboa
Jaculus_jaculus
dormouse
loir, Glis_glis
hazel_mouse, Muscardinus_avellanarius
lerot
gopher, pocket_gopher, pouched_rat
plains_pocket_gopher, Geomys_bursarius
southeastern_pocket_gopher, Geomys_pinetis
valley_pocket_gopher, Thomomys_bottae
northern_pocket_gopher, Thomomys_talpoides
squirrel
tree_squirrel
eastern_grey_squirrel, eastern_gray_squirrel, cat_squirrel, Sciurus_carolinensis
western_grey_squirrel, western_gray_squirrel, Sciurus_griseus
fox_squirrel, eastern_fox_squirrel, Sciurus_niger
black_squirrel
red_squirrel, cat_squirrel, Sciurus_vulgaris
American_red_squirrel, spruce_squirrel, red_squirrel, Sciurus_hudsonicus, Tamiasciurus_hudsonicus
chickeree, Douglas_squirrel, Tamiasciurus_douglasi
antelope_squirrel, whitetail_antelope_squirrel, antelope_chipmunk, Citellus_leucurus
ground_squirrel, gopher, spermophile
mantled_ground_squirrel, Citellus_lateralis
suslik, souslik, Citellus_citellus
flickertail, Richardson_ground_squirrel, Citellus_richardsoni
rock_squirrel, Citellus_variegatus
Arctic_ground_squirrel, parka_squirrel, Citellus_parryi
prairie_dog, prairie_marmot
blacktail_prairie_dog, Cynomys_ludovicianus
whitetail_prairie_dog, Cynomys_gunnisoni
eastern_chipmunk, hackee, striped_squirrel, ground_squirrel, Tamias_striatus
chipmunk
baronduki, baranduki, barunduki, burunduki, Eutamius_asiaticus, Eutamius_sibiricus
American_flying_squirrel
southern_flying_squirrel, Glaucomys_volans
northern_flying_squirrel, Glaucomys_sabrinus
marmot
groundhog, woodchuck, Marmota_monax
hoary_marmot, whistler, whistling_marmot, Marmota_caligata
yellowbelly_marmot, rockchuck, Marmota_flaviventris
Asiatic_flying_squirrel
beaver
Old_World_beaver, Castor_fiber
New_World_beaver, Castor_canadensis
mountain_beaver, sewellel, Aplodontia_rufa
cavy
guinea_pig, Cavia_cobaya
aperea, wild_cavy, Cavia_porcellus
mara, Dolichotis_patagonum
capybara, capibara, Hydrochoerus_hydrochaeris
agouti, Dasyprocta_aguti
paca, Cuniculus_paca
mountain_paca
coypu, nutria, Myocastor_coypus
chinchilla, Chinchilla_laniger
mountain_chinchilla, mountain_viscacha
viscacha, chinchillon, Lagostomus_maximus
abrocome, chinchilla_rat, rat_chinchilla
mole_rat
mole_rat
sand_rat
naked_mole_rat
queen, queen_mole_rat
Damaraland_mole_rat
Ungulata
ungulate, hoofed_mammal
unguiculate, unguiculate_mammal
dinoceras, uintathere
hyrax, coney, cony, dassie, das
rock_hyrax, rock_rabbit, Procavia_capensis
odd-toed_ungulate, perissodactyl, perissodactyl_mammal
equine, equid
horse, Equus_caballus
roan
stablemate, stable_companion
gee-gee
eohippus, dawn_horse
foal
filly
colt
male_horse
ridgeling, ridgling, ridgel, ridgil
stallion, entire
stud, studhorse
gelding
mare, female_horse
broodmare, stud_mare
saddle_horse, riding_horse, mount
remount
palfrey
warhorse
cavalry_horse
charger, courser
steed
prancer
hack
cow_pony
quarter_horse
Morgan
Tennessee_walker, Tennessee_walking_horse, Walking_horse, Plantation_walking_horse
American_saddle_horse
Appaloosa
Arabian, Arab
Lippizan, Lipizzan, Lippizaner
pony
polo_pony
mustang
bronco, bronc, broncho
bucking_bronco
buckskin
crowbait, crow-bait
dun
grey, gray
wild_horse
tarpan, Equus_caballus_gomelini
Przewalski's_horse, Przevalski's_horse, Equus_caballus_przewalskii, Equus_caballus_przevalskii
cayuse, Indian_pony
hack
hack, jade, nag, plug
plow_horse, plough_horse
pony
Shetland_pony
Welsh_pony
Exmoor
racehorse, race_horse, bangtail
thoroughbred
steeplechaser
racer
finisher
pony
yearling
dark_horse
mudder
nonstarter
stalking-horse
harness_horse
cob
hackney
workhorse
draft_horse, draught_horse, dray_horse
packhorse
carthorse, cart_horse, drayhorse
Clydesdale
Percheron
farm_horse, dobbin
shire, shire_horse
pole_horse, poler
post_horse, post-horse, poster
coach_horse
pacer
pacer, pacemaker, pacesetter
trotting_horse, trotter
pole_horse
stepper, high_stepper
chestnut
liver_chestnut
bay
sorrel
palomino
pinto
ass
domestic_ass, donkey, Equus_asinus
burro
moke
jack, jackass
jennet, jenny, jenny_ass
mule
hinny
wild_ass
African_wild_ass, Equus_asinus
kiang, Equus_kiang
onager, Equus_hemionus
chigetai, dziggetai, Equus_hemionus_hemionus
zebra
common_zebra, Burchell's_zebra, Equus_Burchelli
mountain_zebra, Equus_zebra_zebra
grevy's_zebra, Equus_grevyi
quagga, Equus_quagga
rhinoceros, rhino
Indian_rhinoceros, Rhinoceros_unicornis
woolly_rhinoceros, Rhinoceros_antiquitatis
white_rhinoceros, Ceratotherium_simum, Diceros_simus
black_rhinoceros, Diceros_bicornis
tapir
New_World_tapir, Tapirus_terrestris
Malayan_tapir, Indian_tapir, Tapirus_indicus
even-toed_ungulate, artiodactyl, artiodactyl_mammal
swine
hog, pig, grunter, squealer, Sus_scrofa
piglet, piggy, shoat, shote
sucking_pig
porker
boar
sow
razorback, razorback_hog, razorbacked_hog
wild_boar, boar, Sus_scrofa
babirusa, babiroussa, babirussa, Babyrousa_Babyrussa
warthog
peccary, musk_hog
collared_peccary, javelina, Tayassu_angulatus, Tayassu_tajacu, Peccari_angulatus
white-lipped_peccary, Tayassu_pecari
hippopotamus, hippo, river_horse, Hippopotamus_amphibius
ruminant
bovid
bovine
ox, wild_ox
cattle, cows, kine, oxen, Bos_taurus
ox
stirk
bullock, steer
bull
cow, moo-cow
heifer
bullock
dogie, dogy, leppy
maverick
beef, beef_cattle
longhorn, Texas_longhorn
Brahman, Brahma, Brahmin, Bos_indicus
zebu
aurochs, urus, Bos_primigenius
yak, Bos_grunniens
banteng, banting, tsine, Bos_banteng
Welsh, Welsh_Black
red_poll
Santa_Gertrudis
Aberdeen_Angus, Angus, black_Angus
Africander
dairy_cattle, dairy_cow, milch_cow, milk_cow, milcher, milker
Ayrshire
Brown_Swiss
Charolais
Jersey
Devon
grade
Durham, shorthorn
milking_shorthorn
Galloway
Friesian, Holstein, Holstein-Friesian
Guernsey
Hereford, whiteface
cattalo, beefalo
Old_World_buffalo, buffalo
water_buffalo, water_ox, Asiatic_buffalo, Bubalus_bubalis
Indian_buffalo
carabao
anoa, dwarf_buffalo, Anoa_depressicornis
tamarau, tamarao, Bubalus_mindorensis, Anoa_mindorensis
Cape_buffalo, Synercus_caffer
Asian_wild_ox
gaur, Bibos_gaurus
gayal, mithan, Bibos_frontalis
bison
American_bison, American_buffalo, buffalo, Bison_bison
wisent, aurochs, Bison_bonasus
musk_ox, musk_sheep, Ovibos_moschatus
sheep
ewe
ram, tup
wether
lamb
lambkin
baa-lamb
hog, hogget, hogg
teg
Persian_lamb
black_sheep
domestic_sheep, Ovis_aries
Cotswold
Hampshire, Hampshire_down
Lincoln
Exmoor
Cheviot
broadtail, caracul, karakul
longwool
merino, merino_sheep
Rambouillet
wild_sheep
argali, argal, Ovis_ammon
Marco_Polo_sheep, Marco_Polo's_sheep, Ovis_poli
urial, Ovis_vignei
Dall_sheep, Dall's_sheep, white_sheep, Ovis_montana_dalli
mountain_sheep
bighorn, bighorn_sheep, cimarron, Rocky_Mountain_bighorn, Rocky_Mountain_sheep, Ovis_canadensis
mouflon, moufflon, Ovis_musimon
aoudad, arui, audad, Barbary_sheep, maned_sheep, Ammotragus_lervia
goat, caprine_animal
kid
billy, billy_goat, he-goat
nanny, nanny-goat, she-goat
domestic_goat, Capra_hircus
Cashmere_goat, Kashmir_goat
Angora, Angora_goat
wild_goat
bezoar_goat, pasang, Capra_aegagrus
markhor, markhoor, Capra_falconeri
ibex, Capra_ibex
goat_antelope
mountain_goat, Rocky_Mountain_goat, Oreamnos_americanus
goral, Naemorhedus_goral
serow
chamois, Rupicapra_rupicapra
takin, gnu_goat, Budorcas_taxicolor
antelope
blackbuck, black_buck, Antilope_cervicapra
gerenuk, Litocranius_walleri
addax, Addax_nasomaculatus
gnu, wildebeest
dik-dik
hartebeest
sassaby, topi, Damaliscus_lunatus
impala, Aepyceros_melampus
gazelle
Thomson's_gazelle, Gazella_thomsoni
Gazella_subgutturosa
springbok, springbuck, Antidorcas_marsupialis, Antidorcas_euchore
bongo, Tragelaphus_eurycerus, Boocercus_eurycerus
kudu, koodoo, koudou
greater_kudu, Tragelaphus_strepsiceros
lesser_kudu, Tragelaphus_imberbis
harnessed_antelope
nyala, Tragelaphus_angasi
mountain_nyala, Tragelaphus_buxtoni
bushbuck, guib, Tragelaphus_scriptus
nilgai, nylghai, nylghau, blue_bull, Boselaphus_tragocamelus
sable_antelope, Hippotragus_niger
saiga, Saiga_tatarica
steenbok, steinbok, Raphicerus_campestris
eland
common_eland, Taurotragus_oryx
giant_eland, Taurotragus_derbianus
kob, Kobus_kob
lechwe, Kobus_leche
waterbuck
puku, Adenota_vardoni
oryx, pasang
gemsbok, gemsbuck, Oryx_gazella
forest_goat, spindle_horn, Pseudoryx_nghetinhensis
pronghorn, prongbuck, pronghorn_antelope, American_antelope, Antilocapra_americana
deer, cervid
stag
royal, royal_stag
pricket
fawn
red_deer, elk, American_elk, wapiti, Cervus_elaphus
hart, stag
hind
brocket
sambar, sambur, Cervus_unicolor
wapiti, elk, American_elk, Cervus_elaphus_canadensis
Japanese_deer, sika, Cervus_nipon, Cervus_sika
Virginia_deer, white_tail, whitetail, white-tailed_deer, whitetail_deer, Odocoileus_Virginianus
mule_deer, burro_deer, Odocoileus_hemionus
black-tailed_deer, blacktail_deer, blacktail, Odocoileus_hemionus_columbianus
elk, European_elk, moose, Alces_alces
fallow_deer, Dama_dama
roe_deer, Capreolus_capreolus
roebuck
caribou, reindeer, Greenland_caribou, Rangifer_tarandus
woodland_caribou, Rangifer_caribou
barren_ground_caribou, Rangifer_arcticus
brocket
muntjac, barking_deer
musk_deer, Moschus_moschiferus
pere_david's_deer, elaphure, Elaphurus_davidianus
chevrotain, mouse_deer
kanchil, Tragulus_kanchil
napu, Tragulus_Javanicus
water_chevrotain, water_deer, Hyemoschus_aquaticus
camel
Arabian_camel, dromedary, Camelus_dromedarius
Bactrian_camel, Camelus_bactrianus
llama
domestic_llama, Lama_peruana
guanaco, Lama_guanicoe
alpaca, Lama_pacos
vicuna, Vicugna_vicugna
giraffe, camelopard, Giraffa_camelopardalis
okapi, Okapia_johnstoni
musteline_mammal, mustelid, musteline
weasel
ermine, shorttail_weasel, Mustela_erminea
stoat
New_World_least_weasel, Mustela_rixosa
Old_World_least_weasel, Mustela_nivalis
longtail_weasel, long-tailed_weasel, Mustela_frenata
mink
American_mink, Mustela_vison
polecat, fitch, foulmart, foumart, Mustela_putorius
ferret
black-footed_ferret, ferret, Mustela_nigripes
muishond
snake_muishond, Poecilogale_albinucha
striped_muishond, Ictonyx_striata
otter
river_otter, Lutra_canadensis
Eurasian_otter, Lutra_lutra
sea_otter, Enhydra_lutris
skunk, polecat, wood_pussy
striped_skunk, Mephitis_mephitis
hooded_skunk, Mephitis_macroura
hog-nosed_skunk, hognosed_skunk, badger_skunk, rooter_skunk, Conepatus_leuconotus
spotted_skunk, little_spotted_skunk, Spilogale_putorius
badger
American_badger, Taxidea_taxus
Eurasian_badger, Meles_meles
ratel, honey_badger, Mellivora_capensis
ferret_badger
hog_badger, hog-nosed_badger, sand_badger, Arctonyx_collaris
wolverine, carcajou, skunk_bear, Gulo_luscus
glutton, Gulo_gulo, wolverine
grison, Grison_vittatus, Galictis_vittatus
marten, marten_cat
pine_marten, Martes_martes
sable, Martes_zibellina
American_marten, American_sable, Martes_americana
stone_marten, beech_marten, Martes_foina
fisher, pekan, fisher_cat, black_cat, Martes_pennanti
yellow-throated_marten, Charronia_flavigula
tayra, taira, Eira_barbara
fictional_animal
pachyderm
edentate
armadillo
peba, nine-banded_armadillo, Texas_armadillo, Dasypus_novemcinctus
apar, three-banded_armadillo, Tolypeutes_tricinctus
tatouay, cabassous, Cabassous_unicinctus
peludo, poyou, Euphractus_sexcinctus
giant_armadillo, tatou, tatu, Priodontes_giganteus
pichiciago, pichiciego, fairy_armadillo, chlamyphore, Chlamyphorus_truncatus
sloth, tree_sloth
three-toed_sloth, ai, Bradypus_tridactylus
two-toed_sloth, unau, unai, Choloepus_didactylus
two-toed_sloth, unau, unai, Choloepus_hoffmanni
megatherian, megatheriid, megatherian_mammal
mylodontid
anteater, New_World_anteater
ant_bear, giant_anteater, great_anteater, tamanoir, Myrmecophaga_jubata
silky_anteater, two-toed_anteater, Cyclopes_didactylus
tamandua, tamandu, lesser_anteater, Tamandua_tetradactyla
pangolin, scaly_anteater, anteater
coronet
scapular
tadpole, polliwog, pollywog
primate
simian
ape
anthropoid
anthropoid_ape
hominoid
hominid
homo, man, human_being, human
world, human_race, humanity, humankind, human_beings, humans, mankind, man
Homo_erectus
Pithecanthropus, Pithecanthropus_erectus, genus_Pithecanthropus
Java_man, Trinil_man
Peking_man
Sinanthropus, genus_Sinanthropus
Homo_soloensis
Javanthropus, genus_Javanthropus
Homo_habilis
Homo_sapiens
Neandertal_man, Neanderthal_man, Neandertal, Neanderthal, Homo_sapiens_neanderthalensis
Cro-magnon
Homo_sapiens_sapiens, modern_man
australopithecine
Australopithecus_afarensis
Australopithecus_africanus
Australopithecus_boisei
Zinjanthropus, genus_Zinjanthropus
Australopithecus_robustus
Paranthropus, genus_Paranthropus
Sivapithecus
rudapithecus, Dryopithecus_Rudapithecus_hungaricus
proconsul
Aegyptopithecus
great_ape, pongid
orangutan, orang, orangutang, Pongo_pygmaeus
gorilla, Gorilla_gorilla
western_lowland_gorilla, Gorilla_gorilla_gorilla
eastern_lowland_gorilla, Gorilla_gorilla_grauri
mountain_gorilla, Gorilla_gorilla_beringei
silverback
chimpanzee, chimp, Pan_troglodytes
western_chimpanzee, Pan_troglodytes_verus
eastern_chimpanzee, Pan_troglodytes_schweinfurthii
central_chimpanzee, Pan_troglodytes_troglodytes
pygmy_chimpanzee, bonobo, Pan_paniscus
lesser_ape
gibbon, Hylobates_lar
siamang, Hylobates_syndactylus, Symphalangus_syndactylus
monkey
Old_World_monkey, catarrhine
guenon, guenon_monkey
talapoin, Cercopithecus_talapoin
grivet, Cercopithecus_aethiops
vervet, vervet_monkey, Cercopithecus_aethiops_pygerythrus
green_monkey, African_green_monkey, Cercopithecus_aethiops_sabaeus
mangabey
patas, hussar_monkey, Erythrocebus_patas
baboon
chacma, chacma_baboon, Papio_ursinus
mandrill, Mandrillus_sphinx
drill, Mandrillus_leucophaeus
macaque
rhesus, rhesus_monkey, Macaca_mulatta
bonnet_macaque, bonnet_monkey, capped_macaque, crown_monkey, Macaca_radiata
Barbary_ape, Macaca_sylvana
crab-eating_macaque, croo_monkey, Macaca_irus
langur
entellus, hanuman, Presbytes_entellus, Semnopithecus_entellus
colobus, colobus_monkey
guereza, Colobus_guereza
proboscis_monkey, Nasalis_larvatus
New_World_monkey, platyrrhine, platyrrhinian
marmoset
true_marmoset
pygmy_marmoset, Cebuella_pygmaea
tamarin, lion_monkey, lion_marmoset, leoncita
silky_tamarin, Leontocebus_rosalia
pinche, Leontocebus_oedipus
capuchin, ringtail, Cebus_capucinus
douroucouli, Aotus_trivirgatus
howler_monkey, howler
saki
uakari
titi, titi_monkey
spider_monkey, Ateles_geoffroyi
squirrel_monkey, Saimiri_sciureus
woolly_monkey
tree_shrew
prosimian
lemur
Madagascar_cat, ring-tailed_lemur, Lemur_catta
aye-aye, Daubentonia_madagascariensis
slender_loris, Loris_gracilis
slow_loris, Nycticebus_tardigradua, Nycticebus_pygmaeus
potto, kinkajou, Perodicticus_potto
angwantibo, golden_potto, Arctocebus_calabarensis
galago, bushbaby, bush_baby
indri, indris, Indri_indri, Indri_brevicaudatus
woolly_indris, Avahi_laniger
tarsier
Tarsius_syrichta
Tarsius_glis
flying_lemur, flying_cat, colugo
Cynocephalus_variegatus
proboscidean, proboscidian
elephant
rogue_elephant
Indian_elephant, Elephas_maximus
African_elephant, Loxodonta_africana
mammoth
woolly_mammoth, northern_mammoth, Mammuthus_primigenius
columbian_mammoth, Mammuthus_columbi
imperial_mammoth, imperial_elephant, Archidiskidon_imperator
mastodon, mastodont
plantigrade_mammal, plantigrade
digitigrade_mammal, digitigrade
procyonid
raccoon, racoon
common_raccoon, common_racoon, coon, ringtail, Procyon_lotor
crab-eating_raccoon, Procyon_cancrivorus
bassarisk, cacomistle, cacomixle, coon_cat, raccoon_fox, ringtail, ring-tailed_cat, civet_cat, miner's_cat, Bassariscus_astutus
kinkajou, honey_bear, potto, Potos_flavus, Potos_caudivolvulus
coati, coati-mondi, coati-mundi, coon_cat, Nasua_narica
lesser_panda, red_panda, panda, bear_cat, cat_bear, Ailurus_fulgens
giant_panda, panda, panda_bear, coon_bear, Ailuropoda_melanoleuca
twitterer
fish
fingerling
game_fish, sport_fish
food_fish
rough_fish
groundfish, bottom_fish
young_fish
parr
mouthbreeder
spawner
barracouta, snoek
crossopterygian, lobefin, lobe-finned_fish
coelacanth, Latimeria_chalumnae
lungfish
ceratodus
catfish, siluriform_fish
silurid, silurid_fish
European_catfish, sheatfish, Silurus_glanis
electric_catfish, Malopterurus_electricus
bullhead, bullhead_catfish
horned_pout, hornpout, pout, Ameiurus_Melas
brown_bullhead
channel_catfish, channel_cat, Ictalurus_punctatus
blue_catfish, blue_cat, blue_channel_catfish, blue_channel_cat
flathead_catfish, mudcat, goujon, shovelnose_catfish, spoonbill_catfish, Pylodictus_olivaris
armored_catfish
sea_catfish
gadoid, gadoid_fish
cod, codfish
codling
Atlantic_cod, Gadus_morhua
Pacific_cod, Alaska_cod, Gadus_macrocephalus
whiting, Merlangus_merlangus, Gadus_merlangus
burbot, eelpout, ling, cusk, Lota_lota
haddock, Melanogrammus_aeglefinus
pollack, pollock, Pollachius_pollachius
hake
silver_hake, Merluccius_bilinearis, whiting
ling
cusk, torsk, Brosme_brosme
grenadier, rattail, rattail_fish
eel
elver
common_eel, freshwater_eel
tuna, Anguilla_sucklandii
moray, moray_eel
conger, conger_eel
teleost_fish, teleost, teleostan
beaked_salmon, sandfish, Gonorhynchus_gonorhynchus
clupeid_fish, clupeid
whitebait
brit, britt
shad
common_American_shad, Alosa_sapidissima
river_shad, Alosa_chrysocloris
allice_shad, allis_shad, allice, allis, Alosa_alosa
alewife, Alosa_pseudoharengus, Pomolobus_pseudoharengus
menhaden, Brevoortia_tyrannis
herring, Clupea_harangus
Atlantic_herring, Clupea_harengus_harengus
Pacific_herring, Clupea_harengus_pallasii
sardine
sild
brisling, sprat, Clupea_sprattus
pilchard, sardine, Sardina_pilchardus
Pacific_sardine, Sardinops_caerulea
anchovy
mediterranean_anchovy, Engraulis_encrasicholus
salmonid
salmon
parr
blackfish
redfish
Atlantic_salmon, Salmo_salar
landlocked_salmon, lake_salmon
sockeye, sockeye_salmon, red_salmon, blueback_salmon, Oncorhynchus_nerka
chinook, chinook_salmon, king_salmon, quinnat_salmon, Oncorhynchus_tshawytscha
coho, cohoe, coho_salmon, blue_jack, silver_salmon, Oncorhynchus_kisutch
trout
brown_trout, salmon_trout, Salmo_trutta
rainbow_trout, Salmo_gairdneri
sea_trout
lake_trout, salmon_trout, Salvelinus_namaycush
brook_trout, speckled_trout, Salvelinus_fontinalis
char, charr
Arctic_char, Salvelinus_alpinus
whitefish
lake_whitefish, Coregonus_clupeaformis
cisco, lake_herring, Coregonus_artedi
round_whitefish, Menominee_whitefish, Prosopium_cylindraceum
smelt
sparling, European_smelt, Osmerus_eperlanus
capelin, capelan, caplin
tarpon, Tarpon_atlanticus
ladyfish, tenpounder, Elops_saurus
bonefish, Albula_vulpes
argentine
lanternfish
lizardfish, snakefish, snake-fish
lancetfish, lancet_fish, wolffish
opah, moonfish, Lampris_regius
New_World_opah, Lampris_guttatus
ribbonfish
dealfish, Trachipterus_arcticus
oarfish, king_of_the_herring, ribbonfish, Regalecus_glesne
batfish
goosefish, angler, anglerfish, angler_fish, monkfish, lotte, allmouth, Lophius_Americanus
toadfish, Opsanus_tau
oyster_fish, oyster-fish, oysterfish
frogfish
sargassum_fish
needlefish, gar, billfish
timucu
flying_fish
monoplane_flying_fish, two-wing_flying_fish
halfbeak
saury, billfish, Scomberesox_saurus
spiny-finned_fish, acanthopterygian
lingcod, Ophiodon_elongatus
percoid_fish, percoid, percoidean
perch
climbing_perch, Anabas_testudineus, A._testudineus
perch
yellow_perch, Perca_flavescens
European_perch, Perca_fluviatilis
pike-perch, pike_perch
walleye, walleyed_pike, jack_salmon, dory, Stizostedion_vitreum
blue_pike, blue_pickerel, blue_pikeperch, blue_walleye, Strizostedion_vitreum_glaucum
snail_darter, Percina_tanasi
cusk-eel
brotula
pearlfish, pearl-fish
robalo
snook
pike
northern_pike, Esox_lucius
muskellunge, Esox_masquinongy
pickerel
chain_pickerel, chain_pike, Esox_niger
redfin_pickerel, barred_pickerel, Esox_americanus
sunfish, centrarchid
crappie
black_crappie, Pomoxis_nigromaculatus
white_crappie, Pomoxis_annularis
freshwater_bream, bream
pumpkinseed, Lepomis_gibbosus
bluegill, Lepomis_macrochirus
spotted_sunfish, stumpknocker, Lepomis_punctatus
freshwater_bass
rock_bass, rock_sunfish, Ambloplites_rupestris
black_bass
Kentucky_black_bass, spotted_black_bass, Micropterus_pseudoplites
smallmouth, smallmouth_bass, smallmouthed_bass, smallmouth_black_bass, smallmouthed_black_bass, Micropterus_dolomieu
largemouth, largemouth_bass, largemouthed_bass, largemouth_black_bass, largemouthed_black_bass, Micropterus_salmoides
bass
serranid_fish, serranid
white_perch, silver_perch, Morone_americana
yellow_bass, Morone_interrupta
blackmouth_bass, Synagrops_bellus
rock_sea_bass, rock_bass, Centropristis_philadelphica
striped_bass, striper, Roccus_saxatilis, rockfish
stone_bass, wreckfish, Polyprion_americanus
grouper
hind
rock_hind, Epinephelus_adscensionis
creole-fish, Paranthias_furcifer
jewfish, Mycteroperca_bonaci
soapfish
surfperch, surffish, surf_fish
rainbow_seaperch, rainbow_perch, Hipsurus_caryi
bigeye
catalufa, Priacanthus_arenatus
cardinalfish
flame_fish, flamefish, Apogon_maculatus
tilefish, Lopholatilus_chamaeleonticeps
bluefish, Pomatomus_saltatrix
cobia, Rachycentron_canadum, sergeant_fish
remora, suckerfish, sucking_fish
sharksucker, Echeneis_naucrates
whale_sucker, whalesucker, Remilegia_australis
carangid_fish, carangid
jack
crevalle_jack, jack_crevalle, Caranx_hippos
yellow_jack, Caranx_bartholomaei
runner, blue_runner, Caranx_crysos
rainbow_runner, Elagatis_bipinnulata
leatherjacket, leatherjack
threadfish, thread-fish, Alectis_ciliaris
moonfish, Atlantic_moonfish, horsefish, horsehead, horse-head, dollarfish, Selene_setapinnis
lookdown, lookdown_fish, Selene_vomer
amberjack, amberfish
yellowtail, Seriola_dorsalis
kingfish, Seriola_grandis
pompano
Florida_pompano, Trachinotus_carolinus
permit, Trachinotus_falcatus
scad
horse_mackerel, jack_mackerel, Spanish_mackerel, saurel, Trachurus_symmetricus
horse_mackerel, saurel, Trachurus_trachurus
bigeye_scad, big-eyed_scad, goggle-eye, Selar_crumenophthalmus
mackerel_scad, mackerel_shad, Decapterus_macarellus
round_scad, cigarfish, quiaquia, Decapterus_punctatus
dolphinfish, dolphin, mahimahi
Coryphaena_hippurus
Coryphaena_equisetis
pomfret, Brama_raii
characin, characin_fish, characid
tetra
cardinal_tetra, Paracheirodon_axelrodi
piranha, pirana, caribe
cichlid, cichlid_fish
bolti, Tilapia_nilotica
snapper
red_snapper, Lutjanus_blackfordi
grey_snapper, gray_snapper, mangrove_snapper, Lutjanus_griseus
mutton_snapper, muttonfish, Lutjanus_analis
schoolmaster, Lutjanus_apodus
yellowtail, yellowtail_snapper, Ocyurus_chrysurus
grunt
margate, Haemulon_album
Spanish_grunt, Haemulon_macrostomum
tomtate, Haemulon_aurolineatum
cottonwick, Haemulon_malanurum
sailor's-choice, sailors_choice, Haemulon_parra
porkfish, pork-fish, Anisotremus_virginicus
pompon, black_margate, Anisotremus_surinamensis
pigfish, hogfish, Orthopristis_chrysopterus
sparid, sparid_fish
sea_bream, bream
porgy
red_porgy, Pagrus_pagrus
European_sea_bream, Pagellus_centrodontus
Atlantic_sea_bream, Archosargus_rhomboidalis
sheepshead, Archosargus_probatocephalus
pinfish, sailor's-choice, squirrelfish, Lagodon_rhomboides
sheepshead_porgy, Calamus_penna
snapper, Chrysophrys_auratus
black_bream, Chrysophrys_australis
scup, northern_porgy, northern_scup, Stenotomus_chrysops
scup, southern_porgy, southern_scup, Stenotomus_aculeatus
sciaenid_fish, sciaenid
striped_drum, Equetus_pulcher
jackknife-fish, Equetus_lanceolatus
silver_perch, mademoiselle, Bairdiella_chrysoura
red_drum, channel_bass, redfish, Sciaenops_ocellatus
mulloway, jewfish, Sciaena_antarctica
maigre, maiger, Sciaena_aquila
croaker
Atlantic_croaker, Micropogonias_undulatus
yellowfin_croaker, surffish, surf_fish, Umbrina_roncador
whiting
kingfish
king_whiting, Menticirrhus_americanus
northern_whiting, Menticirrhus_saxatilis
corbina, Menticirrhus_undulatus
white_croaker, chenfish, kingfish, Genyonemus_lineatus
white_croaker, queenfish, Seriphus_politus
sea_trout
weakfish, Cynoscion_regalis
spotted_weakfish, spotted_sea_trout, spotted_squeateague, Cynoscion_nebulosus
mullet
goatfish, red_mullet, surmullet, Mullus_surmuletus
red_goatfish, Mullus_auratus
yellow_goatfish, Mulloidichthys_martinicus
mullet, grey_mullet, gray_mullet
striped_mullet, Mugil_cephalus
white_mullet, Mugil_curema
liza, Mugil_liza
silversides, silverside
jacksmelt, Atherinopsis_californiensis
barracuda
great_barracuda, Sphyraena_barracuda
sweeper
sea_chub
Bermuda_chub, rudderfish, Kyphosus_sectatrix
spadefish, angelfish, Chaetodipterus_faber
butterfly_fish
chaetodon
angelfish
rock_beauty, Holocanthus_tricolor
damselfish, demoiselle
beaugregory, Pomacentrus_leucostictus
anemone_fish
clown_anemone_fish, Amphiprion_percula
sergeant_major, Abudefduf_saxatilis
wrasse
pigfish, giant_pigfish, Achoerodus_gouldii
hogfish, hog_snapper, Lachnolaimus_maximus
slippery_dick, Halicoeres_bivittatus
puddingwife, pudding-wife, Halicoeres_radiatus
bluehead, Thalassoma_bifasciatum
pearly_razorfish, Hemipteronatus_novacula
tautog, blackfish, Tautoga_onitis
cunner, bergall, Tautogolabrus_adspersus
parrotfish, polly_fish, pollyfish
threadfin
jawfish
stargazer
sand_stargazer
blenny, combtooth_blenny
shanny, Blennius_pholis
Molly_Miller, Scartella_cristata
clinid, clinid_fish
pikeblenny
bluethroat_pikeblenny, Chaenopsis_ocellata
gunnel, bracketed_blenny
rock_gunnel, butterfish, Pholis_gunnellus
eelblenny
wrymouth, ghostfish, Cryptacanthodes_maculatus
wolffish, wolf_fish, catfish
viviparous_eelpout, Zoarces_viviparus
ocean_pout, Macrozoarces_americanus
sand_lance, sand_launce, sand_eel, launce
dragonet
goby, gudgeon
mudskipper, mudspringer
sleeper, sleeper_goby
flathead
archerfish, Toxotes_jaculatrix
surgeonfish
gempylid
snake_mackerel, Gempylus_serpens
escolar, Lepidocybium_flavobrunneum
oilfish, Ruvettus_pretiosus
cutlassfish, frost_fish, hairtail
scombroid, scombroid_fish
mackerel
common_mackerel, shiner, Scomber_scombrus
Spanish_mackerel, Scomber_colias
chub_mackerel, tinker, Scomber_japonicus
wahoo, Acanthocybium_solandri
Spanish_mackerel
king_mackerel, cavalla, cero, Scomberomorus_cavalla
Scomberomorus_maculatus
cero, pintado, kingfish, Scomberomorus_regalis
sierra, Scomberomorus_sierra
tuna, tunny
albacore, long-fin_tunny, Thunnus_alalunga
bluefin, bluefin_tuna, horse_mackerel, Thunnus_thynnus
yellowfin, yellowfin_tuna, Thunnus_albacares
bonito
skipjack, Atlantic_bonito, Sarda_sarda
Chile_bonito, Chilean_bonito, Pacific_bonito, Sarda_chiliensis
skipjack, skipjack_tuna, Euthynnus_pelamis
bonito, oceanic_bonito, Katsuwonus_pelamis
swordfish, Xiphias_gladius
sailfish
Atlantic_sailfish, Istiophorus_albicans
billfish
marlin
blue_marlin, Makaira_nigricans
black_marlin, Makaira_mazara, Makaira_marlina
striped_marlin, Makaira_mitsukurii
white_marlin, Makaira_albida
spearfish
louvar, Luvarus_imperialis
dollarfish, Poronotus_triacanthus
palometa, California_pompano, Palometa_simillima
harvestfish, Paprilus_alepidotus
driftfish
barrelfish, black_rudderfish, Hyperglyphe_perciformis
clingfish
tripletail
Atlantic_tripletail, Lobotes_surinamensis
Pacific_tripletail, Lobotes_pacificus
mojarra
yellowfin_mojarra, Gerres_cinereus
silver_jenny, Eucinostomus_gula
whiting
ganoid, ganoid_fish
bowfin, grindle, dogfish, Amia_calva
paddlefish, duckbill, Polyodon_spathula
Chinese_paddlefish, Psephurus_gladis
sturgeon
Pacific_sturgeon, white_sturgeon, Sacramento_sturgeon, Acipenser_transmontanus
beluga, hausen, white_sturgeon, Acipenser_huso
gar, garfish, garpike, billfish, Lepisosteus_osseus
scorpaenoid, scorpaenoid_fish
scorpaenid, scorpaenid_fish
scorpionfish, scorpion_fish, sea_scorpion
plumed_scorpionfish, Scorpaena_grandicornis
lionfish
stonefish, Synanceja_verrucosa
rockfish
copper_rockfish, Sebastodes_caurinus
vermillion_rockfish, rasher, Sebastodes_miniatus
red_rockfish, Sebastodes_ruberrimus
rosefish, ocean_perch, Sebastodes_marinus
bullhead
miller's-thumb
sea_raven, Hemitripterus_americanus
lumpfish, Cyclopterus_lumpus
lumpsucker
pogge, armed_bullhead, Agonus_cataphractus
greenling
kelp_greenling, Hexagrammos_decagrammus
painted_greenling, convict_fish, convictfish, Oxylebius_pictus
flathead
gurnard
tub_gurnard, yellow_gurnard, Trigla_lucerna
sea_robin, searobin
northern_sea_robin, Prionotus_carolinus
flying_gurnard, flying_robin, butterflyfish
plectognath, plectognath_fish
triggerfish
queen_triggerfish, Bessy_cerca, oldwench, oldwife, Balistes_vetula
filefish
leatherjacket, leatherfish
boxfish, trunkfish
cowfish, Lactophrys_quadricornis
puffer, pufferfish, blowfish, globefish
spiny_puffer
porcupinefish, porcupine_fish, Diodon_hystrix
balloonfish, Diodon_holocanthus
burrfish
ocean_sunfish, sunfish, mola, headfish
sharptail_mola, Mola_lanceolata
flatfish
flounder
righteye_flounder, righteyed_flounder
plaice, Pleuronectes_platessa
European_flatfish, Platichthys_flesus
yellowtail_flounder, Limanda_ferruginea
winter_flounder, blackback_flounder, lemon_sole, Pseudopleuronectes_americanus
lemon_sole, Microstomus_kitt
American_plaice, Hippoglossoides_platessoides
halibut, holibut
Atlantic_halibut, Hippoglossus_hippoglossus
Pacific_halibut, Hippoglossus_stenolepsis
lefteye_flounder, lefteyed_flounder
southern_flounder, Paralichthys_lethostigmus
summer_flounder, Paralichthys_dentatus
whiff
horned_whiff, Citharichthys_cornutus
sand_dab
windowpane, Scophthalmus_aquosus
brill, Scophthalmus_rhombus
turbot, Psetta_maxima
tonguefish, tongue-fish
sole
European_sole, Solea_solea
English_sole, lemon_sole, Parophrys_vitulus
hogchoker, Trinectes_maculatus
aba
abacus
abandoned_ship, derelict
A_battery
abattoir, butchery, shambles, slaughterhouse
abaya
Abbe_condenser
abbey
abbey
abbey
Abney_level
abrader, abradant
abrading_stone
abutment
abutment_arch
academic_costume
academic_gown, academic_robe, judge's_robe
accelerator, throttle, throttle_valve
accelerator, particle_accelerator, atom_smasher
accelerator, accelerator_pedal, gas_pedal, gas, throttle, gun
accelerometer
accessory, accoutrement, accouterment
accommodating_lens_implant, accommodating_IOL
accommodation
accordion, piano_accordion, squeeze_box
acetate_disk, phonograph_recording_disk
acetate_rayon, acetate
achromatic_lens
acoustic_delay_line, sonic_delay_line
acoustic_device
acoustic_guitar
acoustic_modem
acropolis
acrylic
acrylic, acrylic_paint
actinometer
action, action_mechanism
active_matrix_screen
actuator
adapter, adaptor
adder
adding_machine, totalizer, totaliser
addressing_machine, Addressograph
adhesive_bandage
adit
adjoining_room
adjustable_wrench, adjustable_spanner
adobe, adobe_brick
adz, adze
aeolian_harp, aeolian_lyre, wind_harp
aerator
aerial_torpedo
aerosol, aerosol_container, aerosol_can, aerosol_bomb, spray_can
Aertex
afghan
Afro-wig
afterburner
after-shave, after-shave_lotion
agateware
agglomerator
aglet, aiglet, aiguilette
aglet, aiglet
agora, public_square
aigrette, aigret
aileron
air_bag
airbrake
airbrush
airbus
air_compressor
air_conditioner, air_conditioning
aircraft
aircraft_carrier, carrier, flattop, attack_aircraft_carrier
aircraft_engine
air_cushion, air_spring
airdock, hangar, repair_shed
airfield, landing_field, flying_field, field
air_filter, air_cleaner
airfoil, aerofoil, control_surface, surface
airframe
air_gun, airgun, air_rifle
air_hammer, jackhammer, pneumatic_hammer
air_horn
airing_cupboard
airliner
airmailer
airplane, aeroplane, plane
airplane_propeller, airscrew, prop
airport, airdrome, aerodrome, drome
air_pump, vacuum_pump
air_search_radar
airship, dirigible
air_terminal, airport_terminal
air-to-air_missile
air-to-ground_missile, air-to-surface_missile
aisle
Aladdin's_lamp
alarm, warning_device, alarm_system
alarm_clock, alarm
alb
alcazar
alcohol_thermometer, alcohol-in-glass_thermometer
alehouse
alembic
algometer
alidade, alidad
alidade, alidad
A-line
Allen_screw
Allen_wrench
alligator_wrench
alms_dish, alms_tray
alpaca
alpenstock
altar
altar, communion_table, Lord's_table
altarpiece, reredos
altazimuth
alternator
altimeter
Amati
ambulance
amen_corner
American_organ
ammeter
ammonia_clock
ammunition, ammo
amphibian, amphibious_aircraft
amphibian, amphibious_vehicle
amphitheater, amphitheatre, coliseum
amphitheater, amphitheatre
amphora
amplifier
ampulla
amusement_arcade
analog_clock
analog_computer, analogue_computer
analog_watch
analytical_balance, chemical_balance
analyzer, analyser
anamorphosis, anamorphism
anastigmat
anchor, ground_tackle
anchor_chain, anchor_rope
anchor_light, riding_light, riding_lamp
AND_circuit, AND_gate
andiron, firedog, dog, dog-iron
android, humanoid, mechanical_man
anechoic_chamber
anemometer, wind_gauge, wind_gage
aneroid_barometer, aneroid
angiocardiogram
angioscope
angle_bracket, angle_iron
angledozer
ankle_brace
anklet, anklets, bobbysock, bobbysocks
anklet
ankus
anode
anode
answering_machine
antenna, aerial, transmitting_aerial
anteroom, antechamber, entrance_hall, hall, foyer, lobby, vestibule
antiaircraft, antiaircraft_gun, flak, flack, pom-pom, ack-ack, ack-ack_gun
antiballistic_missile, ABM
antifouling_paint
anti-G_suit, G_suit
antimacassar
antiperspirant
anti-submarine_rocket
anvil
ao_dai
apadana
apartment, flat
apartment_building, apartment_house
aperture
aperture
apiary, bee_house
apparatus, setup
apparel, wearing_apparel, dress, clothes
applecart
appliance
appliance, contraption, contrivance, convenience, gadget, gizmo, gismo, widget
applicator, applier
appointment, fitting
apron
apron_string
apse, apsis
aqualung, Aqua-Lung, scuba
aquaplane
aquarium, fish_tank, marine_museum
arabesque
arbor, arbour, bower, pergola
arcade, colonnade
arch
architecture
architrave
arch_support
arc_lamp, arc_light
arctic, galosh, golosh, rubber, gumshoe
area
areaway
argyle, argyll
ark
arm
armament
armature
armband
armchair
armet
arm_guard, arm_pad
armhole
armilla
armlet, arm_band
armoire
armor, armour
armored_car, armoured_car
armored_car, armoured_car
armored_personnel_carrier, armoured_personnel_carrier, APC
armored_vehicle, armoured_vehicle
armor_plate, armour_plate, armor_plating, plate_armor, plate_armour
armory, armoury, arsenal
armrest
arquebus, harquebus, hackbut, hagbut
array
array, raiment, regalia
arrester, arrester_hook
arrow
arsenal, armory, armoury
arterial_road
arthrogram
arthroscope
artificial_heart
artificial_horizon, gyro_horizon, flight_indicator
artificial_joint
artificial_kidney, hemodialyzer
artificial_skin
artillery, heavy_weapon, gun, ordnance
artillery_shell
artist's_loft
art_school
ascot
ashcan, trash_can, garbage_can, wastebin, ash_bin, ash-bin, ashbin, dustbin, trash_barrel, trash_bin
ash-pan
ashtray
aspergill, aspersorium
aspersorium
aspirator
aspirin_powder, headache_powder
assault_gun
assault_rifle, assault_gun
assegai, assagai
assembly
assembly
assembly_hall
assembly_plant
astatic_coils
astatic_galvanometer
astrodome
astrolabe
astronomical_telescope
astronomy_satellite
athenaeum, atheneum
athletic_sock, sweat_sock, varsity_sock
athletic_supporter, supporter, suspensor, jockstrap, jock
atlas, telamon
atmometer, evaporometer
atom_bomb, atomic_bomb, A-bomb, fission_bomb, plutonium_bomb
atomic_clock
atomic_pile, atomic_reactor, pile, chain_reactor
atomizer, atomiser, spray, sprayer, nebulizer, nebuliser
atrium
attache_case, attache
attachment, bond
attack_submarine
attenuator
attic
attic_fan
attire, garb, dress
audio_amplifier
audiocassette
audio_CD, audio_compact_disc
audiometer, sonometer
audio_system, sound_system
audiotape
audiotape
audiovisual, audiovisual_aid
auditorium
auger, gimlet, screw_auger, wimble
autobahn
autoclave, sterilizer, steriliser
autofocus
autogiro, autogyro, gyroplane
autoinjector
autoloader, self-loader
automat
automat
automatic_choke
automatic_firearm, automatic_gun, automatic_weapon
automatic_pistol, automatic
automatic_rifle, automatic, machine_rifle
automatic_transmission, automatic_drive
automation
automaton, robot, golem
automobile_engine
automobile_factory, auto_factory, car_factory
automobile_horn, car_horn, motor_horn, horn, hooter
autopilot, automatic_pilot, robot_pilot
autoradiograph
autostrada
auxiliary_boiler, donkey_boiler
auxiliary_engine, donkey_engine
auxiliary_pump, donkey_pump
auxiliary_research_submarine
auxiliary_storage, external_storage, secondary_storage
aviary, bird_sanctuary, volary
awl
awning, sunshade, sunblind
ax, axe
ax_handle, axe_handle
ax_head, axe_head
axis, axis_of_rotation
axle
axle_bar
axletree
babushka
baby_bed, baby's_bed
baby_buggy, baby_carriage, carriage, perambulator, pram, stroller, go-cart, pushchair, pusher
baby_grand, baby_grand_piano, parlor_grand, parlor_grand_piano, parlour_grand, parlour_grand_piano
baby_powder
baby_shoe
back, backrest
back
backbench
backboard
backboard, basketball_backboard
backbone
back_brace
backgammon_board
background, desktop, screen_background
backhoe
backlighting
backpack, back_pack, knapsack, packsack, rucksack, haversack
backpacking_tent, pack_tent
backplate
back_porch
backsaw, back_saw
backscratcher
backseat
backspace_key, backspace, backspacer
backstairs
backstay
backstop
backsword
backup_system
badminton_court
badminton_equipment
badminton_racket, badminton_racquet, battledore
bag
bag, traveling_bag, travelling_bag, grip, suitcase
bag, handbag, pocketbook, purse
baggage, luggage
baggage
baggage_car, luggage_van
baggage_claim
bagpipe
bailey
bailey
Bailey_bridge
bain-marie
bait, decoy, lure
baize
bakery, bakeshop, bakehouse
balaclava, balaclava_helmet
balalaika
balance
balance_beam, beam
balance_wheel, balance
balbriggan
balcony
balcony
baldachin
baldric, baldrick
bale
baling_wire
ball
ball
ball_and_chain
ball-and-socket_joint
ballast, light_ballast
ball_bearing, needle_bearing, roller_bearing
ball_cartridge
ballcock, ball_cock
balldress
ballet_skirt, tutu
ball_gown
ballistic_galvanometer
ballistic_missile
ballistic_pendulum
ballistocardiograph, cardiograph
balloon
balloon_bomb, Fugo
balloon_sail
ballot_box
ballpark, park
ball-peen_hammer
ballpoint, ballpoint_pen, ballpen, Biro
ballroom, dance_hall, dance_palace
ball_valve
balsa_raft, Kon_Tiki
baluster
banana_boat
band
bandage, patch
Band_Aid
bandanna, bandana
bandbox
banderilla
bandoleer, bandolier
bandoneon
bandsaw, band_saw
bandwagon
bangalore_torpedo
bangle, bauble, gaud, gewgaw, novelty, fallal, trinket
banjo
banner, streamer
bannister, banister, balustrade, balusters, handrail
banquette
banyan, banian
baptismal_font, baptistry, baptistery, font
bar
bar
barbecue, barbeque
barbed_wire, barbwire
barbell
barber_chair
barbershop
barbette_carriage
barbican, barbacan
bar_bit
bareboat
barge, flatboat, hoy, lighter
barge_pole
baritone, baritone_horn
bark, barque
bar_magnet
bar_mask
barn
barndoor
barn_door
barnyard
barograph
barometer
barong
barouche
bar_printer
barrack
barrage_balloon
barrel, cask
barrel, gun_barrel
barrelhouse, honky-tonk
barrel_knot, blood_knot
barrel_organ, grind_organ, hand_organ, hurdy_gurdy, hurdy-gurdy, street_organ
barrel_vault
barrette
barricade
barrier
barroom, bar, saloon, ginmill, taproom
barrow, garden_cart, lawn_cart, wheelbarrow
bascule
base, pedestal, stand
base, bag
baseball
baseball_bat, lumber
baseball_cap, jockey_cap, golf_cap
baseball_equipment
baseball_glove, glove, baseball_mitt, mitt
basement, cellar
basement
basic_point_defense_missile_system
basilica, Roman_basilica
basilica
basilisk
basin
basinet
basket, handbasket
basket, basketball_hoop, hoop
basketball
basketball_court
basketball_equipment
basket_weave
bass
bass_clarinet
bass_drum, gran_casa
basset_horn
bass_fiddle, bass_viol, bull_fiddle, double_bass, contrabass, string_bass
bass_guitar
bass_horn, sousaphone, tuba
bassinet
bassinet
bassoon
baster
bastinado
bastion
bastion, citadel
bat
bath
bath_chair
bathhouse, bagnio
bathhouse, bathing_machine
bathing_cap, swimming_cap
bath_oil
bathrobe
bathroom, bath
bath_salts
bath_towel
bathtub, bathing_tub, bath, tub
bathyscaphe, bathyscaph, bathyscape
bathysphere
batik
batiste
baton, wand
baton
baton
baton
battering_ram
batter's_box
battery, electric_battery
battery, stamp_battery
batting_cage, cage
batting_glove
batting_helmet
battle-ax, battle-axe
battle_cruiser
battle_dress
battlement, crenelation, crenellation
battleship, battlewagon
battle_sight, battlesight
bay
bay
bayonet
bay_rum
bay_window, bow_window
bazaar, bazar
bazaar, bazar
bazooka
B_battery
BB_gun
beach_house
beach_towel
beach_wagon, station_wagon, wagon, estate_car, beach_waggon, station_waggon, waggon
beachwear
beacon, lighthouse, beacon_light, pharos
beading_plane
beaker
beaker
beam
beam_balance
beanbag
beanie, beany
bearing
bearing_rein, checkrein
bearing_wall
bearskin, busby, shako
beater
beating-reed_instrument, reed_instrument, reed
beaver, castor
beaver
Beckman_thermometer
bed
bed
bed_and_breakfast, bed-and-breakfast
bedclothes, bed_clothing, bedding
Bedford_cord
bed_jacket
bedpan
bedpost
bedroll
bedroom, sleeping_room, sleeping_accommodation, chamber, bedchamber
bedroom_furniture
bedsitting_room, bedsitter, bedsit
bedspread, bedcover, bed_cover, bed_covering, counterpane, spread
bedspring
bedstead, bedframe
beefcake
beehive, hive
beeper, pager
beer_barrel, beer_keg
beer_bottle
beer_can
beer_garden
beer_glass
beer_hall
beer_mat
beer_mug, stein
belaying_pin
belfry
bell
bell_arch
bellarmine, longbeard, long-beard, greybeard
bellbottom_trousers, bell-bottoms, bellbottom_pants
bell_cote, bell_cot
bell_foundry
bell_gable
bell_jar, bell_glass
bellows
bellpull
bell_push
bell_seat, balloon_seat
bell_tent
bell_tower
bellyband
belt
belt, belt_ammunition, belted_ammunition
belt_buckle
belting
bench
bench_clamp
bench_hook
bench_lathe
bench_press
bender
beret
berlin
Bermuda_shorts, Jamaica_shorts
berth, bunk, built_in_bed
besom
Bessemer_converter
bethel
betting_shop
bevatron
bevel, bevel_square
bevel_gear, pinion_and_crown_wheel, pinion_and_ring_gear
B-flat_clarinet, licorice_stick
bib
bib-and-tucker
bicorn, bicorne
bicycle, bike, wheel, cycle
bicycle-built-for-two, tandem_bicycle, tandem
bicycle_chain
bicycle_clip, trouser_clip
bicycle_pump
bicycle_rack
bicycle_seat, saddle
bicycle_wheel
bidet
bier
bier
bi-fold_door
bifocals
Big_Blue, BLU-82
big_board
bight
bikini, two-piece
bikini_pants
bilge
bilge_keel
bilge_pump
bilge_well
bill, peak, eyeshade, visor, vizor
bill, billhook
billboard, hoarding
billiard_ball
billiard_room, billiard_saloon, billiard_parlor, billiard_parlour, billiard_hall
bin
binder, ligature
binder, ring-binder
bindery
binding, book_binding, cover, back
bin_liner
binnacle
binoculars, field_glasses, opera_glasses
binocular_microscope
biochip
biohazard_suit
bioscope
biplane
birch, birch_rod
birchbark_canoe, birchbark, birch_bark
birdbath
birdcage
birdcall
bird_feeder, birdfeeder, feeder
birdhouse
bird_shot, buckshot, duck_shot
biretta, berretta, birretta
bishop
bistro
bit
bit
bite_plate, biteplate
bitewing
bitumastic
black
black
blackboard, chalkboard
blackboard_eraser
black_box
blackface
blackjack, cosh, sap
black_tie
blackwash
bladder
blade
blade, vane
blade
blank, dummy, blank_shell
blanket, cover
blast_furnace
blasting_cap
blazer, sport_jacket, sport_coat, sports_jacket, sports_coat
blender, liquidizer, liquidiser
blimp, sausage_balloon, sausage
blind, screen
blind_curve, blind_bend
blindfold
bling, bling_bling
blinker, flasher
blister_pack, bubble_pack
block
blockade
blockade-runner
block_and_tackle
blockbuster
blockhouse
block_plane
bloodmobile
bloomers, pants, drawers, knickers
blouse
blower
blowtorch, torch, blowlamp
blucher
bludgeon
blue
blue_chip
blunderbuss
blunt_file
boarding
boarding_house, boardinghouse
boardroom, council_chamber
boards
boat
boater, leghorn, Panama, Panama_hat, sailor, skimmer, straw_hat
boat_hook
boathouse
boatswain's_chair, bosun's_chair
boat_train
boatyard
bobbin, spool, reel
bobby_pin, hairgrip, grip
bobsled, bobsleigh, bob
bobsled, bobsleigh
bocce_ball, bocci_ball, boccie_ball
bodega
bodice
bodkin, threader
bodkin
bodkin
body
body_armor, body_armour, suit_of_armor, suit_of_armour, coat_of_mail, cataphract
body_lotion
body_stocking
body_plethysmograph
body_pad
bodywork
Bofors_gun
bogy, bogie, bogey
boiler, steam_boiler
boiling_water_reactor, BWR
bolero
bollard, bitt
bolo, bolo_knife
bolo_tie, bolo, bola_tie, bola
bolt
bolt, deadbolt
bolt
bolt_cutter
bomb
bombazine
bomb_calorimeter, bomb
bomber
bomber_jacket
bomblet, cluster_bomblet
bomb_rack
bombshell
bomb_shelter, air-raid_shelter, bombproof
bone-ash_cup, cupel, refractory_pot
bone_china
bones, castanets, clappers, finger_cymbals
boneshaker
bongo, bongo_drum
bonnet, poke_bonnet
book
book_bag
bookbindery
bookcase
bookend
bookmark, bookmarker
bookmobile
bookshelf
bookshop, bookstore, bookstall
boom
boom, microphone_boom
boomerang, throwing_stick, throw_stick
booster, booster_rocket, booster_unit, takeoff_booster, takeoff_rocket
booster, booster_amplifier, booster_station, relay_link, relay_station, relay_transmitter
boot
boot
boot_camp
bootee, bootie
booth, cubicle, stall, kiosk
booth
booth
boothose
bootjack
bootlace
bootleg
bootstrap
bore_bit, borer, rock_drill, stone_drill
boron_chamber
borstal
bosom
Boston_rocker
bota
bottle
bottle, feeding_bottle, nursing_bottle
bottle_bank
bottlebrush
bottlecap
bottle_opener
bottling_plant
bottom, freighter, merchantman, merchant_ship
boucle
boudoir
boulle, boule, buhl
bouncing_betty
bouquet, corsage, posy, nosegay
boutique, dress_shop
boutonniere
bow
bow
bow, bowknot
bow_and_arrow
bowed_stringed_instrument, string
Bowie_knife
bowl
bowl
bowl
bowler_hat, bowler, derby_hat, derby, plug_hat
bowline, bowline_knot
bowling_alley
bowling_ball, bowl
bowling_equipment
bowling_pin, pin
bowling_shoe
bowsprit
bowstring
bow_tie, bow-tie, bowtie
box
box, loge
box, box_seat
box_beam, box_girder
box_camera, box_Kodak
boxcar
box_coat
boxing_equipment
boxing_glove, glove
box_office, ticket_office, ticket_booth
box_spring
box_wrench, box_end_wrench
brace, bracing
brace, braces, orthodontic_braces
brace
brace, suspender, gallus
brace_and_bit
bracelet, bangle
bracer, armguard
brace_wrench
bracket, wall_bracket
bradawl, pricker
brake
brake
brake_band
brake_cylinder, hydraulic_brake_cylinder, master_cylinder
brake_disk
brake_drum, drum
brake_lining
brake_pad
brake_pedal
brake_shoe, shoe, skid
brake_system, brakes
brass, brass_instrument
brass, memorial_tablet, plaque
brass
brassard
brasserie
brassie
brassiere, bra, bandeau
brass_knucks, knucks, brass_knuckles, knuckles, knuckle_duster
brattice
brazier, brasier
breadbasket
bread-bin, breadbox
bread_knife
breakable
breakfast_area, breakfast_nook
breakfast_table
breakwater, groin, groyne, mole, bulwark, seawall, jetty
breast_drill
breast_implant
breastplate, aegis, egis
breast_pocket
breathalyzer, breathalyser
breechblock, breech_closer
breechcloth, breechclout, loincloth
breeches, knee_breeches, knee_pants, knickerbockers, knickers
breeches_buoy
breechloader
breeder_reactor
Bren, Bren_gun
brewpub
brick
brickkiln
bricklayer's_hammer
brick_trowel, mason's_trowel
brickwork
bridal_gown, wedding_gown, wedding_dress
bridge, span
bridge, nosepiece
bridle
bridle_path, bridle_road
bridoon
briefcase
briefcase_bomb
briefcase_computer
briefs, Jockey_shorts
brig
brig
brigandine
brigantine, hermaphrodite_brig
brilliantine
brilliant_pebble
brim
bristle_brush
britches
broad_arrow
broadax, broadaxe
brochette
broadcaster, spreader
broadcloth
broadcloth
broad_hatchet
broadloom
broadside
broadsword
brocade
brogan, brogue, clodhopper, work_shoe
broiler
broken_arch
bronchoscope
broom
broom_closet
broomstick, broom_handle
brougham
Browning_automatic_rifle, BAR
Browning_machine_gun, Peacemaker
brownstone
brunch_coat
brush
Brussels_carpet
Brussels_lace
bubble
bubble_chamber
bubble_jet_printer, bubble-jet_printer, bubblejet
buckboard
bucket, pail
bucket_seat
bucket_shop
buckle
buckram
bucksaw
buckskins
buff, buffer
buffer, polisher
buffer, buffer_storage, buffer_store
buffet, counter, sideboard
buffing_wheel
buggy, roadster
bugle
building, edifice
building_complex, complex
bulldog_clip, alligator_clip
bulldog_wrench
bulldozer, dozer
bullet, slug
bulletproof_vest
bullet_train, bullet
bullhorn, loud_hailer, loud-hailer
bullion
bullnose, bullnosed_plane
bullpen, detention_cell, detention_centre
bullpen
bullring
bulwark
bumboat
bumper
bumper
bumper_car, Dodgem
bumper_guard
bumper_jack
bundle, sheaf
bung, spile
bungalow, cottage
bungee, bungee_cord
bunghole
bunk
bunk, feed_bunk
bunk_bed, bunk
bunker, sand_trap, trap
bunker, dugout
bunker
bunsen_burner, bunsen, etna
bunting
bur, burr
Burberry
burette, buret
burglar_alarm
burial_chamber, sepulcher, sepulchre, sepulture
burial_garment
burial_mound, grave_mound, barrow, tumulus
burin
burqa, burka
burlap, gunny
burn_bag
burner
burnous, burnoose, burnouse
burp_gun, machine_pistol
burr
bus, autobus, coach, charabanc, double-decker, jitney, motorbus, motorcoach, omnibus, passenger_vehicle
bushel_basket
bushing, cylindrical_lining
bush_jacket
business_suit
buskin, combat_boot, desert_boot, half_boot, top_boot
bustier
bustle
butcher_knife
butcher_shop, meat_market
butter_dish
butterfly_valve
butter_knife
butt_hinge
butt_joint, butt
button
buttonhook
buttress, buttressing
butt_shaft
butt_weld, butt-weld
buzz_bomb, robot_bomb, flying_bomb, doodlebug, V-1
buzzer
BVD, BVD's
bypass_condenser, bypass_capacitor
byway, bypath, byroad
cab, hack, taxi, taxicab
cab, cabriolet
cab
cabana
cabaret, nightclub, night_club, club, nightspot
caber
cabin
cabin
cabin_car, caboose
cabin_class, second_class, economy_class
cabin_cruiser, cruiser, pleasure_boat, pleasure_craft
cabinet
cabinet, console
cabinet, locker, storage_locker
cabinetwork
cabin_liner
cable, cable_television, cable_system, cable_television_service
cable, line, transmission_line
cable_car, car
cache, memory_cache
caddy, tea_caddy
caesium_clock
cafe, coffeehouse, coffee_shop, coffee_bar
cafeteria
cafeteria_tray
caff
caftan, kaftan
caftan, kaftan
cage, coop
cage
cagoule
caisson
calash, caleche, calash_top
calceus
calcimine
calculator, calculating_machine
caldron, cauldron
calico
caliper, calliper
call-board
call_center, call_centre
caller_ID
calliope, steam_organ
calorimeter
calpac, calpack, kalpac
camail, aventail, ventail
camber_arch
cambric
camcorder
camel's_hair, camelhair
camera, photographic_camera
camera_lens, optical_lens
camera_lucida
camera_obscura
camera_tripod
camise
camisole
camisole, underbodice
camlet
camouflage
camouflage, camo
camp, encampment, cantonment, bivouac
camp
camp, refugee_camp
campaign_hat
campanile, belfry
camp_chair
camper, camping_bus, motor_home
camper_trailer
campstool
camshaft
can, tin, tin_can
canal
canal_boat, narrow_boat, narrowboat
candelabrum, candelabra
candid_camera
candle, taper, wax_light
candlepin
candlesnuffer
candlestick, candle_holder
candlewick
candy_thermometer
cane
cane
cangue
canister, cannister, tin
cannery
cannikin
cannikin
cannon
cannon
cannon
cannon
cannonball, cannon_ball, round_shot
canoe
can_opener, tin_opener
canopic_jar, canopic_vase
canopy
canopy
canopy
canteen
canteen
canteen
canteen, mobile_canteen
canteen
cant_hook
cantilever
cantilever_bridge
cantle
Canton_crepe
canvas, canvass
canvas, canvass
canvas_tent, canvas, canvass
cap
cap
cap
capacitor, capacitance, condenser, electrical_condenser
caparison, trapping, housing
cape, mantle
capital_ship
capitol
cap_opener
capote, hooded_cloak
capote, hooded_coat
cap_screw
capstan
capstone, copestone, coping_stone, stretcher
capsule
captain's_chair
car, auto, automobile, machine, motorcar
car, railcar, railway_car, railroad_car
car, elevator_car
carabiner, karabiner, snap_ring
carafe, decanter
caravansary, caravanserai, khan, caravan_inn
car_battery, automobile_battery
carbine
car_bomb
carbon_arc_lamp, carbon_arc
carboy
carburetor, carburettor
car_carrier
cardcase
cardiac_monitor, heart_monitor
cardigan
card_index, card_catalog, card_catalogue
cardiograph, electrocardiograph
cardioid_microphone
car_door
cardroom
card_table
card_table
car-ferry
cargo_area, cargo_deck, cargo_hold, hold, storage_area
cargo_container
cargo_door
cargo_hatch
cargo_helicopter
cargo_liner
cargo_ship, cargo_vessel
carillon
car_mirror
caroche
carousel, carrousel, merry-go-round, roundabout, whirligig
carpenter's_hammer, claw_hammer, clawhammer
carpenter's_kit, tool_kit
carpenter's_level
carpenter's_mallet
carpenter's_rule
carpenter's_square
carpetbag
carpet_beater, rug_beater
carpet_loom
carpet_pad, rug_pad, underlay, underlayment
carpet_sweeper, sweeper
carpet_tack
carport, car_port
carrack, carack
carrel, carrell, cubicle, stall
carriage, equipage, rig
carriage
carriage_bolt
carriageway
carriage_wrench
carrick_bend
carrier
carryall, holdall, tote, tote_bag
carrycot
car_seat
cart
car_tire, automobile_tire, auto_tire, rubber_tire
carton
cartouche, cartouch
car_train
cartridge
cartridge, pickup
cartridge_belt
cartridge_extractor, cartridge_remover, extractor
cartridge_fuse
cartridge_holder, cartridge_clip, clip, magazine
cartwheel
carving_fork
carving_knife
car_wheel
caryatid
cascade_liquefier
cascade_transformer
case
case, display_case, showcase, vitrine
case, compositor's_case, typesetter's_case
casein_paint, casein
case_knife, sheath_knife
case_knife
casement
casement_window
casern
case_shot, canister, canister_shot
cash_bar
cashbox, money_box, till
cash_machine, cash_dispenser, automated_teller_machine, automatic_teller_machine, automated_teller, automatic_teller, ATM
cashmere
cash_register, register
casing, case
casino, gambling_casino
casket, jewel_casket
casque
casquet, casquetel
Cassegrainian_telescope, Gregorian_telescope
casserole
cassette
cassette_deck
cassette_player
cassette_recorder
cassette_tape
cassock
cast, plaster_cast, plaster_bandage
caster, castor
caster, castor
castle
castle, rook
catacomb
catafalque
catalytic_converter
catalytic_cracker, cat_cracker
catamaran
catapult, arbalest, arbalist, ballista, bricole, mangonel, onager, trebuchet, trebucket
catapult, launcher
catboat
cat_box
catch
catchall
catcher's_mask
catchment
Caterpillar, cat
cathedra, bishop's_throne
cathedral
cathedral, duomo
catheter
cathode
cathode-ray_tube, CRT
cat-o'-nine-tails, cat
cat's-paw
catsup_bottle, ketchup_bottle
cattle_car
cattle_guard, cattle_grid
cattleship, cattle_boat
cautery, cauterant
cavalier_hat, slouch_hat
cavalry_sword, saber, sabre
cavetto
cavity_wall
C_battery
C-clamp
CD_drive
CD_player
CD-R, compact_disc_recordable, CD-WO, compact_disc_write-once
CD-ROM, compact_disc_read-only_memory
CD-ROM_drive
cedar_chest
ceiling
celesta
cell, electric_cell
cell, jail_cell, prison_cell
cellar, wine_cellar
cellblock, ward
cello, violoncello
cellophane
cellular_telephone, cellular_phone, cellphone, cell, mobile_phone
cellulose_tape, Scotch_tape, Sellotape
cenotaph, empty_tomb
censer, thurible
center, centre
center_punch
Centigrade_thermometer
central_processing_unit, CPU, C.P.U., central_processor, processor, mainframe
centrifugal_pump
centrifuge, extractor, separator
ceramic
ceramic_ware
cereal_bowl
cereal_box
cerecloth
cesspool, cesspit, sink, sump
chachka, tsatske, tshatshke, tchotchke
chador, chadar, chaddar, chuddar
chafing_dish
chain
chain
chainlink_fence
chain_mail, ring_mail, mail, chain_armor, chain_armour, ring_armor, ring_armour
chain_printer
chain_saw, chainsaw
chain_store
chain_tongs
chain_wrench
chair
chair
chair_of_state
chairlift, chair_lift
chaise, shay
chaise_longue, chaise, daybed
chalet
chalice, goblet
chalk
challis
chamberpot, potty, thunder_mug
chambray
chamfer_bit
chamfer_plane
chamois_cloth
chancel, sanctuary, bema
chancellery
chancery
chandelier, pendant, pendent
chandlery
chanfron, chamfron, testiere, frontstall, front-stall
chanter, melody_pipe
chantry
chap
chapel
chapterhouse, fraternity_house, frat_house
chapterhouse
character_printer, character-at-a-time_printer, serial_printer
charcuterie
charge-exchange_accelerator
charger, battery_charger
chariot
chariot
charnel_house, charnel
chassis
chassis
chasuble
chateau
chatelaine
checker, chequer
checkout, checkout_counter
cheekpiece
cheeseboard, cheese_tray
cheesecloth
cheese_cutter
cheese_press
chemical_bomb, gas_bomb
chemical_plant
chemical_reactor
chemise, sack, shift
chemise, shimmy, shift, slip, teddy
chenille
chessman, chess_piece
chest
chesterfield
chest_of_drawers, chest, bureau, dresser
chest_protector
cheval-de-frise, chevaux-de-frise
cheval_glass
chicane
chicken_coop, coop, hencoop, henhouse
chicken_wire
chicken_yard, hen_yard, chicken_run, fowl_run
chiffon
chiffonier, commode
child's_room
chime, bell, gong
chimney_breast
chimney_corner, inglenook
china
china_cabinet, china_closet
chinchilla
Chinese_lantern
Chinese_puzzle
chinning_bar
chino
chino
chin_rest
chin_strap
chintz
chip, microchip, micro_chip, silicon_chip, microprocessor_chip
chip, poker_chip
chisel
chlamys
choir
choir_loft
choke
choke, choke_coil, choking_coil
chokey, choky
choo-choo
chopine, platform
chordophone
Christmas_stocking
chronograph
chronometer
chronoscope
chuck
chuck_wagon
chukka, chukka_boot
church, church_building
church_bell
church_hat
church_key
church_tower
churidars
churn, butter_churn
ciderpress
cigar_band
cigar_box
cigar_cutter
cigarette_butt
cigarette_case
cigarette_holder
cigar_lighter, cigarette_lighter, pocket_lighter
cinch, girth
cinema, movie_theater, movie_theatre, movie_house, picture_palace
cinquefoil
circle, round
circlet
circuit, electrical_circuit, electric_circuit
circuit_board, circuit_card, board, card, plug-in, add-in
circuit_breaker, breaker
circuitry
circular_plane, compass_plane
circular_saw, buzz_saw
circus_tent, big_top, round_top, top
cistern
cistern, water_tank
cittern, cithern, cither, citole, gittern
city_hall
cityscape
city_university
civies, civvies
civilian_clothing, civilian_dress, civilian_garb, plain_clothes
clack_valve, clack, clapper_valve
clamp, clinch
clamshell, grapple
clapper, tongue
clapperboard
clarence
clarinet
Clark_cell, Clark_standard_cell
clasp
clasp_knife, jackknife
classroom, schoolroom
clavichord
clavier, Klavier
clay_pigeon
claymore_mine, claymore
claymore
cleaners, dry_cleaners
cleaning_implement, cleaning_device, cleaning_equipment
cleaning_pad
clean_room, white_room
clearway
cleat
cleat
cleats
cleaver, meat_cleaver, chopper
clerestory, clearstory
clevis
clews
cliff_dwelling
climbing_frame
clinch
clinch, clench
clincher
clinic
clinical_thermometer, mercury-in-glass_clinical_thermometer
clinker, clinker_brick
clinometer, inclinometer
clip
clip_lead
clip-on
clipper
clipper
clipper, clipper_ship
cloak
cloak
cloakroom, coatroom
cloche
cloche
clock
clock_pendulum
clock_radio
clock_tower
clockwork
clog, geta, patten, sabot
cloisonne
cloister
closed_circuit, loop
closed-circuit_television
closed_loop, closed-loop_system
closet
closeup_lens
cloth_cap, flat_cap
cloth_covering
clothesbrush
clothes_closet, clothespress
clothes_dryer, clothes_drier
clothes_hamper, laundry_basket, clothes_basket, voider
clotheshorse
clothespin, clothes_pin, clothes_peg
clothes_tree, coat_tree, coat_stand
clothing, article_of_clothing, vesture, wear, wearable, habiliment
clothing_store, haberdashery, haberdashery_store, mens_store
clout_nail, clout
clove_hitch
club_car, lounge_car
clubroom
cluster_bomb
clutch
clutch, clutch_pedal
clutch_bag, clutch
coach, four-in-hand, coach-and-four
coach_house, carriage_house, remise
coal_car
coal_chute
coal_house
coal_shovel
coaming
coaster_brake
coat
coat_button
coat_closet
coatdress
coatee
coat_hanger, clothes_hanger, dress_hanger
coating, coat
coating
coat_of_paint
coatrack, coat_rack, hatrack
coattail
coaxial_cable, coax, coax_cable
cobweb
cobweb
Cockcroft_and_Walton_accelerator, Cockcroft-Walton_accelerator, Cockcroft_and_Walton_voltage_multiplier, Cockcroft-Walton_voltage_multiplier
cocked_hat
cockhorse
cockleshell
cockpit
cockpit
cockpit
cockscomb, coxcomb
cocktail_dress, sheath
cocktail_lounge
cocktail_shaker
cocotte
codpiece
coelostat
coffee_can
coffee_cup
coffee_filter
coffee_maker
coffee_mill, coffee_grinder
coffee_mug
coffeepot
coffee_stall
coffee_table, cocktail_table
coffee_urn
coffer
Coffey_still
coffin, casket
cog, sprocket
coif
coil, spiral, volute, whorl, helix
coil
coil
coil_spring, volute_spring
coin_box
colander, cullender
cold_cathode
cold_chisel, set_chisel
cold_cream, coldcream, face_cream, vanishing_cream
cold_frame
collar, neckband
collar
college
collet, collet_chuck
collider
colliery, pit
collimator
collimator
cologne, cologne_water, eau_de_cologne
colonnade
colonoscope
colorimeter, tintometer
colors, colours
color_television, colour_television, color_television_system, colour_television_system, color_TV, colour_TV
color_tube, colour_tube, color_television_tube, colour_television_tube, color_TV_tube, colour_TV_tube
color_wash, colour_wash
Colt
colter, coulter
columbarium
columbarium, cinerarium
column, pillar
column, pillar
comb
comb
comber
combination_lock
combination_plane
combine
comforter, pacifier, baby's_dummy, teething_ring
command_module
commissary
commissary
commodity, trade_good, good
common_ax, common_axe, Dayton_ax, Dayton_axe
common_room
communications_satellite
communication_system
community_center, civic_center
commutator
commuter, commuter_train
compact, powder_compact
compact, compact_car
compact_disk, compact_disc, CD
compact-disk_burner, CD_burner
companionway
compartment
compartment
compass
compass
compass_card, mariner's_compass
compass_saw
compound
compound_lens
compound_lever
compound_microscope
compress
compression_bandage, tourniquet
compressor
computer, computing_machine, computing_device, data_processor, electronic_computer, information_processing_system
computer_circuit
computerized_axial_tomography_scanner, CAT_scanner
computer_keyboard, keypad
computer_monitor
computer_network
computer_screen, computer_display
computer_store
computer_system, computing_system, automatic_data_processing_system, ADP_system, ADPS
concentration_camp, stockade
concert_grand, concert_piano
concert_hall
concertina
concertina
concrete_mixer, cement_mixer
condensation_pump, diffusion_pump
condenser, optical_condenser
condenser
condenser
condenser_microphone, capacitor_microphone
condominium
condominium, condo
conductor
cone_clutch, cone_friction_clutch
confectionery, confectionary, candy_store
conference_center, conference_house
conference_room
conference_table, council_table, council_board
confessional
conformal_projection, orthomorphic_projection
congress_boot, congress_shoe, congress_gaiter
conic_projection, conical_projection
connecting_rod
connecting_room
connection, connexion, connector, connecter, connective
conning_tower
conning_tower
conservatory, hothouse, indoor_garden
conservatory, conservatoire
console
console
console_table, console
consulate
contact, tangency
contact, contact_lens
container
container_ship, containership, container_vessel
containment
contrabassoon, contrafagotto, double_bassoon
control, controller
control_center
control_circuit, negative_feedback_circuit
control_key, command_key
control_panel, instrument_panel, control_board, board, panel
control_rod
control_room
control_system
control_tower
convector
convenience_store
convent
conventicle, meetinghouse
converging_lens, convex_lens
converter, convertor
convertible
convertible, sofa_bed
conveyance, transport
conveyer_belt, conveyor_belt, conveyer, conveyor, transporter
cooker
cookfire
cookhouse
cookie_cutter
cookie_jar, cooky_jar
cookie_sheet, baking_tray
cooking_utensil, cookware
cookstove
coolant_system
cooler, ice_chest
cooling_system, cooling
cooling_system, engine_cooling_system
cooling_tower
coonskin_cap, coonskin
cope
coping_saw
copperware
copyholder
coquille
coracle
corbel, truss
corbel_arch
corbel_step, corbie-step, corbiestep, crow_step
corbie_gable
cord, corduroy
cord, electric_cord
cordage
cords, corduroys
core
core_bit
core_drill
corer
cork, bottle_cork
corker
corkscrew, bottle_screw
corncrib
corner, quoin
corner, nook
corner_post
cornet, horn, trumpet, trump
cornice
cornice
cornice, valance, valance_board, pelmet
correctional_institution
corrugated_fastener, wiggle_nail
corselet, corslet
corset, girdle, stays
cosmetic
cosmotron
costume
costume
costume
costume
cosy, tea_cosy, cozy, tea_cozy
cot, camp_bed
cottage_tent
cotter, cottar
cotter_pin
cotton
cotton_flannel, Canton_flannel
cotton_mill
couch
couch
couchette
coude_telescope, coude_system
counter
counter, tabulator
counter
counterbore, countersink, countersink_bit
counter_tube
country_house
country_store, general_store, trading_post
coupe
coupling, coupler
court, courtyard
court
court, courtroom
court
Courtelle
courthouse
courthouse
coverall
covered_bridge
covered_couch
covered_wagon, Conestoga_wagon, Conestoga, prairie_wagon, prairie_schooner
covering
coverlet
cover_plate
cowbarn, cowshed, cow_barn, cowhouse, byre
cowbell
cowboy_boot
cowboy_hat, ten-gallon_hat
cowhide
cowl
cow_pen, cattle_pen, corral
CPU_board, mother_board
crackle, crackleware, crackle_china
cradle
craft
cramp, cramp_iron
crampon, crampoon, climbing_iron, climber
crampon, crampoon
crane
craniometer
crank, starter
crankcase
crankshaft
crash_barrier
crash_helmet
crate
cravat
crayon, wax_crayon
crazy_quilt
cream, ointment, emollient
cream_pitcher, creamer
creche, foundling_hospital
creche
credenza, credence
creel
crematory, crematorium, cremation_chamber
crematory, crematorium
crepe, crape
crepe_de_Chine
crescent_wrench
cretonne
crib, cot
crib
cricket_ball
cricket_bat, bat
cricket_equipment
cringle, eyelet, loop, grommet, grummet
crinoline
crinoline
crochet_needle, crochet_hook
crock, earthenware_jar
Crock_Pot
crook, shepherd's_crook
Crookes_radiometer
Crookes_tube
croquet_ball
croquet_equipment
croquet_mallet
cross
crossbar
crossbar
crossbar
crossbench
cross_bit
crossbow
crosscut_saw, crosscut_handsaw, cutoff_saw
crossjack, mizzen_course
crosspiece
crotchet
croupier's_rake
crowbar, wrecking_bar, pry, pry_bar
crown, diadem
crown, crownwork, jacket, jacket_crown, cap
crown_jewels
crown_lens
crow's_nest
crucible, melting_pot
crucifix, rood, rood-tree
cruet, crewet
cruet-stand
cruise_control
cruise_missile
cruiser
cruiser, police_cruiser, patrol_car, police_car, prowl_car, squad_car
cruise_ship, cruise_liner
crupper
cruse
crusher
crutch
cryometer
cryoscope
cryostat
crypt
crystal, watch_crystal, watch_glass
crystal_detector
crystal_microphone
crystal_oscillator, quartz_oscillator
crystal_set
cubitiere
cucking_stool, ducking_stool
cuckoo_clock
cuddy
cudgel
cue, cue_stick, pool_cue, pool_stick
cue_ball
cuff, turnup
cuirass
cuisse
cul, cul_de_sac, dead_end
culdoscope
cullis
culotte
cultivator, tiller
culverin
culverin
culvert
cup
cupboard, closet
cup_hook
cupola
cupola
curb, curb_bit
curb_roof
curbstone, kerbstone
curette, curet
curler, hair_curler, roller, crimper
curling_iron
currycomb
cursor, pointer
curtain, drape, drapery, mantle, pall
customhouse, customshouse
cutaway, cutaway_drawing, cutaway_model
cutlas, cutlass
cutoff
cutout
cutter, cutlery, cutting_tool
cutter
cutting_implement
cutting_room
cutty_stool
cutwork
cybercafe
cyclopean_masonry
cyclostyle
cyclotron
cylinder
cylinder, piston_chamber
cylinder_lock
cymbal
dacha
Dacron, Terylene
dado
dado_plane
dagger, sticker
dairy, dairy_farm
dais, podium, pulpit, rostrum, ambo, stump, soapbox
daisy_print_wheel, daisy_wheel
daisywheel_printer
dam, dike, dyke
damask
dampener, moistener
damper, muffler
damper_block, piano_damper
dark_lantern, bull's-eye
darkroom
darning_needle, embroidery_needle
dart
dart
dashboard, fascia
dashiki, daishiki
dash-pot
data_converter
data_input_device, input_device
data_multiplexer
data_system, information_system
davenport
davenport
davit
daybed, divan_bed
daybook, ledger
day_nursery, day_care_center
day_school
dead_axle
deadeye
deadhead
deanery
deathbed
death_camp
death_house, death_row
death_knell, death_bell
death_seat
deck
deck
deck_chair, beach_chair
deck-house
deckle
deckle_edge, deckle
declinometer, transit_declinometer
decoder
decolletage
decoupage
dedicated_file_server
deep-freeze, Deepfreeze, deep_freezer, freezer
deerstalker
defense_system, defence_system
defensive_structure, defense, defence
defibrillator
defilade
deflector
delayed_action
delay_line
delft
delicatessen, deli, food_shop
delivery_truck, delivery_van, panel_truck
delta_wing
demijohn
demitasse
den
denim, dungaree, jean
densimeter, densitometer
densitometer
dental_appliance
dental_floss, floss
dental_implant
dentist's_drill, burr_drill
denture, dental_plate, plate
deodorant, deodourant
department_store, emporium
departure_lounge
depilatory, depilator, epilator
depressor
depth_finder
depth_gauge, depth_gage
derrick
derrick
derringer
desk
desk_phone
desktop_computer
dessert_spoon
destroyer, guided_missile_destroyer
destroyer_escort
detached_house, single_dwelling
detector, sensor, sensing_element
detector
detention_home, detention_house, house_of_detention, detention_camp
detonating_fuse
detonator, detonating_device, cap
developer
device
Dewar_flask, Dewar
dhoti
dhow
dial, telephone_dial
dial
dial
dialog_box, panel
dial_telephone, dial_phone
dialyzer, dialysis_machine
diamante
diaper, nappy, napkin
diaper
diaphone
diaphragm, stop
diaphragm
diathermy_machine
dibble, dibber
dice_cup, dice_box
dicer
dickey, dickie, dicky, shirtfront
dickey, dickie, dicky, dickey-seat, dickie-seat, dicky-seat
Dictaphone
die
diesel, diesel_engine, diesel_motor
diesel-electric_locomotive, diesel-electric
diesel-hydraulic_locomotive, diesel-hydraulic
diesel_locomotive
diestock
differential_analyzer
differential_gear, differential
diffuser, diffusor
diffuser, diffusor
digester
diggings, digs, domiciliation, lodgings, pad
digital-analog_converter, digital-to-analog_converter
digital_audiotape, DAT
digital_camera
digital_clock
digital_computer
digital_display, alphanumeric_display
digital_subscriber_line, DSL
digital_voltmeter
digital_watch
digitizer, digitiser, analog-digital_converter, analog-to-digital_converter
dilator, dilater
dildo
dimity
dimmer
diner
dinette
dinghy, dory, rowboat
dining_area
dining_car, diner, dining_compartment, buffet_car
dining-hall
dining_room, dining-room
dining-room_furniture
dining-room_table
dining_table, board
dinner_bell
dinner_dress, dinner_gown, formal, evening_gown
dinner_jacket, tux, tuxedo, black_tie
dinner_napkin
dinner_pail, dinner_bucket
dinner_table
dinner_theater, dinner_theatre
diode, semiconductor_diode, junction_rectifier, crystal_rectifier
diode, rectifying_tube, rectifying_valve
dip
diplomatic_building
dipole, dipole_antenna
dipper
dipstick
DIP_switch, dual_inline_package_switch
directional_antenna
directional_microphone
direction_finder
dirk
dirndl
dirndl
dirty_bomb
discharge_lamp
discharge_pipe
disco, discotheque
discount_house, discount_store, discounter, wholesale_house
discus, saucer
disguise
dish
dish, dish_aerial, dish_antenna, saucer
dishpan
dish_rack
dishrag, dishcloth
dishtowel, dish_towel, tea_towel
dishwasher, dish_washer, dishwashing_machine
disk, disc
disk_brake, disc_brake
disk_clutch
disk_controller
disk_drive, disc_drive, hard_drive, Winchester_drive
diskette, floppy, floppy_disk
disk_harrow, disc_harrow
dispatch_case, dispatch_box
dispensary
dispenser
display, video_display
display_adapter, display_adaptor
display_panel, display_board, board
display_window, shop_window, shopwindow, show_window
disposal, electric_pig, garbage_disposal
disrupting_explosive, bursting_explosive
distaff
distillery, still
distributor, distributer, electrical_distributor
distributor_cam
distributor_cap
distributor_housing
distributor_point, breaker_point, point
ditch
ditch_spade, long-handled_spade
ditty_bag
divan
divan, diwan
dive_bomber
diverging_lens, concave_lens
divided_highway, dual_carriageway
divider
diving_bell
divining_rod, dowser, dowsing_rod, waterfinder, water_finder
diving_suit, diving_dress
dixie
Dixie_cup, paper_cup
dock, dockage, docking_facility
doeskin
dogcart
doggie_bag, doggy_bag
dogsled, dog_sled, dog_sleigh
dog_wrench
doily, doyley, doyly
doll, dolly
dollhouse, doll's_house
dolly
dolman
dolman, dolman_jacket
dolman_sleeve
dolmen, cromlech, portal_tomb
dome
dome, domed_stadium, covered_stadium
domino, half_mask, eye_mask
dongle
donkey_jacket
door
door
door
doorbell, bell, buzzer
doorframe, doorcase
doorjamb, doorpost
doorlock
doormat, welcome_mat
doornail
doorplate
doorsill, doorstep, threshold
doorstop, doorstopper
Doppler_radar
dormer, dormer_window
dormer_window
dormitory, dorm, residence_hall, hall, student_residence
dormitory, dormitory_room, dorm_room
dosemeter, dosimeter
dossal, dossel
dot_matrix_printer, matrix_printer, dot_printer
double_bed
double-bitted_ax, double-bitted_axe, Western_ax, Western_axe
double_boiler, double_saucepan
double-breasted_jacket
double-breasted_suit
double_door
double_glazing
double-hung_window
double_knit
doubler
double_reed
double-reed_instrument, double_reed
doublet
doubletree
douche, douche_bag
dovecote, columbarium, columbary
Dover's_powder
dovetail, dovetail_joint
dovetail_plane
dowel, dowel_pin, joggle
downstage
drafting_instrument
drafting_table, drawing_table
Dragunov
drainage_ditch
drainage_system
drain_basket
drainplug
drape
drapery
drawbar
drawbridge, lift_bridge
drawer
drawers, underdrawers, shorts, boxers, boxershorts
drawing_chalk
drawing_room, withdrawing_room
drawing_room
drawknife, drawshave
drawstring_bag
dray, camion
dreadnought, dreadnaught
dredge
dredger
dredging_bucket
dress, frock
dress_blues, dress_whites
dresser
dress_hat, high_hat, opera_hat, silk_hat, stovepipe, top_hat, topper, beaver
dressing, medical_dressing
dressing_case
dressing_gown, robe-de-chambre, lounging_robe
dressing_room
dressing_sack, dressing_sacque
dressing_table, dresser, vanity, toilet_table
dress_rack
dress_shirt, evening_shirt
dress_suit, full_dress, tailcoat, tail_coat, tails, white_tie, white_tie_and_tails
dress_uniform
drift_net
drill
electric_drill
drilling_platform, offshore_rig
drill_press
drill_rig, drilling_rig, oilrig, oil_rig
drinking_fountain, water_fountain, bubbler
drinking_vessel
drip_loop
drip_mat
drip_pan
dripping_pan, drip_pan
drip_pot
drive
drive
drive_line, drive_line_system
driver, number_one_wood
driveshaft
driveway, drive, private_road
driving_iron, one_iron
driving_wheel
drogue, drogue_chute, drogue_parachute
drogue_parachute
drone, drone_pipe, bourdon
drone, pilotless_aircraft, radio-controlled_aircraft
drop_arch
drop_cloth
drop_curtain, drop_cloth, drop
drop_forge, drop_hammer, drop_press
drop-leaf_table
dropper, eye_dropper
droshky, drosky
drove, drove_chisel
drugget
drugstore, apothecary's_shop, chemist's, chemist's_shop, pharmacy
drum, membranophone, tympan
drum, metal_drum
drum_brake
drumhead, head
drum_printer
drum_sander, electric_sander, sander, smoother
drumstick
dry_battery
dry-bulb_thermometer
dry_cell
dry_dock, drydock, graving_dock
dryer, drier
dry_fly
dry_kiln
dry_masonry
dry_point
dry_wall, dry-stone_wall
dual_scan_display
duck
duckboard
duckpin
dudeen
duffel, duffle
duffel_bag, duffle_bag, duffel, duffle
duffel_coat, duffle_coat
dugout
dugout_canoe, dugout, pirogue
dulciana
dulcimer
dulcimer
dumbbell
dumb_bomb, gravity_bomb
dumbwaiter, food_elevator
dumdum, dumdum_bullet
dumpcart
Dumpster
dump_truck, dumper, tipper_truck, tipper_lorry, tip_truck, tipper
Dumpy_level
dunce_cap, dunce's_cap, fool's_cap
dune_buggy, beach_buggy
dungeon
duplex_apartment, duplex
duplex_house, duplex, semidetached_house
duplicator, copier
dust_bag, vacuum_bag
dustcloth, dustrag, duster
dust_cover
dust_cover, dust_sheet
dustmop, dust_mop, dry_mop
dustpan
Dutch_oven
Dutch_oven
dwelling, home, domicile, abode, habitation, dwelling_house
dye-works
dynamo
dynamometer, ergometer
Eames_chair
earflap, earlap
early_warning_radar
early_warning_system
earmuff
earphone, earpiece, headphone, phone
earplug
earplug
earthenware
earthwork
easel
easy_chair, lounge_chair, overstuffed_chair
eaves
ecclesiastical_attire, ecclesiastical_robe
echinus
echocardiograph
edger
edge_tool
efficiency_apartment
egg-and-dart, egg-and-anchor, egg-and-tongue
eggbeater, eggwhisk
egg_timer
eiderdown, duvet, continental_quilt
eight_ball
ejection_seat, ejector_seat, capsule
elastic
elastic_bandage
Elastoplast
elbow
elbow_pad
electric, electric_automobile, electric_car
electrical_cable
electrical_contact
electrical_converter
electrical_device
electrical_system
electric_bell
electric_blanket
electric_chair, chair, death_chair, hot_seat
electric_clock
electric-discharge_lamp, gas-discharge_lamp
electric_fan, blower
electric_frying_pan
electric_furnace
electric_guitar
electric_hammer
electric_heater, electric_fire
electric_lamp
electric_locomotive
electric_meter, power_meter
electric_mixer
electric_motor
electric_organ, electronic_organ, Hammond_organ, organ
electric_range
electric_refrigerator, fridge
electric_toothbrush
electric_typewriter
electro-acoustic_transducer
electrode
electrodynamometer
electroencephalograph
electrograph
electrolytic, electrolytic_capacitor, electrolytic_condenser
electrolytic_cell
electromagnet
electrometer
electromyograph
electron_accelerator
electron_gun
electronic_balance
electronic_converter
electronic_device
electronic_equipment
electronic_fetal_monitor, electronic_foetal_monitor, fetal_monitor, foetal_monitor
electronic_instrument, electronic_musical_instrument
electronic_voltmeter
electron_microscope
electron_multiplier
electrophorus
electroscope
electrostatic_generator, electrostatic_machine, Wimshurst_machine, Van_de_Graaff_generator
electrostatic_printer
elevator, lift
elevator
elevator_shaft
embankment
embassy
embellishment
emergency_room, ER
emesis_basin
emitter
empty
emulsion, photographic_emulsion
enamel
enamel
enamelware
encaustic
encephalogram, pneumoencephalogram
enclosure
endoscope
energizer, energiser
engine
engine
engineering, engine_room
enginery
English_horn, cor_anglais
English_saddle, English_cavalry_saddle
enlarger
ensemble
ensign
entablature
entertainment_center
entrenching_tool, trenching_spade
entrenchment, intrenchment
envelope
envelope
envelope, gasbag
eolith
epauliere
epee
epergne
epicyclic_train, epicyclic_gear_train
epidiascope
epilating_wax
equalizer, equaliser
equatorial
equipment
erasable_programmable_read-only_memory, EPROM
eraser
erecting_prism
erection
Erlenmeyer_flask
escape_hatch
escapement
escape_wheel
escarpment, escarp, scarp, protective_embankment
escutcheon, scutcheon
esophagoscope, oesophagoscope
espadrille
espalier
espresso_maker
espresso_shop
establishment
estaminet
estradiol_patch
etagere
etamine, etamin
etching
ethernet
ethernet_cable
Eton_jacket
etui
eudiometer
euphonium
evaporative_cooler
evening_bag
exercise_bike, exercycle
exercise_device
exhaust, exhaust_system
exhaust_fan
exhaust_valve
exhibition_hall, exhibition_area
Exocet
expansion_bit, expansive_bit
expansion_bolt
explosive_detection_system, EDS
explosive_device
explosive_trace_detection, ETD
express, limited
extension, telephone_extension, extension_phone
extension_cord
external-combustion_engine
external_drive
extractor
eyebrow_pencil
eyecup, eyebath, eye_cup
eyeliner
eyepatch, patch
eyepiece, ocular
eyeshadow
fabric, cloth, material, textile
facade, frontage, frontal
face_guard
face_mask
faceplate
face_powder
face_veil
facing, cladding
facing
facing, veneer
facsimile, facsimile_machine, fax
factory, mill, manufacturing_plant, manufactory
factory_ship
fagot, faggot
fagot_stitch, faggot_stitch
Fahrenheit_thermometer
faience
faille
fairlead
fairy_light
falchion
fallboard, fall-board
fallout_shelter
false_face
false_teeth
family_room
fan
fan_belt
fan_blade
fancy_dress, masquerade, masquerade_costume
fanion
fanlight
fanjet, fan-jet, fanjet_engine, turbojet, turbojet_engine, turbofan, turbofan_engine
fanjet, fan-jet, turbofan, turbojet
fanny_pack, butt_pack
fan_tracery
fan_vaulting
farm_building
farmer's_market, green_market, greenmarket
farmhouse
farm_machine
farmplace, farm-place, farmstead
farmyard
farthingale
fastener, fastening, holdfast, fixing
fast_reactor
fat_farm
fatigues
faucet, spigot
fauld
fauteuil
feather_boa, boa
featheredge
fedora, felt_hat, homburg, Stetson, trilby
feedback_circuit, feedback_loop
feedlot
fell, felled_seam
felloe, felly
felt
felt-tip_pen, felt-tipped_pen, felt_tip, Magic_Marker
felucca
fence, fencing
fencing_mask, fencer's_mask
fencing_sword
fender, wing
fender, buffer, cowcatcher, pilot
Ferris_wheel
ferrule, collet
ferry, ferryboat
ferule
festoon
fetoscope, foetoscope
fetter, hobble
fez, tarboosh
fiber, fibre, vulcanized_fiber
fiber_optic_cable, fibre_optic_cable
fiberscope
fichu
fiddlestick, violin_bow
field_artillery, field_gun
field_coil, field_winding
field-effect_transistor, FET
field-emission_microscope
field_glass, glass, spyglass
field_hockey_ball
field_hospital
field_house, sports_arena
field_lens
field_magnet
field-sequential_color_television, field-sequential_color_TV, field-sequential_color_television_system, field-sequential_color_TV_system
field_tent
fieldwork
fife
fifth_wheel, spare
fighter, fighter_aircraft, attack_aircraft
fighting_chair
fig_leaf
figure_eight, figure_of_eight
figure_loom, figured-fabric_loom
figure_skate
filament
filature
file
file, file_cabinet, filing_cabinet
file_folder
file_server
filigree, filagree, fillagree
filling
film, photographic_film
film, plastic_film
film_advance
filter
filter
finder, viewfinder, view_finder
finery
fine-tooth_comb, fine-toothed_comb
finger
fingerboard
finger_bowl
finger_paint, fingerpaint
finger-painting
finger_plate, escutcheon, scutcheon
fingerstall, cot
finish_coat, finishing_coat
finish_coat, finishing_coat
finisher
fin_keel
fipple
fipple_flute, fipple_pipe, recorder, vertical_flute
fire
fire_alarm, smoke_alarm
firearm, piece, small-arm
fire_bell
fireboat
firebox
firebrick
fire_control_radar
fire_control_system
fire_engine, fire_truck
fire_extinguisher, extinguisher, asphyxiator
fire_iron
fireman's_ax, fireman's_axe
fireplace, hearth, open_fireplace
fire_screen, fireguard
fire_tongs, coal_tongs
fire_tower
firewall
firing_chamber, gun_chamber
firing_pin
firkin
firmer_chisel
first-aid_kit
first-aid_station
first_base
first_class
fishbowl, fish_bowl, goldfish_bowl
fisherman's_bend
fisherman's_knot, true_lover's_knot, truelove_knot
fisherman's_lure, fish_lure
fishhook
fishing_boat, fishing_smack, fishing_vessel
fishing_gear, tackle, fishing_tackle, fishing_rig, rig
fishing_rod, fishing_pole
fish_joint
fish_knife
fishnet, fishing_net
fish_slice
fitment
fixative
fixer-upper
flag
flageolet, treble_recorder, shepherd's_pipe
flagon
flagpole, flagstaff
flagship
flail
flambeau
flamethrower
flange, rim
flannel
flannel, gabardine, tweed, white
flannelette
flap, flaps
flash, photoflash, flash_lamp, flashgun, flashbulb, flash_bulb
flash
flash_camera
flasher
flashlight, torch
flashlight_battery
flash_memory
flask
flat_arch, straight_arch
flatbed
flatbed_press, cylinder_press
flat_bench
flatcar, flatbed, flat
flat_file
flatlet
flat_panel_display, FPD
flats
flat_tip_screwdriver
fleece
fleet_ballistic_missile_submarine
fleur-de-lis, fleur-de-lys
flight_simulator, trainer
flintlock
flintlock, firelock
flip-flop, thong
flipper, fin
float, plasterer's_float
floating_dock, floating_dry_dock
floatplane, pontoon_plane
flood, floodlight, flood_lamp, photoflood
floor, flooring
floor, level, storey, story
floor
floorboard
floor_cover, floor_covering
floor_joist
floor_lamp
flophouse, dosshouse
florist, florist_shop, flower_store
floss
flotsam, jetsam
flour_bin
flour_mill
flowerbed, flower_bed, bed_of_flowers
flugelhorn, fluegelhorn
fluid_drive
fluid_flywheel
flume
fluorescent_lamp
fluoroscope, roentgenoscope
flush_toilet, lavatory
flute, transverse_flute
flute, flute_glass, champagne_flute
flux_applicator
fluxmeter
fly
flying_boat
flying_buttress, arc-boutant
flying_carpet
flying_jib
fly_rod
fly_tent
flytrap
flywheel
fob, watch_chain, watch_guard
foghorn
foglamp
foil
fold, sheepfold, sheep_pen, sheepcote
folder
folding_chair
folding_door, accordion_door
folding_saw
food_court
food_processor
food_hamper
foot
footage
football
football_helmet
football_stadium
footbath
foot_brake
footbridge, overcrossing, pedestrian_bridge
foothold, footing
footlocker, locker
foot_rule
footstool, footrest, ottoman, tuffet
footwear, footgear
footwear
forceps
force_pump
fore-and-after
fore-and-aft_sail
forecastle, fo'c'sle
forecourt
foredeck
fore_edge, foredge
foreground
foremast
fore_plane
foresail
forestay
foretop
fore-topmast
fore-topsail
forge
fork
forklift
formalwear, eveningwear, evening_dress, evening_clothes
Formica
fortification, munition
fortress, fort
forty-five
Foucault_pendulum
foulard
foul-weather_gear
foundation_garment, foundation
foundry, metalworks
fountain
fountain_pen
four-in-hand
four-poster
four-pounder
four-stroke_engine, four-stroke_internal-combustion_engine
four-wheel_drive, 4WD
four-wheel_drive, 4WD
four-wheeler
fowling_piece
foxhole, fox_hole
fragmentation_bomb, antipersonnel_bomb, anti-personnel_bomb, daisy_cutter
frail
fraise
frame, framing
frame
frame_buffer
framework
Francis_turbine
franking_machine
free_house
free-reed
free-reed_instrument
freewheel
freight_car
freight_elevator, service_elevator
freight_liner, liner_train
freight_train, rattler
French_door
French_horn, horn
French_polish, French_polish_shellac
French_roof
French_window
Fresnel_lens
fret
friary
friction_clutch
frieze
frieze
frigate
frigate
frill, flounce, ruffle, furbelow
Frisbee
frock
frock_coat
frontlet, frontal
front_porch
front_projector
fruit_machine
frying_pan, frypan, skillet
fuel_filter
fuel_gauge, fuel_indicator
fuel_injection, fuel_injection_system
fuel_system
full-dress_uniform
full_metal_jacket
full_skirt
fumigator
funeral_home, funeral_parlor, funeral_parlour, funeral_chapel, funeral_church, funeral-residence
funnel
funny_wagon
fur
fur_coat
fur_hat
furnace
furnace_lining, refractory
furnace_room
furnishing
furnishing, trappings
furniture, piece_of_furniture, article_of_furniture
fur-piece
furrow
fuse, electrical_fuse, safety_fuse
fusee_drive, fusee
fuselage
fusil
fustian
futon
gabardine
gable, gable_end, gable_wall
gable_roof, saddle_roof, saddleback, saddleback_roof
gadgetry
gaff
gaff
gaff
gaffsail, gaff-headed_sail
gaff_topsail, fore-and-aft_topsail
gag, muzzle
gaiter
gaiter
Galilean_telescope
galleon
gallery
gallery, art_gallery, picture_gallery
galley, ship's_galley, caboose, cookhouse
galley
galley
gallows
gallows_tree, gallows-tree, gibbet, gallous
galvanometer
gambling_house, gambling_den, gambling_hell, gaming_house
gambrel, gambrel_roof
game
gamebag
game_equipment
gaming_table
gamp, brolly
gangplank, gangboard, gangway
gangsaw
gangway
gantlet
gantry, gauntry
garage
garage, service_department
Garand_rifle, Garand, M-1, M-1_rifle
garbage
garbage_truck, dustcart
garboard, garboard_plank, garboard_strake
garden
garden
garden_rake
garden_spade
garden_tool, lawn_tool
garden_trowel
gargoyle
garibaldi
garlic_press
garment
garment_bag
garrison_cap, overseas_cap
garrote, garotte, garrotte, iron_collar
garter, supporter
garter_belt, suspender_belt
garter_stitch
gas_guzzler
gas_shell
gas_bracket
gas_burner, gas_jet
gas-cooled_reactor
gas-discharge_tube
gas_engine
gas_fixture
gas_furnace
gas_gun
gas_heater
gas_holder, gasometer
gasket
gas_lamp
gas_maser
gasmask, respirator, gas_helmet
gas_meter, gasometer
gasoline_engine, petrol_engine
gasoline_gauge, gasoline_gage, gas_gauge, gas_gage, petrol_gauge, petrol_gage
gas_oven
gas_oven
gas_pump, gasoline_pump, petrol_pump, island_dispenser
gas_range, gas_stove, gas_cooker
gas_ring
gas_tank, gasoline_tank, petrol_tank
gas_thermometer, air_thermometer
gastroscope
gas_turbine
gas-turbine_ship
gat, rod
gate
gatehouse
gateleg_table
gatepost
gathered_skirt
Gatling_gun
gauge, gage
gauntlet, gantlet
gauntlet, gantlet, metal_glove
gauze, netting, veiling
gauze, gauze_bandage
gavel
gazebo, summerhouse
gear, gear_wheel, geared_wheel, cogwheel
gear, paraphernalia, appurtenance
gear, gear_mechanism
gearbox, gear_box, gear_case
gearing, gear, geartrain, power_train, train
gearset
gearshift, gearstick, shifter, gear_lever
Geiger_counter, Geiger-Muller_counter
Geiger_tube, Geiger-Muller_tube
gene_chip, DNA_chip
general-purpose_bomb, GP_bomb
generator
generator
generator
Geneva_gown
geodesic_dome
georgette
gharry
ghat
ghetto_blaster, boom_box
gift_shop, novelty_shop
gift_wrapping
gig
gig
gig
gig
gildhall
gill_net
gilt, gilding
gimbal
gingham
girandole, girandola
girder
girdle, cincture, sash, waistband, waistcloth
glass, drinking_glass
glass
glass_cutter
glasses_case
glebe_house
Glengarry
glider, sailplane
Global_Positioning_System, GPS
glockenspiel, orchestral_bells
glory_hole, lazaretto
glove
glove_compartment
glow_lamp
glow_tube
glyptic_art, glyptography
glyptics, lithoglyptics
gnomon
goal
goalmouth
goalpost
goblet
godown
goggles
go-kart
gold_plate
golf_bag
golf_ball
golfcart, golf_cart
golf_club, golf-club, club
golf-club_head, club_head, club-head, clubhead
golf_equipment
golf_glove
golliwog, golliwogg
gondola
gong, tam-tam
goniometer
Gordian_knot
gorget
gossamer
Gothic_arch
gouache
gouge
gourd, calabash
government_building
government_office
gown
gown, robe
gown, surgical_gown, scrubs
grab
grab_bag
grab_bar
grace_cup
grade_separation
graduated_cylinder
graffito, graffiti
gramophone, acoustic_gramophone
granary, garner
grandfather_clock, longcase_clock
grand_piano, grand
graniteware
granny_knot, granny
grape_arbor, grape_arbour
grapnel, grapnel_anchor
grapnel, grapple, grappler, grappling_hook, grappling_iron
grass_skirt
grate, grating
grate, grating
grater
graver, graving_tool, pointel, pointrel
gravestone, headstone, tombstone
gravimeter, gravity_meter
gravure, photogravure, heliogravure
gravy_boat, gravy_holder, sauceboat, boat
grey, gray
grease-gun, gun
greasepaint
greasy_spoon
greatcoat, overcoat, topcoat
great_hall
greave, jambeau
greengrocery
greenhouse, nursery, glasshouse
grenade
grid, gridiron
griddle
grill, grille, grillwork
grille, radiator_grille
grillroom, grill
grinder
grinding_wheel, emery_wheel
grindstone
gripsack
gristmill
grocery_bag
grocery_store, grocery, food_market, market
grogram
groined_vault
groover
grosgrain
gros_point
ground, earth
ground_bait
ground_control
ground_floor, first_floor, ground_level
groundsheet, ground_cloth
G-string, thong
guard, safety, safety_device
guard_boat
guardroom
guardroom
guard_ship
guard's_van
gueridon
Guarnerius
guesthouse
guestroom
guidance_system, guidance_device
guided_missile
guided_missile_cruiser
guided_missile_frigate
guildhall
guilloche
guillotine
guimpe
guimpe
guitar
guitar_pick
gulag
gun
gunboat
gun_carriage
gun_case
gun_emplacement, weapons_emplacement
gun_enclosure, gun_turret, turret
gunlock, firing_mechanism
gunnery
gunnysack, gunny_sack, burlap_bag
gun_pendulum
gun_room
gunsight, gun-sight
gun_trigger, trigger
gurney
gusher
gusset, inset
gusset, gusset_plate
guy, guy_cable, guy_wire, guy_rope
gymnastic_apparatus, exerciser
gym_shoe, sneaker, tennis_shoe
gym_suit
gymslip
gypsy_cab
gyrocompass
gyroscope, gyro
gyrostabilizer, gyrostabiliser
habergeon
habit
habit, riding_habit
hacienda
hacksaw, hack_saw, metal_saw
haft, helve
hairbrush
haircloth, hair
hairdressing, hair_tonic, hair_oil, hair_grease
hairnet
hairpiece, false_hair, postiche
hairpin
hair_shirt
hair_slide
hair_spray
hairspring
hair_trigger
halberd
half_binding
half_hatchet
half_hitch
half_track
hall
hall
hall
Hall_of_Fame
hall_of_residence
hallstand
halter
halter, hackamore
hame
hammer
hammer, power_hammer
hammer
hammerhead
hammock, sack
hamper
hand
handball
handbarrow
handbell
hand_blower, blow_dryer, blow_drier, hair_dryer, hair_drier
handbow
hand_brake, emergency, emergency_brake, parking_brake
hand_calculator, pocket_calculator
handcar
handcart, pushcart, cart, go-cart
hand_cream
handcuff, cuff, handlock, manacle
hand_drill, handheld_drill
hand_glass, simple_microscope, magnifying_glass
hand_glass, hand_mirror
hand_grenade
hand-held_computer, hand-held_microcomputer
handhold
handkerchief, hankie, hanky, hankey
handlebar
handloom
hand_lotion
hand_luggage
hand-me-down
hand_mower
hand_pump
handrest
handsaw, hand_saw, carpenter's_saw
handset, French_telephone
hand_shovel
handspike
handstamp, rubber_stamp
hand_throttle
hand_tool
hand_towel, face_towel
hand_truck, truck
handwear, hand_wear
handwheel
handwheel
hangar_queen
hanger
hang_glider
hangman's_rope, hangman's_halter, halter, hemp, hempen_necktie
hank
hansom, hansom_cab
harbor, harbour
hard_disc, hard_disk, fixed_disk
hard_hat, tin_hat, safety_hat
hardtop
hardware, ironware
hardware_store, ironmonger, ironmonger's_shop
harmonica, mouth_organ, harp, mouth_harp
harmonium, organ, reed_organ
harness
harness
harp
harp
harpoon
harpoon_gun
harpoon_log
harpsichord, cembalo
Harris_Tweed
harrow
harvester, reaper
hash_house
hasp
hat, chapeau, lid
hatbox
hatch
hatchback, hatchback_door
hatchback
hatchel, heckle
hatchet
hatpin
hauberk, byrnie
Hawaiian_guitar, steel_guitar
hawse, hawsehole, hawsepipe
hawser
hawser_bend
hay_bale
hayfork
hayloft, haymow, mow
haymaker, hay_conditioner
hayrack, hayrig
hayrack
hazard
head
head
head
headboard
head_covering, veil
headdress, headgear
header
header
header, coping, cope
header, lintel
headfast
head_gasket
head_gate
headgear
headlight, headlamp
headpiece
headpin, kingpin
headquarters, central_office, main_office, home_office, home_base
headrace
headrest
headsail
headscarf
headset
head_shop
headstall, headpiece
headstock
health_spa, spa, health_club
hearing_aid, ear_trumpet
hearing_aid, deaf-aid
hearse
hearth, fireside
hearthrug
heart-lung_machine
heat_engine
heater, warmer
heat_exchanger
heating_pad, hot_pad
heat_lamp, infrared_lamp
heat_pump
heat-seeking_missile
heat_shield
heat_sink
heaume
heaver
heavier-than-air_craft
heckelphone, basset_oboe
hectograph, heliotype
hedge, hedgerow
hedge_trimmer
helicon, bombardon
helicopter, chopper, whirlybird, eggbeater
heliograph
heliometer
helm
helmet
helmet
hematocrit, haematocrit
hemming-stitch
hemostat, haemostat
hemstitch, hemstitching
henroost
heraldry
hermitage
herringbone
herringbone, herringbone_pattern
Herschelian_telescope, off-axis_reflector
Hessian_boot, hessian, jackboot, Wellington, Wellington_boot
heterodyne_receiver, superheterodyne_receiver, superhet
hibachi
hideaway, retreat
hi-fi, high_fidelity_sound_system
high_altar
high-angle_gun
highball_glass
highboard
highboy, tallboy
highchair, feeding_chair
high_gear, high
high-hat_cymbal, high_hat
highlighter
highlighter
high-pass_filter
high-rise, tower_block
high_table
high-warp_loom
hijab
hinge, flexible_joint
hinging_post, swinging_post
hip_boot, thigh_boot
hipflask, pocket_flask
hip_pad
hip_pocket
hippodrome
hip_roof, hipped_roof
hitch
hitch
hitching_post
hitchrack, hitching_bar
hob
hobble_skirt
hockey_skate
hockey_stick
hod
hodoscope
hoe
hoe_handle
hogshead
hoist
hold, keep
holder
holding_cell
holding_device
holding_pen, holding_paddock, holding_yard
hollowware, holloware
holster
holster
holy_of_holies, sanctum_sanctorum
home, nursing_home, rest_home
home_appliance, household_appliance
home_computer
home_plate, home_base, home, plate
home_room, homeroom
homespun
homestead
home_theater, home_theatre
homing_torpedo
hone
honeycomb
hood, bonnet, cowl, cowling
hood
hood
hood, exhaust_hood
hood
hood_latch
hook
hook, claw
hook
hookah, narghile, nargileh, sheesha, shisha, chicha, calean, kalian, water_pipe, hubble-bubble, hubbly-bubbly
hook_and_eye
hookup, assemblage
hookup
hook_wrench, hook_spanner
hoopskirt, crinoline
hoosegow, hoosgow
Hoover
hope_chest, wedding_chest
hopper
hopsacking, hopsack
horizontal_bar, high_bar
horizontal_stabilizer, horizontal_stabiliser, tailplane
horizontal_tail
horn
horn
horn
horn_button
hornpipe, pibgorn, stockhorn
horse, gymnastic_horse
horsebox
horsecar
horse_cart, horse-cart
horsecloth
horse-drawn_vehicle
horsehair
horsehair_wig
horseless_carriage
horse_pistol, horse-pistol
horseshoe, shoe
horseshoe
horse-trail
horsewhip
hose
hosiery, hose
hospice
hospital, infirmary
hospital_bed
hospital_room
hospital_ship
hospital_train
hostel, youth_hostel, student_lodging
hostel, hostelry, inn, lodge, auberge
hot-air_balloon
hotel
hotel-casino, casino-hotel
hotel-casino, casino-hotel
hotel_room
hot_line
hot_pants
hot_plate, hotplate
hot_rod, hot-rod
hot_spot, hotspot
hot_tub
hot-water_bottle, hot-water_bag
houndstooth_check, hound's-tooth_check, dogstooth_check, dogs-tooth_check, dog's-tooth_check
hourglass
hour_hand, little_hand
house
house
houseboat
houselights
house_of_cards, cardhouse, card-house, cardcastle
house_of_correction
house_paint, housepaint
housetop
housing, lodging, living_accommodations
hovel, hut, hutch, shack, shanty
hovercraft, ground-effect_machine
howdah, houdah
huarache, huaraches
hub-and-spoke, hub-and-spoke_system
hubcap
huck, huckaback
hug-me-tight
hula-hoop
hulk
hull
humeral_veil, veil
Humvee, Hum-Vee
hunter, hunting_watch
hunting_knife
hurdle
hurricane_deck, hurricane_roof, promenade_deck, awning_deck
hurricane_lamp, hurricane_lantern, tornado_lantern, storm_lantern, storm_lamp
hut, army_hut, field_hut
hutch
hutment
hydraulic_brake, hydraulic_brakes
hydraulic_press
hydraulic_pump, hydraulic_ram
hydraulic_system
hydraulic_transmission, hydraulic_transmission_system
hydroelectric_turbine
hydrofoil, hydroplane
hydrofoil, foil
hydrogen_bomb, H-bomb, fusion_bomb, thermonuclear_bomb
hydrometer, gravimeter
hygrodeik
hygrometer
hygroscope
hyperbaric_chamber
hypercoaster
hypermarket
hypodermic_needle
hypodermic_syringe, hypodermic, hypo
hypsometer
hysterosalpingogram
I-beam
ice_ax, ice_axe, piolet
iceboat, ice_yacht, scooter
icebreaker, iceboat
iced-tea_spoon
ice_hockey_rink, ice-hockey_rink
ice_machine
ice_maker
ice_pack, ice_bag
icepick, ice_pick
ice_rink, ice-skating_rink, ice
ice_skate
ice_tongs
icetray
iconoscope
Identikit, Identikit_picture
idle_pulley, idler_pulley, idle_wheel
igloo, iglu
ignition_coil
ignition_key
ignition_switch
imaret
immovable_bandage
impact_printer
impeller
implant
implement
impression
imprint
improvised_explosive_device, I.E.D., IED
impulse_turbine
in-basket, in-tray
incendiary_bomb, incendiary, firebomb
incinerator
inclined_plane
inclinometer, dip_circle
inclinometer
incrustation, encrustation
incubator, brooder
index_register
Indiaman
Indian_club
indicator
induction_coil
inductor, inductance
industrial_watercourse
inertial_guidance_system, inertial_navigation_system
inflater, inflator
inhaler, inhalator
injector
ink_bottle, inkpot
ink_eraser
ink-jet_printer
inkle
inkstand
inkwell, inkstand
inlay
inside_caliper
insole, innersole
instep
instillator
institution
instrument
instrument_of_punishment
instrument_of_torture
intaglio, diaglyph
intake_valve
integrated_circuit, microcircuit
integrator, planimeter
Intelnet
interceptor
interchange
intercommunication_system, intercom
intercontinental_ballistic_missile, ICBM
interface, port
interferometer
interior_door
internal-combustion_engine, ICE
internal_drive
internet, net, cyberspace
interphone
interrupter
intersection, crossroad, crossway, crossing, carrefour
interstice
intraocular_lens
intravenous_pyelogram, IVP
inverter
ion_engine
ionization_chamber, ionization_tube
iPod
video_iPod
iron, smoothing_iron
iron
iron, branding_iron
irons, chains
ironclad
iron_foundry
iron_horse
ironing
iron_lung
ironmongery
ironworks
irrigation_ditch
izar
jabot
jack
jack, jackstones
jack
jack
jacket
jacket
jacket
jack-in-the-box
jack-o'-lantern
jack_plane
Jacob's_ladder, jack_ladder, pilot_ladder
jaconet
Jacquard_loom, Jacquard
jacquard
jag, dag
jail, jailhouse, gaol, clink, slammer, poky, pokey
jalousie
jamb
jammer
jampot, jamjar
japan
jar
Jarvik_heart, Jarvik_artificial_heart
jaunting_car, jaunty_car
javelin
jaw
Jaws_of_Life
jean, blue_jean, denim
jeep, landrover
jellaba
jerkin
jeroboam, double-magnum
jersey
jersey, T-shirt, tee_shirt
jet, jet_plane, jet-propelled_plane
jet_bridge
jet_engine
jetliner
jeweler's_glass
jewelled_headdress, jeweled_headdress
jew's_harp, jews'_harp, mouth_bow
jib
jibboom
jig
jig
jiggermast, jigger
jigsaw, scroll_saw, fretsaw
jigsaw_puzzle
jinrikisha, ricksha, rickshaw
jobcentre
jodhpurs, jodhpur_breeches, riding_breeches
jodhpur, jodhpur_boot, jodhpur_shoe
joinery
joint
Joint_Direct_Attack_Munition, JDAM
jointer, jointer_plane, jointing_plane, long_plane
joist
jolly_boat, jolly
jorum
joss_house
journal_bearing
journal_box
joystick
jungle_gym
junk
jug
jukebox, nickelodeon
jumbojet, jumbo_jet
jumper, pinafore, pinny
jumper
jumper
jumper
jumper_cable, jumper_lead, lead, booster_cable
jump_seat
jump_suit
jump_suit, jumpsuit
junction
junction, conjunction
junction_barrier, barrier_strip
junk_shop
jury_box
jury_mast
kachina
kaffiyeh
kalansuwa
Kalashnikov
kameez
kanzu
katharometer
kayak
kazoo
keel
keelboat
keelson
keep, donjon, dungeon
keg
kennel, doghouse, dog_house
kepi, peaked_cap, service_cap, yachting_cap
keratoscope
kerchief
ketch
kettle, boiler
kettle, kettledrum, tympanum, tympani, timpani
key
key
keyboard
keyboard_buffer
keyboard_instrument
keyhole
keyhole_saw
khadi, khaddar
khaki
khakis
khimar
khukuri
kick_pleat
kicksorter, pulse_height_analyzer
kickstand
kick_starter, kick_start
kid_glove, suede_glove
kiln
kilt
kimono
kinescope, picture_tube, television_tube
Kinetoscope
king
king
kingbolt, kingpin, swivel_pin
king_post
Kipp's_apparatus
kirk
kirpan
kirtle
kirtle
kit, outfit
kit
kitbag, kit_bag
kitchen
kitchen_appliance
kitchenette
kitchen_table
kitchen_utensil
kitchenware
kite_balloon
klaxon, claxon
klieg_light
klystron
knee_brace
knee-high, knee-hi
knee_pad
knee_piece
knife
knife
knife_blade
knight, horse
knit
knitting_machine
knitting_needle
knitwear
knob, boss
knob, pommel
knobble
knobkerrie, knobkerry
knocker, doorknocker, rapper
knot
knuckle_joint, hinge_joint
kohl
koto
kraal
kremlin
kris, creese, crease
krummhorn, crumhorn, cromorne
Kundt's_tube
Kurdistan
kurta
kylix, cylix
kymograph, cymograph
lab_bench, laboratory_bench
lab_coat, laboratory_coat
lace
lacquer
lacquerware
lacrosse_ball
ladder-back
ladder-back, ladder-back_chair
ladder_truck, aerial_ladder_truck
ladies'_room, powder_room
ladle
lady_chapel
lagerphone
lag_screw, lag_bolt
lake_dwelling, pile_dwelling
lally, lally_column
lamasery
lambrequin
lame
laminar_flow_clean_room
laminate
lamination
lamp
lamp
lamp_house, lamphouse, lamp_housing
lamppost
lampshade, lamp_shade
lanai
lancet_arch, lancet
lancet_window
landau
lander
landing_craft
landing_flap
landing_gear
landing_net
landing_skid
land_line, landline
land_mine, ground-emplaced_mine, booby_trap
land_office
lanolin
lantern
lanyard, laniard
lap, lap_covering
laparoscope
lapboard
lapel
lap_joint, splice
laptop, laptop_computer
laryngoscope
laser, optical_maser
laser-guided_bomb, LGB
laser_printer
lash, thong
lashing
lasso, lariat, riata, reata
latch
latch, door_latch
latchet
latchkey
lateen, lateen_sail
latex_paint, latex, rubber-base_paint
lath
lathe
latrine
lattice, latticework, fretwork
launch
launcher, rocket_launcher
laundry, wash, washing, washables
laundry_cart
laundry_truck
lavalava
lavaliere, lavalier, lavalliere
laver
lawn_chair, garden_chair
lawn_furniture
lawn_mower, mower
layette
lead-acid_battery, lead-acid_accumulator
lead-in
leading_rein
lead_pencil
leaf_spring
lean-to
lean-to_tent
leash, tether, lead
leatherette, imitation_leather
leather_strip
Leclanche_cell
lectern, reading_desk
lecture_room
lederhosen
ledger_board
leg
leg
legging, leging, leg_covering
Leiden_jar, Leyden_jar
leisure_wear
lens, lense, lens_system
lens, electron_lens
lens_cap, lens_cover
lens_implant, interocular_lens_implant, IOL
leotard, unitard, body_suit, cat_suit
letter_case
letter_opener, paper_knife, paperknife
levee
level, spirit_level
lever
lever, lever_tumbler
lever
lever_lock
Levi's, levis
Liberty_ship
library
library
lid
Liebig_condenser
lie_detector
lifeboat
life_buoy, lifesaver, life_belt, life_ring
life_jacket, life_vest, cork_jacket
life_office
life_preserver, preserver, flotation_device
life-support_system, life_support
life-support_system, life_support
lifting_device
lift_pump
ligament
ligature
light, light_source
light_arm
light_bulb, lightbulb, bulb, incandescent_lamp, electric_light, electric-light_bulb
light_circuit, lighting_circuit
light-emitting_diode, LED
lighter, light, igniter, ignitor
lighter-than-air_craft
light_filter, diffusing_screen
lighting
light_machine_gun
light_meter, exposure_meter, photometer
light_microscope
lightning_rod, lightning_conductor
light_pen, electronic_stylus
lightship
Lilo
limber
limekiln
limiter, clipper
limousine, limo
linear_accelerator, linac
linen
line_printer, line-at-a-time_printer
liner, ocean_liner
liner, lining
lingerie, intimate_apparel
lining, liner
link, data_link
linkage
Link_trainer
linocut
linoleum_knife, linoleum_cutter
Linotype, Linotype_machine
linsey-woolsey
linstock
lion-jaw_forceps
lip-gloss
lipstick, lip_rouge
liqueur_glass
liquid_crystal_display, LCD
liquid_metal_reactor
lisle
lister, lister_plow, lister_plough, middlebreaker, middle_buster
litterbin, litter_basket, litter-basket
little_theater, little_theatre
live_axle, driving_axle
living_quarters, quarters
living_room, living-room, sitting_room, front_room, parlor, parlour
load
Loafer
loaner
lobe
lobster_pot
local
local_area_network, LAN
local_oscillator, heterodyne_oscillator
Lochaber_ax
lock
lock, ignition_lock
lock, lock_chamber
lock
lockage
locker
locker_room
locket
lock-gate
locking_pliers
lockring, lock_ring, lock_washer
lockstitch
lockup
locomotive, engine, locomotive_engine, railway_locomotive
lodge, indian_lodge
lodge, hunting_lodge
lodge
lodging_house, rooming_house
loft, attic, garret
loft, pigeon_loft
loft
log_cabin
loggia
longbow
long_iron
long_johns
long_sleeve
long_tom
long_trousers, long_pants
long_underwear, union_suit
looking_glass, glass
lookout, observation_tower, lookout_station, observatory
loom
loop_knot
lorgnette
Lorraine_cross, cross_of_Lorraine
lorry, camion
lota
lotion
loudspeaker, speaker, speaker_unit, loudspeaker_system, speaker_system
lounge, waiting_room, waiting_area
lounger
lounging_jacket, smoking_jacket
lounging_pajama, lounging_pyjama
loungewear
loupe, jeweler's_loupe
louvered_window, jalousie
love_knot, lovers'_knot, lover's_knot, true_lovers'_knot, true_lover's_knot
love_seat, loveseat, tete-a-tete, vis-a-vis
loving_cup
lowboy
low-pass_filter
low-warp-loom
LP, L-P
L-plate
lubber's_hole
lubricating_system, force-feed_lubricating_system, force_feed, pressure-feed_lubricating_system, pressure_feed
luff
lug
luge
Luger
luggage_carrier
luggage_compartment, automobile_trunk, trunk
luggage_rack, roof_rack
lugger
lugsail, lug
lug_wrench
lumberjack, lumber_jacket
lumbermill, sawmill
lunar_excursion_module, lunar_module, LEM
lunchroom
lunette
lungi, lungyi, longyi
lunula
lusterware
lute
luxury_liner, express_luxury_liner
lyceum
lychgate, lichgate
lyre
machete, matchet, panga
machicolation
machine
machine, simple_machine
machine_bolt
machine_gun
machinery
machine_screw
machine_tool
machinist's_vise, metalworking_vise
machmeter
mackinaw
mackinaw, Mackinaw_boat
mackinaw, Mackinaw_coat
mackintosh, macintosh
macrame
madras
Mae_West, air_jacket
magazine_rack
magic_lantern
magnet
magnetic_bottle
magnetic_compass
magnetic_core_memory, core_memory
magnetic_disk, magnetic_disc, disk, disc
magnetic_head
magnetic_mine
magnetic_needle
magnetic_recorder
magnetic_stripe
magnetic_tape, mag_tape, tape
magneto, magnetoelectric_machine
magnetometer, gaussmeter
magnetron
magnifier
magnum
magnus_hitch
mail
mailbag, postbag
mailbag, mail_pouch
mailboat, mail_boat, packet, packet_boat
mailbox, letter_box
mail_car
maildrop
mailer
maillot
maillot, tank_suit
mailsorter
mail_train
mainframe, mainframe_computer
mainmast
main_rotor
mainsail
mainspring
main-topmast
main-topsail
main_yard
maisonette, maisonnette
majolica, maiolica
makeup, make-up, war_paint
Maksutov_telescope
malacca, malacca_cane
mallet, beetle
mallet, hammer
mallet
mammogram
mandola
mandolin
manger, trough
mangle
manhole
manhole_cover
man-of-war, ship_of_the_line
manometer
manor, manor_house
manor_hall, hall
MANPAD
mansard, mansard_roof
manse
mansion, mansion_house, manse, hall, residence
mantel, mantelpiece, mantle, mantlepiece, chimneypiece
mantelet, mantilla
mantilla
Mao_jacket
map
maquiladora
maraca
marble
marching_order
marimba, xylophone
marina
marker
marketplace, market_place, mart, market
marlinespike, marlinspike, marlingspike
marocain, crepe_marocain
marquee, marquise
marquetry, marqueterie
marriage_bed
martello_tower
martingale
mascara
maser
masher
mashie, five_iron
mashie_niblick, seven_iron
masjid, musjid
mask
mask
Masonite
Mason_jar
masonry
mason's_level
massage_parlor
massage_parlor
mass_spectrograph
mass_spectrometer, spectrometer
mast
mast
mastaba, mastabah
master_bedroom
masterpiece, chef-d'oeuvre
mat
mat, gym_mat
match, lucifer, friction_match
match
matchboard
matchbook
matchbox
matchlock
match_plane, tonguing_and_grooving_plane
matchstick
material
materiel, equipage
maternity_hospital
maternity_ward
matrix
Matthew_Walker, Matthew_Walker_knot
matting
mattock
mattress_cover
maul, sledge, sledgehammer
maulstick, mahlstick
Mauser
mausoleum
maxi
Maxim_gun
maximum_and_minimum_thermometer
maypole
maze, labyrinth
mazer
means
measure
measuring_cup
measuring_instrument, measuring_system, measuring_device
measuring_stick, measure, measuring_rod
meat_counter
meat_grinder
meat_hook
meat_house
meat_safe
meat_thermometer
mechanical_device
mechanical_piano, Pianola, player_piano
mechanical_system
mechanism
medical_building, health_facility, healthcare_facility
medical_instrument
medicine_ball
medicine_chest, medicine_cabinet
MEDLINE
megalith, megalithic_structure
megaphone
memorial, monument
memory, computer_memory, storage, computer_storage, store, memory_board
memory_chip
memory_device, storage_device
menagerie, zoo, zoological_garden
mending
menhir, standing_stone
menorah
Menorah
man's_clothing
men's_room, men's
mercantile_establishment, retail_store, sales_outlet, outlet
mercury_barometer
mercury_cell
mercury_thermometer, mercury-in-glass_thermometer
mercury-vapor_lamp
mercy_seat
merlon
mess, mess_hall
mess_jacket, monkey_jacket, shell_jacket
mess_kit
messuage
metal_detector
metallic
metal_screw
metal_wood
meteorological_balloon
meter
meterstick, metrestick
metronome
mezzanine, mezzanine_floor, entresol
mezzanine, first_balcony
microbalance
microbrewery
microfiche
microfilm
micrometer, micrometer_gauge, micrometer_caliper
microphone, mike
microprocessor
microscope
microtome
microwave, microwave_oven
microwave_diathermy_machine
microwave_linear_accelerator
middy, middy_blouse
midiron, two_iron
mihrab
mihrab
military_hospital
military_quarters
military_uniform
military_vehicle
milk_bar
milk_can
milk_float
milking_machine
milking_stool
milk_wagon, milkwagon
mill, grinder, milling_machinery
milldam
miller, milling_machine
milliammeter
millinery, woman's_hat
millinery, hat_shop
milling
millivoltmeter
millstone
millstone
millwheel, mill_wheel
mimeograph, mimeo, mimeograph_machine, Roneo, Roneograph
minaret
mincer, mincing_machine
mine
mine_detector
minelayer
mineshaft
minibar, cellaret
minibike, motorbike
minibus
minicar
minicomputer
ministry
miniskirt, mini
minisub, minisubmarine
minivan
miniver
mink, mink_coat
minster
mint
minute_hand, big_hand
Minuteman
mirror
missile
missile_defense_system, missile_defence_system
miter_box, mitre_box
miter_joint, mitre_joint, miter, mitre
mitten
mixer
mixer
mixing_bowl
mixing_faucet
mizzen, mizen
mizzenmast, mizenmast, mizzen, mizen
mobcap
mobile_home, manufactured_home
moccasin, mocassin
mock-up
mod_con
Model_T
modem
modillion
module
module
mohair
moire, watered-silk
mold, mould, cast
moldboard, mouldboard
moldboard_plow, mouldboard_plough
moleskin
Molotov_cocktail, petrol_bomb, gasoline_bomb
monastery
monastic_habit
moneybag
money_belt
monitor
monitor
monitor, monitoring_device
monkey-wrench, monkey_wrench
monk's_cloth
monochrome
monocle, eyeglass
monofocal_lens_implant, monofocal_IOL
monoplane
monotype
monstrance, ostensorium
mooring_tower, mooring_mast
Moorish_arch, horseshoe_arch
moped
mop_handle
moquette
morgue, mortuary, dead_room
morion, cabasset
morning_dress
morning_dress
morning_room
Morris_chair
mortar, howitzer, trench_mortar
mortar
mortarboard
mortise_joint, mortise-and-tenon_joint
mosaic
mosque
mosquito_net
motel
motel_room
Mother_Hubbard, muumuu
motion-picture_camera, movie_camera, cine-camera
motion-picture_film, movie_film, cine-film
motley
motley
motor
motorboat, powerboat
motorcycle, bike
motor_hotel, motor_inn, motor_lodge, tourist_court, court
motorized_wheelchair
motor_scooter, scooter
motor_vehicle, automotive_vehicle
mound, hill
mound, hill, pitcher's_mound
mount, setting
mountain_bike, all-terrain_bike, off-roader
mountain_tent
mouse, computer_mouse
mouse_button
mousetrap
mousse, hair_mousse, hair_gel
mouthpiece, embouchure
mouthpiece
mouthpiece, gumshield
movement
movie_projector, cine_projector, film_projector
moving-coil_galvanometer
moving_van
mud_brick
mudguard, splash_guard, splash-guard
mudhif
muff
muffle
muffler
mufti
mug
mulch
mule, scuff
multichannel_recorder
multiengine_airplane, multiengine_plane
multiplex
multiplexer
multiprocessor
multistage_rocket, step_rocket
munition, ordnance, ordnance_store
Murphy_bed
musette, shepherd's_pipe
musette_pipe
museum
mushroom_anchor
musical_instrument, instrument
music_box, musical_box
music_hall, vaudeville_theater, vaudeville_theatre
music_school
music_stand, music_rack
music_stool, piano_stool
musket
musket_ball, ball
muslin
mustache_cup, moustache_cup
mustard_plaster, sinapism
mute
muzzle_loader
muzzle
myelogram
nacelle
nail
nailbrush
nailfile
nailhead
nailhead
nail_polish, nail_enamel, nail_varnish
nainsook
Napier's_bones, Napier's_rods
nard, spikenard
narrowbody_aircraft, narrow-body_aircraft, narrow-body
narrow_wale
narthex
narthex
nasotracheal_tube
national_monument
nautilus, nuclear_submarine, nuclear-powered_submarine
navigational_system
naval_equipment
naval_gun
naval_missile
naval_radar
naval_tactical_data_system
naval_weaponry
nave
navigational_instrument
nebuchadnezzar
neckband
neck_brace
neckcloth, stock
neckerchief
necklace
necklet
neckline
neckpiece
necktie, tie
neckwear
needle
needle
needlenose_pliers
needlework, needlecraft
negative
negative_magnetic_pole, negative_pole, south-seeking_pole
negative_pole
negligee, neglige, peignoir, wrapper, housecoat
neolith
neon_lamp, neon_induction_lamp, neon_tube
nephoscope
nest
nest_egg
net, network, mesh, meshing, meshwork
net
net
net
network, electronic_network
network
neutron_bomb
newel
newel_post, newel
newspaper, paper
newsroom
newsroom
newsstand
Newtonian_telescope, Newtonian_reflector
nib, pen_nib
niblick, nine_iron
nicad, nickel-cadmium_accumulator
nickel-iron_battery, nickel-iron_accumulator
Nicol_prism
night_bell
nightcap
nightgown, gown, nightie, night-robe, nightdress
night_latch
night-light
nightshirt
nightwear, sleepwear, nightclothes
ninepin, skittle, skittle_pin
ninepin_ball, skittle_ball
ninon
nipple
nipple_shield
niqab
Nissen_hut, Quonset_hut
nogging
noisemaker
nonsmoker, nonsmoking_car
non-volatile_storage, nonvolatile_storage
Norfolk_jacket
noria
nosebag, feedbag
noseband, nosepiece
nose_flute
nosewheel
notebook, notebook_computer
nuclear-powered_ship
nuclear_reactor, reactor
nuclear_rocket
nuclear_weapon, atomic_weapon
nude, nude_painting
numdah, numdah_rug, nammad
nun's_habit
nursery, baby's_room
nut_and_bolt
nutcracker
nylon
nylons, nylon_stocking, rayons, rayon_stocking, silk_stocking
oar
oast
oast_house
obelisk
object_ball
objective, objective_lens, object_lens, object_glass
oblique_bandage
oboe, hautboy, hautbois
oboe_da_caccia
oboe_d'amore
observation_dome
observatory
obstacle
obturator
ocarina, sweet_potato
octant
odd-leg_caliper
odometer, hodometer, mileometer, milometer
oeil_de_boeuf
office, business_office
office_building, office_block
office_furniture
officer's_mess
off-line_equipment, auxiliary_equipment
ogee, cyma_reversa
ogee_arch, keel_arch
ohmmeter
oil, oil_color, oil_colour
oilcan
oilcloth
oil_filter
oil_heater, oilstove, kerosene_heater, kerosine_heater
oil_lamp, kerosene_lamp, kerosine_lamp
oil_paint
oil_pump
oil_refinery, petroleum_refinery
oilskin, slicker
oil_slick
oilstone
oil_tanker, oiler, tanker, tank_ship
old_school_tie
olive_drab
olive_drab, olive-drab_uniform
Olympian_Zeus
omelet_pan, omelette_pan
omnidirectional_antenna, nondirectional_antenna
omnirange, omnidirectional_range, omnidirectional_radio_range
onion_dome
open-air_market, open-air_marketplace, market_square
open_circuit
open-end_wrench, tappet_wrench
opener
open-hearth_furnace
openside_plane, rabbet_plane
open_sight
openwork
opera, opera_house
opera_cloak, opera_hood
operating_microscope
operating_room, OR, operating_theater, operating_theatre, surgery
operating_table
ophthalmoscope
optical_device
optical_disk, optical_disc
optical_instrument
optical_pyrometer, pyroscope
optical_telescope
orchestra_pit, pit
ordinary, ordinary_bicycle
organ, pipe_organ
organdy, organdie
organic_light-emitting_diode, OLED
organ_loft
organ_pipe, pipe, pipework
organza
oriel, oriel_window
oriflamme
O_ring
Orlon
orlop_deck, orlop, fourth_deck
orphanage, orphans'_asylum
orphrey
orrery
orthicon, image_orthicon
orthochromatic_film
orthopter, ornithopter
orthoscope
oscillograph
oscilloscope, scope, cathode-ray_oscilloscope, CRO
ossuary
otoscope, auriscope, auroscope
ottoman, pouf, pouffe, puff, hassock
oubliette
out-basket, out-tray
outboard_motor, outboard
outboard_motorboat, outboard
outbuilding
outerwear, overclothes
outfall
outfit, getup, rig, turnout
outfitter
outhouse, privy, earth-closet, jakes
output_device
outrigger
outrigger_canoe
outside_caliper
outside_mirror
outwork
oven
oven_thermometer
overall
overall, boilersuit, boilers_suit
overcoat, overcoating
overdrive
overgarment, outer_garment
overhand_knot
overhang
overhead_projector
overmantel
overnighter, overnight_bag, overnight_case
overpass, flyover
override
overshoe
overskirt
oxbow
Oxbridge
oxcart
oxeye
oxford
oximeter
oxyacetylene_torch
oxygen_mask
oyster_bar
oyster_bed, oyster_bank, oyster_park
pace_car
pacemaker, artificial_pacemaker
pack
pack
pack, face_pack
package, parcel
package_store, liquor_store, off-licence
packaging
packet
packing_box, packing_case
packinghouse, packing_plant
packinghouse
packing_needle
packsaddle
paddle, boat_paddle
paddle
paddle
paddle_box, paddle-box
paddle_steamer, paddle-wheeler
paddlewheel, paddle_wheel
paddock
padlock
page_printer, page-at-a-time_printer
paint, pigment
paintball
paintball_gun
paintbox
paintbrush
paisley
pajama, pyjama, pj's, jammies
pajama, pyjama
palace
palace, castle
palace
palanquin, palankeen
paleolith
palestra, palaestra
palette, pallet
palette_knife
palisade
pallet
pallette, palette
pallium
pallium
pan
pan, cooking_pan
pancake_turner
panchromatic_film
panda_car
paneling, panelling, pane
panhandle
panic_button
pannier
pannier
pannikin
panopticon
panopticon
panpipe, pandean_pipe, syrinx
pantaloon
pantechnicon
pantheon
pantheon
pantie, panty, scanty, step-in
panting, trousering
pant_leg, trouser_leg
pantograph
pantry, larder, buttery
pants_suit, pantsuit
panty_girdle
pantyhose
panzer
paper_chain
paper_clip, paperclip, gem_clip
paper_cutter
paper_fastener
paper_feed
paper_mill
paper_towel
parabolic_mirror
parabolic_reflector, paraboloid_reflector
parachute, chute
parallel_bars, bars
parallel_circuit, shunt_circuit
parallel_interface, parallel_port
parang
parapet, breastwork
parapet
parasail
parasol, sunshade
parer, paring_knife
parfait_glass
pargeting, pargetting, pargetry
pari-mutuel_machine, totalizer, totaliser, totalizator, totalisator
parka, windbreaker, windcheater, anorak
park_bench
parking_meter
parlor, parlour
parquet, parquet_floor
parquetry, parqueterie
parsonage, vicarage, rectory
Parsons_table
partial_denture
particle_detector
partition, divider
parts_bin
party_line
party_wall
parvis
passenger_car, coach, carriage
passenger_ship
passenger_train
passenger_van
passe-partout
passive_matrix_display
passkey, passe-partout, master_key, master
pass-through
pastry_cart
patch
patchcord
patchouli, patchouly, pachouli
patch_pocket
patchwork, patchwork_quilt
patent_log, screw_log, taffrail_log
paternoster
patina
patio, terrace
patisserie
patka
patrol_boat, patrol_ship
patty-pan
pave
pavilion, marquee
pavior, paviour, paving_machine
pavis, pavise
pawn
pawnbroker's_shop, pawnshop, loan_office
pay-phone, pay-station
PC_board
peach_orchard
pea_jacket, peacoat
peavey, peavy, cant_dog, dog_hook
pectoral, pectoral_medallion
pedal, treadle, foot_pedal, foot_lever
pedal_pusher, toreador_pants
pedestal, plinth, footstall
pedestal_table
pedestrian_crossing, zebra_crossing
pedicab, cycle_rickshaw
pediment
pedometer
peeler
peep_sight
peg, nog
peg, pin, thole, tholepin, rowlock, oarlock
peg
peg, wooden_leg, leg, pegleg
pegboard
Pelham
pelican_crossing
pelisse
pelvimeter
pen
penal_colony
penal_institution, penal_facility
penalty_box
pen-and-ink
pencil
pencil
pencil_box, pencil_case
pencil_sharpener
pendant_earring, drop_earring, eardrop
pendulum
pendulum_clock
pendulum_watch
penetration_bomb
penile_implant
penitentiary, pen
penknife
penlight
pennant, pennon, streamer, waft
pennywhistle, tin_whistle, whistle
penthouse
pentode
peplos, peplus, peplum
peplum
pepper_mill, pepper_grinder
pepper_shaker, pepper_box, pepper_pot
pepper_spray
percale
percolator
percussion_cap
percussion_instrument, percussive_instrument
perforation
perfume, essence
perfumery
perfumery
perfumery
peripheral, computer_peripheral, peripheral_device
periscope
peristyle
periwig, peruke
permanent_press, durable_press
perpetual_motion_machine
personal_computer, PC, microcomputer
personal_digital_assistant, PDA, personal_organizer, personal_organiser, organizer, organiser
personnel_carrier
pestle
pestle, muller, pounder
petcock
Petri_dish
petrolatum_gauze
pet_shop
petticoat, half-slip, underskirt
pew, church_bench
phial, vial, ampule, ampul, ampoule
Phillips_screw
Phillips_screwdriver
phonograph_needle, needle
phonograph_record, phonograph_recording, record, disk, disc, platter
photocathode
photocoagulator
photocopier
photographic_equipment
photographic_paper, photographic_material
photometer
photomicrograph
Photostat, Photostat_machine
photostat
physical_pendulum, compound_pendulum
piano, pianoforte, forte-piano
piano_action
piano_keyboard, fingerboard, clavier
piano_wire
piccolo
pick, pickax, pickaxe
pick
pick, plectrum, plectron
pickelhaube
picket_boat
picket_fence, paling
picket_ship
pickle_barrel
pickup, pickup_truck
picture, image, icon, ikon
picture_frame
picture_hat
picture_rail
picture_window
piece_of_cloth, piece_of_material
pied-a-terre
pier
pier
pier_arch
pier_glass, pier_mirror
pier_table
pieta
piezometer
pig_bed, pig
piggery, pig_farm
piggy_bank, penny_bank
pilaster
pile, spile, piling, stilt
pile_driver
pill_bottle
pillbox, toque, turban
pillion
pillory
pillow
pillow_block
pillow_lace, bobbin_lace
pillow_sham
pilot_bit
pilot_boat
pilot_burner, pilot_light, pilot
pilot_cloth
pilot_engine
pilothouse, wheelhouse
pilot_light, pilot_lamp, indicator_lamp
pin
pin, flag
pin, pin_tumbler
pinata
pinball_machine, pin_table
pince-nez
pincer, pair_of_pincers, tweezer, pair_of_tweezers
pinch_bar
pincurl_clip
pinfold
ping-pong_ball
pinhead
pinion
pinnacle
pinprick
pinstripe
pinstripe
pinstripe
pintle
pinwheel, pinwheel_wind_collector
pinwheel
tabor_pipe
pipe
pipe_bomb
pipe_cleaner
pipe_cutter
pipefitting, pipe_fitting
pipet, pipette
pipe_vise, pipe_clamp
pipe_wrench, tube_wrench
pique
pirate, pirate_ship
piste
pistol, handgun, side_arm, shooting_iron
pistol_grip
piston, plunger
piston_ring
piston_rod
pit
pitcher, ewer
pitchfork
pitching_wedge
pitch_pipe
pith_hat, pith_helmet, sun_helmet, topee, topi
piton
Pitot-static_tube, Pitot_head, Pitot_tube
Pitot_tube, Pitot
pitsaw
pivot, pin
pivoting_window
pizzeria, pizza_shop, pizza_parlor
place_of_business, business_establishment
place_of_worship, house_of_prayer, house_of_God, house_of_worship
placket
planchet, coin_blank
plane, carpenter's_plane, woodworking_plane
plane, planer, planing_machine
plane_seat
planetarium
planetarium
planetarium
planetary_gear, epicyclic_gear, planet_wheel, planet_gear
plank-bed
planking
planner
plant, works, industrial_plant
planter
plaster, adhesive_plaster, sticking_plaster
plasterboard, gypsum_board
plastering_trowel
plastic_bag
plastic_bomb
plastic_laminate
plastic_wrap
plastron
plastron
plastron
plate, scale, shell
plate, collection_plate
plate
platen
platen
plate_rack
plate_rail
platform
platform, weapons_platform
platform
platform_bed
platform_rocker
plating, metal_plating
platter
playback
playbox, play-box
playground
playpen, pen
playsuit
plaza, mall, center, shopping_mall, shopping_center, shopping_centre
pleat, plait
plenum
plethysmograph
pleximeter, plessimeter
plexor, plessor, percussor
pliers, pair_of_pliers, plyers
plimsoll
plotter
plow, plough
plug, stopper, stopple
plug, male_plug
plug_fuse
plughole
plumb_bob, plumb, plummet
plumb_level
plunger, plumber's_helper
plus_fours
plush
plywood, plyboard
pneumatic_drill
p-n_junction
p-n-p_transistor
poacher
pocket
pocket_battleship
pocketcomb, pocket_comb
pocket_flap
pocket-handkerchief
pocketknife, pocket_knife
pocket_watch
pod, fuel_pod
pogo_stick
point-and-shoot_camera
pointed_arch
pointing_trowel
point_lace, needlepoint
poker, stove_poker, fire_hook, salamander
polarimeter, polariscope
Polaroid
Polaroid_camera, Polaroid_Land_camera
pole
pole
poleax, poleaxe
poleax, poleaxe
police_boat
police_van, police_wagon, paddy_wagon, patrol_wagon, wagon, black_Maria
polling_booth
polo_ball
polo_mallet, polo_stick
polonaise
polo_shirt, sport_shirt
polyester
polygraph
pomade, pomatum
pommel_horse, side_horse
poncho
pongee
poniard, bodkin
pontifical
pontoon
pontoon_bridge, bateau_bridge, floating_bridge
pony_cart, ponycart, donkey_cart, tub-cart
pool_ball
poolroom
pool_table, billiard_table, snooker_table
poop_deck
poor_box, alms_box, mite_box
poorhouse
pop_bottle, soda_bottle
popgun
poplin
popper
poppet, poppet_valve
pop_tent
porcelain
porch
porkpie, porkpie_hat
porringer
portable
portable_computer
portable_circular_saw, portable_saw
portcullis
porte-cochere
porte-cochere
portfolio
porthole
portico
portiere
portmanteau, Gladstone, Gladstone_bag
portrait_camera
portrait_lens
positive_pole, positive_magnetic_pole, north-seeking_pole
positive_pole
positron_emission_tomography_scanner, PET_scanner
post
postage_meter
post_and_lintel
post_chaise
postern
post_exchange, PX
posthole_digger, post-hole_digger
post_horn
posthouse, post_house
pot
pot, flowerpot
potbelly, potbelly_stove
Potemkin_village
potential_divider, voltage_divider
potentiometer, pot
potentiometer
potpourri
potsherd
potter's_wheel
pottery, clayware
pottle
potty_seat, potty_chair
pouch
poultice, cataplasm, plaster
pound, dog_pound
pound_net
powder
powder_and_shot
powdered_mustard, dry_mustard
powder_horn, powder_flask
powder_keg
power_brake
power_cord
power_drill
power_line, power_cable
power_loom
power_mower, motor_mower
power_pack
power_saw, saw, sawing_machine
power_shovel, excavator, digger, shovel
power_steering, power-assisted_steering
power_takeoff, PTO
power_tool
praetorium, pretorium
prayer_rug, prayer_mat
prayer_shawl, tallith, tallis
precipitator, electrostatic_precipitator, Cottrell_precipitator
prefab
presbytery
presence_chamber
press, mechanical_press
press, printing_press
press
press_box
press_gallery
press_of_sail, press_of_canvas
pressure_cabin
pressure_cooker
pressure_dome
pressure_gauge, pressure_gage
pressurized_water_reactor, PWR
pressure_suit
pricket
prie-dieu
primary_coil, primary_winding, primary
Primus_stove, Primus
Prince_Albert
print
print_buffer
printed_circuit
printer, printing_machine
printer
printer_cable
priory
prison, prison_house
prison_camp, internment_camp, prisoner_of_war_camp, POW_camp
privateer
private_line
privet_hedge
probe
proctoscope
prod, goad
production_line, assembly_line, line
projectile, missile
projector
projector
prolonge
prolonge_knot, sailor's_breastplate
prompter, autocue
prong
propeller, propellor
propeller_plane
propjet, turboprop, turbo-propeller_plane
proportional_counter_tube, proportional_counter
propulsion_system
proscenium, proscenium_wall
proscenium_arch
prosthesis, prosthetic_device
protective_covering, protective_cover, protection
protective_garment
proton_accelerator
protractor
pruner, pruning_hook, lopper
pruning_knife
pruning_saw
pruning_shears
psaltery
psychrometer
PT_boat, mosquito_boat, mosquito_craft, motor_torpedo_boat
public_address_system, P.A._system, PA_system, P.A., PA
public_house, pub, saloon, pothouse, gin_mill, taphouse
public_toilet, comfort_station, public_convenience, convenience, public_lavatory, restroom, toilet_facility, wash_room
public_transport
public_works
puck, hockey_puck
pull
pullback, tieback
pull_chain
pulley, pulley-block, pulley_block, block
pull-off, rest_area, rest_stop, layby, lay-by
Pullman, Pullman_car
pullover, slipover
pull-through
pulse_counter
pulse_generator
pulse_timing_circuit
pump
pump
pump_action, slide_action
pump_house, pumping_station
pump_room
pump-type_pliers
pump_well
punch, puncher
punchboard
punch_bowl
punching_bag, punch_bag, punching_ball, punchball
punch_pliers
punch_press
punnet
punt
pup_tent, shelter_tent
purdah
purifier
purl, purl_stitch
purse
push-bike
push_broom
push_button, push, button
push-button_radio
pusher, zori
put-put
puttee
putter, putting_iron
putty_knife
puzzle
pylon, power_pylon
pylon
pyramidal_tent
pyrograph
pyrometer
pyrometric_cone
pyrostat
pyx, pix
pyx, pix, pyx_chest, pix_chest
pyxis
quad, quadrangle
quadrant
quadraphony, quadraphonic_system, quadriphonic_system
quartering
quarterstaff
quartz_battery, quartz_mill
quartz_lamp
queen
queen
queen_post
quern
quill, quill_pen
quilt, comforter, comfort, puff
quilted_bedspread
quilting
quipu
quirk_molding, quirk_moulding
quirt
quiver
quoin, coign, coigne
quoit
QWERTY_keyboard
rabbet, rebate
rabbet_joint
rabbit_ears
rabbit_hutch
raceabout
racer, race_car, racing_car
raceway, race
racing_boat
racing_gig
racing_skiff, single_shell
rack, stand
rack
rack, wheel
rack_and_pinion
racket, racquet
racquetball
radar, microwave_radar, radio_detection_and_ranging, radiolocation
radial, radial_tire, radial-ply_tire
radial_engine, rotary_engine
radiation_pyrometer
radiator
radiator
radiator_cap
radiator_hose
radio, wireless
radio_antenna, radio_aerial
radio_chassis
radio_compass
radiogram, radiograph, shadowgraph, skiagraph, skiagram
radio_interferometer
radio_link, link
radiometer
radiomicrometer
radio-phonograph, radio-gramophone
radio_receiver, receiving_set, radio_set, radio, tuner, wireless
radiotelegraph, radiotelegraphy, wireless_telegraph, wireless_telegraphy
radiotelephone, radiophone, wireless_telephone
radio_telescope, radio_reflector
radiotherapy_equipment
radio_transmitter
radome, radar_dome
raft
rafter, balk, baulk
raft_foundation
rag, shred, tag, tag_end, tatter
ragbag
raglan
raglan_sleeve
rail
rail_fence
railhead
railing, rail
railing
railroad_bed
railroad_tunnel
rain_barrel
raincoat, waterproof
rain_gauge, rain_gage, pluviometer, udometer
rain_stick
rake
rake_handle
RAM_disk
ramekin, ramequin
ramjet, ramjet_engine, atherodyde, athodyd, flying_drainpipe
rammer
ramp, incline
rampant_arch
rampart, bulwark, wall
ramrod
ramrod
ranch, spread, cattle_ranch, cattle_farm
ranch_house
random-access_memory, random_access_memory, random_memory, RAM, read/write_memory
rangefinder, range_finder
range_hood
range_pole, ranging_pole, flagpole
rapier, tuck
rariora
rasp, wood_file
ratchet, rachet, ratch
ratchet_wheel
rathskeller
ratline, ratlin
rat-tail_file
rattan, ratan
rattrap
rayon
razor
razorblade
reaction-propulsion_engine, reaction_engine
reaction_turbine
reactor
reading_lamp
reading_room
read-only_memory, ROM, read-only_storage, fixed_storage
read-only_memory_chip
readout, read-out
read/write_head, head
ready-to-wear
real_storage
reamer
reamer, juicer, juice_reamer
rearview_mirror
Reaumur_thermometer
rebozo
receiver, receiving_system
receptacle
reception_desk
reception_room
recess, niche
reciprocating_engine
recliner, reclining_chair, lounger
reconnaissance_plane
reconnaissance_vehicle, scout_car
record_changer, auto-changer, changer
recorder, recording_equipment, recording_machine
recording
recording_system
record_player, phonograph
record_sleeve, record_cover
recovery_room
recreational_vehicle, RV, R.V.
recreation_room, rec_room
recycling_bin
recycling_plant
redbrick_university
red_carpet
redoubt
redoubt
reduction_gear
reed_pipe
reed_stop
reef_knot, flat_knot
reel
reel
refectory
refectory_table
refinery
reflecting_telescope, reflector
reflectometer
reflector
reflex_camera
reflux_condenser
reformatory, reform_school, training_school
reformer
refracting_telescope
refractometer
refrigeration_system
refrigerator, icebox
refrigerator_car
refuge, sanctuary, asylum
regalia
regimentals
regulator
rein
relay, electrical_relay
release, button
religious_residence, cloister
reliquary
remote_control, remote
remote_terminal, link-attached_terminal, remote_station, link-attached_station
removable_disk
rendering
rep, repp
repair_shop, fix-it_shop
repeater
repeating_firearm, repeater
repository, monument
reproducer
rerebrace, upper_cannon
rescue_equipment
research_center, research_facility
reseau
reservoir
reset
reset_button
residence
resistance_pyrometer
resistor, resistance
resonator
resonator, cavity_resonator, resonating_chamber
resort_hotel, spa
respirator, inhalator
restaurant, eating_house, eating_place, eatery
rest_house
restraint, constraint
resuscitator
retainer
retaining_wall
reticle, reticule, graticule
reticulation
reticule
retort
retractor
return_key, return
reverberatory_furnace
revers, revere
reverse, reverse_gear
reversible
revetment, revetement, stone_facing
revetment
revolver, six-gun, six-shooter
revolving_door, revolver
rheometer
rheostat, variable_resistor
rhinoscope
rib
riband, ribband
ribbed_vault
ribbing
ribbon_development
rib_joint_pliers
ricer
riddle
ride
ridge, ridgepole, rooftree
ridge_rope
riding_boot
riding_crop, hunting_crop
riding_mower
rifle
rifle_ball
rifle_grenade
rig
rigger, rigger_brush
rigger
rigging, tackle
rigout
ringlet
rings
rink, skating_rink
riot_gun
ripcord
ripcord
ripping_bar
ripping_chisel
ripsaw, splitsaw
riser
riser, riser_pipe, riser_pipeline, riser_main
Ritz
river_boat
rivet
riveting_machine, riveter, rivetter
roach_clip, roach_holder
road, route
roadbed
roadblock, barricade
roadhouse
roadster, runabout, two-seater
roadway
roaster
robe
robotics_equipment
Rochon_prism, Wollaston_prism
rock_bit, roller_bit
rocker
rocker, cradle
rocker_arm, valve_rocker
rocket, rocket_engine
rocket, projectile
rocking_chair, rocker
rod
rodeo
roll
roller
roller
roller_bandage
in-line_skate
Rollerblade
roller_blind
roller_coaster, big_dipper, chute-the-chute
roller_skate
roller_towel
roll_film
rolling_hitch
rolling_mill
rolling_pin
rolling_stock
roll-on
roll-on
roll-on_roll-off
Rolodex
Roman_arch, semicircular_arch
Roman_building
romper, romper_suit
rood_screen
roof
roof
roofing
room
roomette
room_light
roost
rope
rope_bridge
rope_tow
rose_water
rose_window, rosette
rosin_bag
rotary_actuator, positioner
rotary_engine
rotary_press
rotating_mechanism
rotating_shaft, shaft
rotisserie
rotisserie
rotor
rotor, rotor_coil
rotor
rotor_blade, rotary_wing
rotor_head, rotor_shaft
rotunda
rotunda
rouge, paint, blusher
roughcast
rouleau
roulette, toothed_wheel
roulette_ball
roulette_wheel, wheel
round, unit_of_ammunition, one_shot
round_arch
round-bottom_flask
roundel
round_file
roundhouse
router
router
router_plane
rowel
row_house, town_house
rowing_boat
rowlock_arch
royal
royal_mast
rubber_band, elastic_band, elastic
rubber_boot, gum_boot
rubber_bullet
rubber_eraser, rubber, pencil_eraser
rudder
rudder
rudder_blade
rug, carpet, carpeting
rugby_ball
ruin
rule, ruler
rumble
rumble_seat
rummer
rumpus_room, playroom, game_room
runcible_spoon
rundle, spoke, rung
running_shoe
running_suit
runway
rushlight, rush_candle
russet
rya, rya_rug
saber, sabre
saber_saw, jigsaw, reciprocating_saw
sable
sable, sable_brush, sable's_hair_pencil
sable_coat
sabot, wooden_shoe
sachet
sack, poke, paper_bag, carrier_bag
sack, sacque
sackbut
sackcloth
sackcloth
sack_coat
sacking, bagging
saddle
saddlebag
saddle_blanket, saddlecloth, horse_blanket
saddle_oxford, saddle_shoe
saddlery
saddle_seat
saddle_stitch
safe
safe
safe-deposit, safe-deposit_box, safety-deposit, safety_deposit_box, deposit_box, lockbox
safe_house
safety_arch
safety_belt, life_belt, safety_harness
safety_bicycle, safety_bike
safety_bolt, safety_lock
safety_curtain
safety_fuse
safety_lamp, Davy_lamp
safety_match, book_matches
safety_net
safety_pin
safety_rail, guardrail
safety_razor
safety_valve, relief_valve, escape_valve, escape_cock, escape
sail, canvas, canvass, sheet
sail
sailboat, sailing_boat
sailcloth
sailing_vessel, sailing_ship
sailing_warship
sailor_cap
sailor_suit
salad_bar
salad_bowl
salinometer
sallet, salade
salon
salon
salon, beauty_salon, beauty_parlor, beauty_parlour, beauty_shop
saltbox
saltcellar
saltshaker, salt_shaker
saltworks
salver
salwar, shalwar
Sam_Browne_belt
samisen, shamisen
samite
samovar
sampan
sandal
sandbag
sandblaster
sandbox
sandglass
sand_wedge
sandwich_board
sanitary_napkin, sanitary_towel, Kotex
cling_film, clingfilm, Saran_Wrap
sarcenet, sarsenet
sarcophagus
sari, saree
sarong
sash, window_sash
sash_fastener, sash_lock, window_lock
sash_window
satchel
sateen
satellite, artificial_satellite, orbiter
satellite_receiver
satellite_television, satellite_TV
satellite_transmitter
satin
Saturday_night_special
saucepan
saucepot
sauna, sweat_room
savings_bank, coin_bank, money_box, bank
saw
sawed-off_shotgun
sawhorse, horse, sawbuck, buck
sawmill
saw_set
sax, saxophone
saxhorn
scabbard
scaffolding, staging
scale
scale, weighing_machine
scaler
scaling_ladder
scalpel
scanner, electronic_scanner
scanner
scanner, digital_scanner, image_scanner
scantling, stud
scarf
scarf_joint, scarf
scatter_rug, throw_rug
scauper, scorper
Schmidt_telescope, Schmidt_camera
school, schoolhouse
schoolbag
school_bell
school_bus
school_ship, training_ship
school_system
schooner
schooner
scientific_instrument
scimitar
scintillation_counter
scissors, pair_of_scissors
sclerometer
scoinson_arch, sconcheon_arch
sconce
sconce
scoop
scooter
scoreboard
scouring_pad
scow
scow
scraper
scratcher
screen
screen, cover, covert, concealment
screen
screen, CRT_screen
screen_door, screen
screening
screw
screw, screw_propeller
screw
screwdriver
screw_eye
screw_key
screw_thread, thread
screwtop
screw_wrench
scriber, scribe, scratch_awl
scrim
scrimshaw
scriptorium
scrubber
scrub_brush, scrubbing_brush, scrubber
scrub_plane
scuffer
scuffle, scuffle_hoe, Dutch_hoe
scull
scull
scullery
sculpture
scuttle, coal_scuttle
scyphus
scythe
seabag
sea_boat
sea_chest
sealing_wax, seal
sealskin
seam
seaplane, hydroplane
searchlight
searing_iron
seat
seat
seat
seat_belt, seatbelt
secateurs
secondary_coil, secondary_winding, secondary
second_balcony, family_circle, upper_balcony, peanut_gallery
second_base
second_hand
secretary, writing_table, escritoire, secretaire
sectional
security_blanket
security_system, security_measure, security
security_system
sedan, saloon
sedan, sedan_chair
seeder
seeker
seersucker
segmental_arch
Segway, Segway_Human_Transporter, Segway_HT
seidel
seine
seismograph
selector, selector_switch
selenium_cell
self-propelled_vehicle
self-registering_thermometer
self-starter
selsyn, synchro
selvage, selvedge
semaphore
semiautomatic_firearm
semiautomatic_pistol, semiautomatic
semiconductor_device, semiconductor_unit, semiconductor
semi-detached_house
semigloss
semitrailer, semi
sennit
sensitometer
sentry_box
separate
septic_tank
sequence, episode
sequencer, sequenator
serape, sarape
serge
serger
serial_port
serpent
serration
server
server, host
service_club
serving_cart
serving_dish
servo, servomechanism, servosystem
set
set_gun, spring_gun
setscrew
setscrew
set_square
settee
settle, settee
settlement_house
seventy-eight, 78
Seven_Wonders_of_the_Ancient_World, Seven_Wonders_of_the_World
sewage_disposal_plant, disposal_plant
sewer, sewerage, cloaca
sewing_basket
sewing_kit
sewing_machine
sewing_needle
sewing_room
sextant
sgraffito
shackle, bond, hamper, trammel
shackle
shade
shadow_box
shaft
shag_rug
shaker
shank
shank, stem
shantung
shaper, shaping_machine
shaping_tool
sharkskin
sharpener
Sharpie
shaver, electric_shaver, electric_razor
shaving_brush
shaving_cream, shaving_soap
shaving_foam
shawl
shawm
shears
sheath
sheathing, overlay, overlayer
shed
sheep_bell
sheepshank
sheepskin_coat, afghan
sheepwalk, sheeprun
sheet, bed_sheet
sheet_bend, becket_bend, weaver's_knot, weaver's_hitch
sheeting
sheet_pile, sheath_pile, sheet_piling
Sheetrock
shelf
shelf_bracket
shell
shell, case, casing
shell, racing_shell
shellac, shellac_varnish
shelter
shelter
shelter
sheltered_workshop
Sheraton
shield, buckler
shield
shielding
shift_key, shift
shillelagh, shillalah
shim
shingle
shin_guard, shinpad
ship
shipboard_system
shipping, cargo_ships, merchant_marine, merchant_vessels
shipping_room
ship-towed_long-range_acoustic_detection_system
shipwreck
shirt
shirt_button
shirtdress
shirtfront
shirting
shirtsleeve
shirttail
shirtwaist, shirtwaister
shiv
shock_absorber, shock, cushion
shoe
shoe
shoebox
shoehorn
shoe_shop, shoe-shop, shoe_store
shoetree
shofar, shophar
shoji
shooting_brake
shooting_lodge, shooting_box
shooting_stick
shop, store
shop_bell
shopping_bag
shopping_basket
shopping_cart
short_circuit, short
short_iron
short_pants, shorts, trunks
short_sleeve
shortwave_diathermy_machine
shot
shot_glass, jigger, pony
shotgun, scattergun
shotgun_shell
shot_tower
shoulder
shoulder_bag
shouldered_arch
shoulder_holster
shoulder_pad
shoulder_patch
shovel
shovel
shovel_hat
showboat
shower
shower_cap
shower_curtain
shower_room
shower_stall, shower_bath
showroom, salesroom, saleroom
shrapnel
shredder
shrimper
shrine
shrink-wrap
shunt
shunt, electrical_shunt, bypass
shunter
shutter
shutter
shuttle
shuttle
shuttle_bus
shuttlecock, bird, birdie, shuttle
shuttle_helicopter
Sibley_tent
sickbay, sick_berth
sickbed
sickle, reaping_hook, reap_hook
sickroom
sideboard
sidecar
side_chapel
sidelight, running_light
sidesaddle
sidewalk, pavement
sidewall
side-wheeler
sidewinder
sieve, screen
sifter
sights
sigmoidoscope, flexible_sigmoidoscope
signal_box, signal_tower
signaling_device
signboard, sign
silencer, muffler
silent_butler
Silex
silk
silks
silo
silver_plate
silverpoint
simple_pendulum
simulator
single_bed
single-breasted_jacket
single-breasted_suit
single_prop, single-propeller_plane
single-reed_instrument, single-reed_woodwind
single-rotor_helicopter
singlestick, fencing_stick, backsword
singlet, vest, undershirt
siren
sister_ship
sitar
sitz_bath, hip_bath
six-pack, six_pack, sixpack
skate
skateboard
skeg
skein
skeleton, skeletal_frame, frame, underframe
skeleton_key
skep
skep
sketch, study
sketcher
skew_arch
skewer
ski
ski_binding, binding
skibob
ski_boot
ski_cap, stocking_cap, toboggan_cap
skidder
skid_lid
skiff
ski_jump
ski_lodge
ski_mask
skimmer
ski_parka, ski_jacket
ski-plane
ski_pole
ski_rack
skirt
skirt
ski_tow, ski_lift, lift
Skivvies
skullcap
skybox
skyhook
skylight, fanlight
skysail
skyscraper
skywalk
slacks
slack_suit
slasher
slash_pocket
slat, spline
slate
slate_pencil
slate_roof
sled, sledge, sleigh
sleeper
sleeper
sleeping_bag
sleeping_car, sleeper, wagon-lit
sleeve, arm
sleeve
sleigh_bed
sleigh_bell, cascabel
slice_bar
slicer
slicer
slide, playground_slide, sliding_board
slide_fastener, zip, zipper, zip_fastener
slide_projector
slide_rule, slipstick
slide_valve
sliding_door
sliding_seat
sliding_window
sling, scarf_bandage, triangular_bandage
sling
slingback, sling
slinger_ring
slip_clutch, slip_friction_clutch
slipcover
slip-joint_pliers
slipknot
slip-on
slipper, carpet_slipper
slip_ring
slit_lamp
slit_trench
sloop
sloop_of_war
slop_basin, slop_bowl
slop_pail, slop_jar
slops
slopshop, slopseller's_shop
slot, one-armed_bandit
slot_machine, coin_machine
sluice, sluiceway, penstock
smack
small_boat
small_computer_system_interface, SCSI
small_ship
small_stores
smart_bomb
smelling_bottle
smocking
smoke_bomb, smoke_grenade
smokehouse, meat_house
smoker, smoking_car, smoking_carriage, smoking_compartment
smoke_screen, smokescreen
smoking_room
smoothbore
smooth_plane, smoothing_plane
snack_bar, snack_counter, buffet
snaffle, snaffle_bit
snap, snap_fastener, press_stud
snap_brim
snap-brim_hat
snare, gin, noose
snare_drum, snare, side_drum
snatch_block
snifter, brandy_snifter, brandy_glass
sniper_rifle, precision_rifle
snips, tinsnips
Sno-cat
snood
snorkel, schnorkel, schnorchel, snorkel_breather, breather
snorkel
snowbank, snow_bank
snowboard
snowmobile
snowplow, snowplough
snowshoe
snowsuit
snow_thrower, snow_blower
snuffbox
snuffer
snuffers
soapbox
soap_dish
soap_dispenser
soap_pad
soccer_ball
sock
socket
socket_wrench
socle
soda_can
soda_fountain
soda_fountain
sod_house, soddy, adobe_house
sodium-vapor_lamp, sodium-vapour_lamp
sofa, couch, lounge
soffit
softball, playground_ball
soft_pedal
soil_pipe
solar_array, solar_battery, solar_panel
solar_cell, photovoltaic_cell
solar_dish, solar_collector, solar_furnace
solar_heater
solar_house
solar_telescope
solar_thermal_system
soldering_iron
solenoid
solleret, sabaton
sombrero
sonic_depth_finder, fathometer
sonogram, echogram
sonograph
sorter
souk
sound_bow
soundbox, body
sound_camera
sounder
sound_film
sounding_board, soundboard
sounding_rocket
sound_recording, audio_recording, audio
sound_spectrograph
soup_bowl
soup_ladle
soupspoon, soup_spoon
source_of_illumination
sourdine
soutache
soutane
sou'wester
soybean_future
space_bar
space_capsule, capsule
spacecraft, ballistic_capsule, space_vehicle
space_heater
space_helmet
space_rocket
space_shuttle
space_station, space_platform, space_laboratory
spacesuit
spade
spade_bit
spaghetti_junction
Spandau
spandex
spandrel, spandril
spanker
spar
sparge_pipe
spark_arrester, sparker
spark_arrester
spark_chamber, spark_counter
spark_coil
spark_gap
spark_lever
spark_plug, sparking_plug, plug
sparkplug_wrench
spark_transmitter
spat, gaiter
spatula
spatula
speakerphone
speaking_trumpet
spear, lance, shaft
spear, gig, fizgig, fishgig, lance
specialty_store
specimen_bottle
spectacle
spectacles, specs, eyeglasses, glasses
spectator_pump, spectator
spectrograph
spectrophotometer
spectroscope, prism_spectroscope
speculum
speedboat
speed_bump
speedometer, speed_indicator
speed_skate, racing_skate
spherometer
sphygmomanometer
spicemill
spice_rack
spider
spider_web, spider's_web
spike
spike
spindle
spindle, mandrel, mandril, arbor
spindle
spin_dryer, spin_drier
spinet
spinet
spinnaker
spinner
spinning_frame
spinning_jenny
spinning_machine
spinning_rod
spinning_wheel
spiral_bandage
spiral_ratchet_screwdriver, ratchet_screwdriver
spiral_spring
spirit_lamp
spirit_stove
spirometer
spit
spittoon, cuspidor
splashboard, splasher, dashboard
splasher
splice, splicing
splicer
splint
split_rail, fence_rail
Spode
spoiler
spoiler
spoke, wheel_spoke, radius
spokeshave
sponge_cloth
sponge_mop
spoon
spoon
Spork
sporran
sport_kite, stunt_kite
sports_car, sport_car
sports_equipment
sports_implement
sportswear, athletic_wear, activewear
sport_utility, sport_utility_vehicle, S.U.V., SUV
spot
spotlight, spot
spot_weld, spot-weld
spouter
sprag
spray_gun
spray_paint
spreader
sprig
spring
spring_balance, spring_scale
springboard
sprinkler
sprinkler_system
sprit
spritsail
sprocket, sprocket_wheel
sprocket
spun_yarn
spur, gad
spur_gear, spur_wheel
sputnik
spy_satellite
squad_room
square
square_knot
square-rigger
square_sail
squash_ball
squash_racket, squash_racquet, bat
squawk_box, squawker, intercom_speaker
squeegee
squeezer
squelch_circuit, squelch, squelcher
squinch
stabilizer, stabiliser
stabilizer
stabilizer_bar, anti-sway_bar
stable, stalls, horse_barn
stable_gear, saddlery, tack
stabling
stacks
staddle
stadium, bowl, arena, sports_stadium
stage
stagecoach, stage
stained-glass_window
stair-carpet
stair-rod
stairwell
stake
stall, stand, sales_booth
stall
stamp
stamp_mill, stamping_mill
stamping_machine, stamper
stanchion
stand
standard
standard_cell
standard_transmission, stick_shift
standing_press
stanhope
Stanley_Steamer
staple
staple
staple_gun, staplegun, tacker
stapler, stapling_machine
starship, spaceship
starter, starter_motor, starting_motor
starting_gate, starting_stall
Stassano_furnace, electric-arc_furnace
Statehouse
stately_home
state_prison
stateroom
static_tube
station
stator, stator_coil
statue
stay
staysail
steakhouse, chophouse
steak_knife
stealth_aircraft
stealth_bomber
stealth_fighter
steam_bath, steam_room, vapor_bath, vapour_bath
steamboat
steam_chest
steam_engine
steamer, steamship
steamer
steam_iron
steam_locomotive
steamroller, road_roller
steam_shovel
steam_turbine
steam_whistle
steel
steel_arch_bridge
steel_drum
steel_mill, steelworks, steel_plant, steel_factory
steel-wool_pad
steelyard, lever_scale, beam_scale
steeple, spire
steerage
steering_gear
steering_linkage
steering_system, steering_mechanism
steering_wheel, wheel
stele, stela
stem-winder
stencil
Sten_gun
stenograph
step, stair
step-down_transformer
step_stool
step-up_transformer
stereo, stereophony, stereo_system, stereophonic_system
stereoscope
stern_chaser
sternpost
sternwheeler
stethoscope
stewing_pan, stewpan
stick
stick
stick, control_stick, joystick
stick
stile
stiletto
still
stillroom, still_room
Stillson_wrench
stilt
Stinger
stink_bomb, stench_bomb
stirrer
stirrup, stirrup_iron
stirrup_pump
stob
stock, gunstock
stockade
stockcar
stock_car
stockinet, stockinette
stocking
stock-in-trade
stockpot
stockroom, stock_room
stocks
stock_saddle, Western_saddle
stockyard
stole
stomacher
stomach_pump
stone_wall
stoneware
stonework
stool
stoop, stoep
stop_bath, short-stop, short-stop_bath
stopcock, cock, turncock
stopper_knot
stopwatch, stop_watch
storage_battery, accumulator
storage_cell, secondary_cell
storage_ring
storage_space
storeroom, storage_room, stowage
storm_cellar, cyclone_cellar, tornado_cellar
storm_door
storm_window, storm_sash
stoup, stoop
stoup
stove
stove, kitchen_stove, range, kitchen_range, cooking_stove
stove_bolt
stovepipe
stovepipe_iron
Stradavarius, Strad
straight_chair, side_chair
straightedge
straightener
straight_flute, straight-fluted_drill
straight_pin
straight_razor
strainer
straitjacket, straightjacket
strap
strap
strap_hinge, joint_hinge
strapless
streamer_fly
streamliner
street
street
streetcar, tram, tramcar, trolley, trolley_car
street_clothes
streetlight, street_lamp
stretcher
stretcher
stretch_pants
strickle
strickle
stringed_instrument
stringer
stringer
string_tie
strip
strip_lighting
strip_mall
stroboscope, strobe, strobe_light
strongbox, deedbox
stronghold, fastness
strongroom
strop
structural_member
structure, construction
student_center
student_lamp
student_union
stud_finder
studio_apartment, studio
studio_couch, day_bed
study
study_hall
stuffing_nut, packing_nut
stump
stun_gun, stun_baton
stupa, tope
sty, pigsty, pigpen
stylus, style
stylus
sub-assembly
subcompact, subcompact_car
submachine_gun
submarine, pigboat, sub, U-boat
submarine_torpedo
submersible, submersible_warship
submersible
subtracter
subway_token
subway_train
subwoofer
suction_cup
suction_pump
sudatorium, sudatory
suede_cloth, suede
sugar_bowl
sugar_refinery
sugar_spoon, sugar_shell
suit, suit_of_clothes
suite, rooms
suiting
sulky
summer_house
sumo_ring
sump
sump_pump
sunbonnet
Sunday_best, Sunday_clothes
sun_deck
sundial
sundress
sundries
sun_gear
sunglass
sunglasses, dark_glasses, shades
sunhat, sun_hat
sunlamp, sun_lamp, sunray_lamp, sun-ray_lamp
sun_parlor, sun_parlour, sun_porch, sunporch, sunroom, sun_lounge, solarium
sunroof, sunshine-roof
sunscreen, sunblock, sun_blocker
sunsuit
supercharger
supercomputer
superconducting_supercollider
superhighway, information_superhighway
supermarket
superstructure
supertanker
supper_club
supplejack
supply_chamber
supply_closet
support
support
support_column
support_hose, support_stocking
supporting_structure
supporting_tower
surcoat
surface_gauge, surface_gage, scribing_block
surface_lift
surface_search_radar
surface_ship
surface-to-air_missile, SAM
surface-to-air_missile_system
surfboat
surcoat
surgeon's_knot
surgery
surge_suppressor, surge_protector, spike_suppressor, spike_arrester, lightning_arrester
surgical_dressing
surgical_instrument
surgical_knife
surplice
surrey
surtout
surveillance_system
surveying_instrument, surveyor's_instrument
surveyor's_level
sushi_bar
suspension, suspension_system
suspension_bridge
suspensory, suspensory_bandage
sustaining_pedal, loud_pedal
suture, surgical_seam
swab, swob, mop
swab
swaddling_clothes, swaddling_bands
swag
swage_block
swagger_stick
swallow-tailed_coat, swallowtail, morning_coat
swamp_buggy, marsh_buggy
swan's_down
swathe, wrapping
swatter, flyswatter, flyswat
sweat_bag
sweatband
sweater, jumper
sweat_pants, sweatpants
sweatshirt
sweatshop
sweat_suit, sweatsuit, sweats, workout_suit
sweep, sweep_oar
sweep_hand, sweep-second
swimming_trunks, bathing_trunks
swimsuit, swimwear, bathing_suit, swimming_costume, bathing_costume
swing
swing_door, swinging_door
switch, electric_switch, electrical_switch
switchblade, switchblade_knife, flick-knife, flick_knife
switch_engine, donkey_engine
swivel
swivel_chair
swizzle_stick
sword, blade, brand, steel
sword_cane, sword_stick
S_wrench
synagogue, temple, tabernacle
synchrocyclotron
synchroflash
synchromesh
synchronous_converter, rotary, rotary_converter
synchronous_motor
synchrotron
synchroscope, synchronoscope, synchronizer, synchroniser
synthesizer, synthesiser
syringe
system
tabard
Tabernacle
tabi, tabis
tab_key, tab
table
table
tablefork
table_knife
table_lamp
table_saw
tablespoon
tablet-armed_chair
table-tennis_table, ping-pong_table, pingpong_table
table-tennis_racquet, table-tennis_bat, pingpong_paddle
tabletop
tableware
tabor, tabour
taboret, tabouret
tachistoscope, t-scope
tachograph
tachometer, tach
tachymeter, tacheometer
tack
tack_hammer
taffeta
taffrail
tailgate, tailboard
taillight, tail_lamp, rear_light, rear_lamp
tailor-made
tailor's_chalk
tailpipe
tail_rotor, anti-torque_rotor
tailstock
take-up
talaria
talcum, talcum_powder
tam, tam-o'-shanter, tammy
tambour
tambour, embroidery_frame, embroidery_hoop
tambourine
tammy
tamp, tamper, tamping_bar
Tampax
tampion, tompion
tampon
tandoor
tangram
tank, storage_tank
tank, army_tank, armored_combat_vehicle, armoured_combat_vehicle
tankard
tank_car, tank
tank_destroyer
tank_engine, tank_locomotive
tanker_plane
tank_shell
tank_top
tannoy
tap, spigot
tapa, tappa
tape, tape_recording, taping
tape, tapeline, tape_measure
tape_deck
tape_drive, tape_transport, transport
tape_player
tape_recorder, tape_machine
taper_file
tapestry, tapis
tappet
tap_wrench
tare
target, butt
target_acquisition_system
tarmacadam, tarmac, macadam
tarpaulin, tarp
tartan, plaid
tasset, tasse
tattoo
tavern, tap_house
tawse
taximeter
T-bar_lift, T-bar, Alpine_lift
tea_bag
tea_ball
tea_cart, teacart, tea_trolley, tea_wagon
tea_chest
teaching_aid
teacup
tea_gown
teakettle
tea_maker
teapot
teashop, teahouse, tearoom, tea_parlor, tea_parlour
teaspoon
tea-strainer
tea_table
tea_tray
tea_urn
tee, golf_tee
tee_hinge, T_hinge
telecom_hotel, telco_building
telecommunication_system, telecom_system, telecommunication_equipment, telecom_equipment
telegraph, telegraphy
telegraph_key
telemeter
telephone, phone, telephone_set
telephone_bell
telephone_booth, phone_booth, call_box, telephone_box, telephone_kiosk
telephone_cord, phone_cord
telephone_jack, phone_jack
telephone_line, phone_line, telephone_circuit, subscriber_line, line
telephone_plug, phone_plug
telephone_pole, telegraph_pole, telegraph_post
telephone_receiver, receiver
telephone_system, phone_system
telephone_wire, telephone_line, telegraph_wire, telegraph_line
telephoto_lens, zoom_lens
Teleprompter
telescope, scope
telescopic_sight, telescope_sight
telethermometer
teletypewriter, teleprinter, teletype_machine, telex, telex_machine
television, television_system
television_antenna, tv-antenna
television_camera, tv_camera, camera
television_equipment, video_equipment
television_monitor, tv_monitor
television_receiver, television, television_set, tv, tv_set, idiot_box, boob_tube, telly, goggle_box
television_room, tv_room
television_transmitter
telpher, telfer
telpherage, telferage
tempera, poster_paint, poster_color, poster_colour
temple
temple
temporary_hookup, patch
tender, supply_ship
tender, ship's_boat, pinnace, cutter
tender
tenement, tenement_house
tennis_ball
tennis_camp
tennis_racket, tennis_racquet
tenon
tenor_drum, tom-tom
tenoroon
tenpenny_nail
tenpin
tensimeter
tensiometer
tensiometer
tensiometer
tent, collapsible_shelter
tenter
tenterhook
tent-fly, rainfly, fly_sheet, fly, tent_flap
tent_peg
tepee, tipi, teepee
terminal, pole
terminal
terraced_house
terra_cotta
terrarium
terra_sigillata, Samian_ware
terry, terry_cloth, terrycloth
Tesla_coil
tessera
test_equipment
test_rocket, research_rocket, test_instrument_vehicle
test_room, testing_room
testudo
tetraskelion, tetraskele
tetrode
textile_machine
textile_mill
thatch, thatched_roof
theater, theatre, house
theater_curtain, theatre_curtain
theater_light
theodolite, transit
theremin
thermal_printer
thermal_reactor
thermocouple, thermocouple_junction
thermoelectric_thermometer, thermel, electric_thermometer
thermograph, thermometrograph
thermograph
thermohydrometer, thermogravimeter
thermojunction
thermometer
thermonuclear_reactor, fusion_reactor
thermopile
thermos, thermos_bottle, thermos_flask
thermostat, thermoregulator
thigh_pad
thill
thimble
thinning_shears
third_base, third
third_gear, third
third_rail
thong
thong
three-centered_arch, basket-handle_arch
three-decker
three-dimensional_radar, 3d_radar
three-piece_suit
three-quarter_binding
three-way_switch, three-point_switch
thresher, thrasher, threshing_machine
threshing_floor
thriftshop, second-hand_store
throat_protector
throne
thrust_bearing
thruster
thumb
thumbhole
thumbscrew
thumbstall
thumbtack, drawing_pin, pushpin
thunderer
thwart, cross_thwart
tiara
ticking
tickler_coil
tie, tie_beam
tie, railroad_tie, crosstie, sleeper
tie_rack
tie_rod
tights, leotards
tile
tile_cutter
tile_roof
tiller
tilter
tilt-top_table, tip-top_table, tip_table
timber
timber
timber_hitch
timbrel
time_bomb, infernal_machine
time_capsule
time_clock
time-delay_measuring_instrument, time-delay_measuring_system
time-fuse
timepiece, timekeeper, horologe
timer
timer
time-switch
tin
tinderbox
tine
tinfoil, tin_foil
tippet
tire_chain, snow_chain
tire_iron, tire_tool
titfer
tithe_barn
titrator
toaster
toaster_oven
toasting_fork
toastrack
tobacco_pouch
tobacco_shop, tobacconist_shop, tobacconist
toboggan
toby, toby_jug, toby_fillpot_jug
tocsin, warning_bell
toe
toecap
toehold
toga
toga_virilis
toggle
toggle_bolt
toggle_joint
toggle_switch, toggle, on-off_switch, on/off_switch
togs, threads, duds
toilet, lavatory, lav, can, john, privy, bathroom
toilet_bag, sponge_bag
toilet_bowl
toilet_kit, travel_kit
toilet_powder, bath_powder, dusting_powder
toiletry, toilet_articles
toilet_seat
toilet_water, eau_de_toilette
tokamak
token
tollbooth, tolbooth, tollhouse
toll_bridge
tollgate, tollbar
toll_line
tomahawk, hatchet
Tommy_gun, Thompson_submachine_gun
tomograph
tone_arm, pickup, pickup_arm
toner
tongs, pair_of_tongs
tongue
tongue_and_groove_joint
tongue_depressor
tonometer
tool
tool_bag
toolbox, tool_chest, tool_cabinet, tool_case
toolshed, toolhouse
tooth
tooth
toothbrush
toothpick
top
top, cover
topgallant, topgallant_mast
topgallant, topgallant_sail
topiary
topknot
topmast
topper
topsail
toque
torch
torpedo
torpedo
torpedo
torpedo_boat
torpedo-boat_destroyer
torpedo_tube
torque_converter
torque_wrench
torture_chamber
totem_pole
touch_screen, touchscreen
toupee, toupe
touring_car, phaeton, tourer
tourist_class, third_class
towel
toweling, towelling
towel_rack, towel_horse
towel_rail, towel_bar
tower
town_hall
towpath, towing_path
tow_truck, tow_car, wrecker
toy
toy_box, toy_chest
toyshop
trace_detector
track, rail, rails, runway
track
trackball
tracked_vehicle
tract_house
tract_housing
traction_engine
tractor
tractor
trail_bike, dirt_bike, scrambler
trailer, house_trailer
trailer
trailer_camp, trailer_park
trailer_truck, tractor_trailer, trucking_rig, rig, articulated_lorry, semi
trailing_edge
train, railroad_train
tramline, tramway, streetcar_track
trammel
trampoline
tramp_steamer, tramp
tramway, tram, aerial_tramway, cable_tramway, ropeway
transdermal_patch, skin_patch
transept
transformer
transistor, junction_transistor, electronic_transistor
transit_instrument
transmission, transmission_system
transmission_shaft
transmitter, sender
transom, traverse
transom, transom_window, fanlight
transponder
transporter
transporter, car_transporter
transport_ship
trap
trap_door
trapeze
trave, traverse, crossbeam, crosspiece
travel_iron
trawl, dragnet, trawl_net
trawl, trawl_line, spiller, setline, trotline
trawler, dragger
tray
tray_cloth
tread
tread
treadmill, treadwheel, tread-wheel
treadmill
treasure_chest
treasure_ship
treenail, trenail, trunnel
trefoil_arch
trellis, treillage
trench
trench_coat
trench_knife
trepan
trepan, trephine
trestle
trestle
trestle_bridge
trestle_table
trestlework
trews
trial_balloon
triangle
triangle
triclinium
triclinium
tricorn, tricorne
tricot
tricycle, trike, velocipede
trident
trigger
trimaran
trimmer
trimmer_arch
triode
tripod
triptych
trip_wire
trireme
triskelion, triskele
triumphal_arch
trivet
trivet
troika
troll
trolleybus, trolley_coach, trackless_trolley
trombone
troop_carrier, troop_transport
troopship
trophy_case
trough
trouser
trouser_cuff
trouser_press, pants_presser
trouser, pant
trousseau
trowel
truck, motortruck
trumpet_arch
truncheon, nightstick, baton, billy, billystick, billy_club
trundle_bed, trundle, truckle_bed, truckle
trunk
trunk_hose
trunk_lid
trunk_line
truss
truss_bridge
try_square
T-square
tub, vat
tube, vacuum_tube, thermionic_vacuum_tube, thermionic_tube, electron_tube, thermionic_valve
tuck_box
tucker
tucker-bag
tuck_shop
Tudor_arch, four-centered_arch
tudung
tugboat, tug, towboat, tower
tulle
tumble-dryer, tumble_drier
tumbler
tumbrel, tumbril
tun
tunic
tuning_fork
tupik, tupek, sealskin_tent
turban
turbine
turbogenerator
tureen
Turkish_bath
Turkish_towel, terry_towel
Turk's_head
turnbuckle
turner, food_turner
turnery
turnpike
turnspit
turnstile
turntable
turntable, lazy_Susan
turret
turret_clock
turtleneck, turtle, polo-neck
tweed
tweeter
twenty-two, .22
twenty-two_pistol
twenty-two_rifle
twill
twill, twill_weave
twin_bed
twinjet
twist_bit, twist_drill
two-by-four
two-man_tent
two-piece, two-piece_suit, lounge_suit
typesetting_machine
typewriter
typewriter_carriage
typewriter_keyboard
tyrolean, tirolean
uke, ukulele
ulster
ultracentrifuge
ultramicroscope, dark-field_microscope
Ultrasuede
ultraviolet_lamp, ultraviolet_source
umbrella
umbrella_tent
undercarriage
undercoat, underseal
undergarment, unmentionable
underpants
underwear, underclothes, underclothing
undies
uneven_parallel_bars, uneven_bars
unicycle, monocycle
uniform
universal_joint, universal
university
upholstery
upholstery_material
upholstery_needle
uplift
upper_berth, upper
upright, upright_piano
upset, swage
upstairs
urceole
urn
urn
used-car, secondhand_car
utensil
Uzi
vacation_home
vacuum, vacuum_cleaner
vacuum_chamber
vacuum_flask, vacuum_bottle
vacuum_gauge, vacuum_gage
Valenciennes, Valenciennes_lace
valise
valve
valve
valve-in-head_engine
vambrace, lower_cannon
van
van, caravan
vane
vaporizer, vaporiser
variable-pitch_propeller
variometer
varnish
vase
vault
vault, bank_vault
vaulting_horse, long_horse, buck
vehicle
Velcro
velocipede
velour, velours
velvet
velveteen
vending_machine
veneer, veneering
Venetian_blind
Venn_diagram, Venn's_diagram
ventilation, ventilation_system, ventilating_system
ventilation_shaft
ventilator
veranda, verandah, gallery
verdigris
vernier_caliper, vernier_micrometer
vernier_scale, vernier
vertical_file
vertical_stabilizer, vertical_stabiliser, vertical_fin, tail_fin, tailfin
vertical_tail
Very_pistol, Verey_pistol
vessel, watercraft
vessel
vest, waistcoat
vestiture
vestment
vest_pocket
vestry, sacristy
viaduct
vibraphone, vibraharp, vibes
vibrator
vibrator
Victrola
vicuna
videocassette
videocassette_recorder, VCR
videodisk, videodisc, DVD
video_recording, video
videotape
videotape
vigil_light, vigil_candle
villa
villa
villa
viol
viola
viola_da_braccio
viola_da_gamba, gamba, bass_viol
viola_d'amore
violin, fiddle
virginal, pair_of_virginals
viscometer, viscosimeter
viscose_rayon, viscose
vise, bench_vise
visor, vizor
visual_display_unit, VDU
vivarium
Viyella
voile
volleyball
volleyball_net
voltage_regulator
voltaic_cell, galvanic_cell, primary_cell
voltaic_pile, pile, galvanic_pile
voltmeter
vomitory
von_Neumann_machine
voting_booth
voting_machine
voussoir
vox_angelica, voix_celeste
vox_humana
waders
wading_pool
waffle_iron
wagon, waggon
wagon, coaster_wagon
wagon_tire
wagon_wheel
wain
wainscot, wainscoting, wainscotting
wainscoting, wainscotting
waist_pack, belt_bag
walker, baby-walker, go-cart
walker, Zimmer, Zimmer_frame
walker
walkie-talkie, walky-talky
walk-in
walking_shoe
walking_stick
Walkman
walk-up_apartment, walk-up
wall
wall
wall_clock
wallet, billfold, notecase, pocketbook
wall_tent
wall_unit
wand
Wankel_engine, Wankel_rotary_engine, epitrochoidal_engine
ward, hospital_ward
wardrobe, closet, press
wardroom
warehouse, storage_warehouse
warming_pan
war_paint
warplane, military_plane
war_room
warship, war_vessel, combat_ship
wash
wash-and-wear
washbasin, handbasin, washbowl, lavabo, wash-hand_basin
washboard, splashboard
washboard
washer, automatic_washer, washing_machine
washer
washhouse
washroom
washstand, wash-hand_stand
washtub
wastepaper_basket, waste-paper_basket, wastebasket, waste_basket, circular_file
watch, ticker
watch_cap
watch_case
watch_glass
watchtower
water-base_paint
water_bed
water_bottle
water_butt
water_cart
water_chute
water_closet, closet, W.C., loo
watercolor, water-color, watercolour, water-colour
water-cooled_reactor
water_cooler
water_faucet, water_tap, tap, hydrant
water_filter
water_gauge, water_gage, water_glass
water_glass
water_hazard
water_heater, hot-water_heater, hot-water_tank
watering_can, watering_pot
watering_cart
water_jacket
water_jug
water_jump
water_level
water_meter
water_mill
waterproof
waterproofing
water_pump
water_scooter, sea_scooter, scooter
water_ski
waterspout
water_tower
water_wagon, water_waggon
waterwheel, water_wheel
waterwheel, water_wheel
water_wings
waterworks
wattmeter
waxwork, wax_figure
ways, shipway, slipway
weapon, arm, weapon_system
weaponry, arms, implements_of_war, weapons_system, munition
weapons_carrier
weathercock
weatherglass
weather_satellite, meteorological_satellite
weather_ship
weathervane, weather_vane, vane, wind_vane
web, entanglement
web
webbing
webcam
wedge
wedge
wedgie
Wedgwood
weeder, weed-whacker
weeds, widow's_weeds
weekender
weighbridge
weight, free_weight, exercising_weight
weir
weir
welcome_wagon
weld
welder's_mask
weldment
well
wellhead
welt
Weston_cell, cadmium_cell
wet_bar
wet-bulb_thermometer
wet_cell
wet_fly
wet_suit
whaleboat
whaler, whaling_ship
whaling_gun
wheel
wheel
wheel_and_axle
wheelchair
wheeled_vehicle
wheelwork
wherry
wherry, Norfolk_wherry
whetstone
whiffletree, whippletree, swingletree
whip
whipcord
whipping_post
whipstitch, whipping, whipstitching
whirler
whisk, whisk_broom
whisk
whiskey_bottle
whiskey_jug
whispering_gallery, whispering_dome
whistle
whistle
white
white_goods
whitewash
whorehouse, brothel, bordello, bagnio, house_of_prostitution, house_of_ill_repute, bawdyhouse, cathouse, sporting_house
wick, taper
wicker, wickerwork, caning
wicker_basket
wicket, hoop
wicket
wickiup, wikiup
wide-angle_lens, fisheye_lens
widebody_aircraft, wide-body_aircraft, wide-body, twin-aisle_airplane
wide_wale
widow's_walk
Wiffle, Wiffle_Ball
wig
wigwam
Wilton, Wilton_carpet
wimple
wincey
winceyette
winch, windlass
Winchester
windbreak, shelterbelt
winder, key
wind_instrument, wind
windjammer
windmill, aerogenerator, wind_generator
windmill
window
window
window_blind
window_box
window_envelope
window_frame
window_screen
window_seat
window_shade
windowsill
windshield, windscreen
windshield_wiper, windscreen_wiper, wiper, wiper_blade
Windsor_chair
Windsor_knot
Windsor_tie
wind_tee
wind_tunnel
wind_turbine
wine_bar
wine_bottle
wine_bucket, wine_cooler
wine_cask, wine_barrel
wineglass
winepress
winery, wine_maker
wineskin
wing
wing_chair
wing_nut, wing-nut, wing_screw, butterfly_nut, thumbnut
wing_tip
wing_tip
winker, blinker, blinder
wiper, wiper_arm, contact_arm
wiper_motor
wire
wire, conducting_wire
wire_cloth
wire_cutter
wire_gauge, wire_gage
wireless_local_area_network, WLAN, wireless_fidelity, WiFi
wire_matrix_printer, wire_printer, stylus_printer
wire_recorder
wire_stripper
wirework, grillwork
wiring
wishing_cap
witness_box, witness_stand
wok
woman's_clothing
wood
woodcarving
wood_chisel
woodenware
wooden_spoon
woodscrew
woodshed
wood_vise, woodworking_vise, shoulder_vise
woodwind, woodwind_instrument, wood
woof, weft, filling, pick
woofer
wool, woolen, woollen
workbasket, workbox, workbag
workbench, work_bench, bench
work-clothing, work-clothes
workhouse
workhouse
workpiece
workroom
works, workings
work-shirt
workstation
worktable, work_table
workwear
World_Wide_Web, WWW, web
worm_fence, snake_fence, snake-rail_fence, Virginia_fence
worm_gear
worm_wheel
worsted
worsted, worsted_yarn
wrap, wrapper
wraparound
wrapping, wrap, wrapper
wreck
wrench, spanner
wrestling_mat
wringer
wrist_pad
wrist_pin, gudgeon_pin
wristwatch, wrist_watch
writing_arm
writing_desk
writing_desk
writing_implement
xerographic_printer
Xerox, xerographic_copier, Xerox_machine
X-ray_film
X-ray_machine
X-ray_tube
yacht, racing_yacht
yacht_chair
yagi, Yagi_aerial
yard
yard
yardarm
yard_marker
yardstick, yard_measure
yarmulke, yarmulka, yarmelke
yashmak, yashmac
yataghan
yawl, dandy
yawl
yoke
yoke
yoke, coupling
yurt
Zamboni
zero
ziggurat, zikkurat, zikurat
zill
zip_gun
zither, cither, zithern
zoot_suit
shading
grain
wood_grain, woodgrain, woodiness
graining, woodgraining
marbleization, marbleisation, marbleizing, marbleising
light, lightness
aura, aureole, halo, nimbus, glory, gloriole
sunniness
glint
opalescence, iridescence
polish, gloss, glossiness, burnish
primary_color_for_pigments, primary_colour_for_pigments
primary_color_for_light, primary_colour_for_light
colorlessness, colourlessness, achromatism, achromaticity
mottle
achromia
shade, tint, tincture, tone
chromatic_color, chromatic_colour, spectral_color, spectral_colour
black, blackness, inkiness
coal_black, ebony, jet_black, pitch_black, sable, soot_black
alabaster
bone, ivory, pearl, off-white
gray, grayness, grey, greyness
ash_grey, ash_gray, silver, silver_grey, silver_gray
charcoal, charcoal_grey, charcoal_gray, oxford_grey, oxford_gray
sanguine
Turkey_red, alizarine_red
crimson, ruby, deep_red
dark_red
claret
fuschia
maroon
orange, orangeness
reddish_orange
yellow, yellowness
gamboge, lemon, lemon_yellow, maize
pale_yellow, straw, wheat
green, greenness, viridity
greenishness
sea_green
sage_green
bottle_green
emerald
olive_green, olive-green
jade_green, jade
blue, blueness
azure, cerulean, sapphire, lazuline, sky-blue
steel_blue
greenish_blue, aqua, aquamarine, turquoise, cobalt_blue, peacock_blue
purplish_blue, royal_blue
purple, purpleness
Tyrian_purple
indigo
lavender
reddish_purple, royal_purple
pink
carnation
rose, rosiness
chestnut
chocolate, coffee, deep_brown, umber, burnt_umber
light_brown
tan, topaz
beige, ecru
reddish_brown, sepia, burnt_sienna, Venetian_red, mahogany
brick_red
copper, copper_color
Indian_red
puce
olive
ultramarine
complementary_color, complementary
pigmentation
complexion, skin_color, skin_colour
ruddiness, rosiness
nonsolid_color, nonsolid_colour, dithered_color, dithered_colour
aposematic_coloration, warning_coloration
cryptic_coloration
ring
center_of_curvature, centre_of_curvature
cadaver, corpse, stiff, clay, remains
mandibular_notch
rib
skin, tegument, cutis
skin_graft
epidermal_cell
melanocyte
prickle_cell
columnar_cell, columnar_epithelial_cell
spongioblast
squamous_cell
amyloid_plaque, amyloid_protein_plaque
dental_plaque, bacterial_plaque
macule, macula
freckle, lentigo
bouffant
sausage_curl
forelock
spit_curl, kiss_curl
pigtail
pageboy
pompadour
thatch
soup-strainer, toothbrush
mustachio, moustachio, handle-bars
walrus_mustache, walrus_moustache
stubble
vandyke_beard, vandyke
soul_patch, Attilio
esophageal_smear
paraduodenal_smear, duodenal_smear
specimen
punctum
glenoid_fossa, glenoid_cavity
diastema
marrow, bone_marrow
mouth, oral_cavity, oral_fissure, rima_oris
canthus
milk
mother's_milk
colostrum, foremilk
vein, vena, venous_blood_vessel
ganglion_cell, gangliocyte
X_chromosome
embryonic_cell, formative_cell
myeloblast
sideroblast
osteocyte
megalocyte, macrocyte
leukocyte, leucocyte, white_blood_cell, white_cell, white_blood_corpuscle, white_corpuscle, WBC
histiocyte
fixed_phagocyte
lymphocyte, lymph_cell
monoblast
neutrophil, neutrophile
microphage
sickle_cell
siderocyte
spherocyte
ootid
oocyte
spermatid
Leydig_cell, Leydig's_cell
striated_muscle_cell, striated_muscle_fiber
smooth_muscle_cell
Ranvier's_nodes, nodes_of_Ranvier
neuroglia, glia
astrocyte
protoplasmic_astrocyte
oligodendrocyte
proprioceptor
dendrite
sensory_fiber, afferent_fiber
subarachnoid_space
cerebral_cortex, cerebral_mantle, pallium, cortex
renal_cortex
prepuce, foreskin
head, caput
scalp
frontal_eminence
suture, sutura, fibrous_joint
foramen_magnum
esophagogastric_junction, oesophagogastric_junction
heel
cuticle
hangnail, agnail
exoskeleton
abdominal_wall
lemon
coordinate_axis
landscape
medium
vehicle
paper
channel, transmission_channel
film, cinema, celluloid
silver_screen
free_press
press, public_press
print_media
storage_medium, data-storage_medium
magnetic_storage_medium, magnetic_medium, magnetic_storage
journalism, news_media
Fleet_Street
photojournalism
news_photography
rotogravure
newspaper, paper
daily
gazette
school_newspaper, school_paper
tabloid, rag, sheet
yellow_journalism, tabloid, tab
telecommunication, telecom
telephone, telephony
voice_mail, voicemail
call, phone_call, telephone_call
call-back
collect_call
call_forwarding
call-in
call_waiting
crank_call
local_call
long_distance, long-distance_call, trunk_call
toll_call
wake-up_call
three-way_calling
telegraphy
cable, cablegram, overseas_telegram
wireless
radiotelegraph, radiotelegraphy, wireless_telegraphy
radiotelephone, radiotelephony, wireless_telephone
broadcasting
Rediffusion
multiplex
radio, radiocommunication, wireless
television, telecasting, TV, video
cable_television, cable
high-definition_television, HDTV
reception
signal_detection, detection
Hakham
web_site, website, internet_site, site
chat_room, chatroom
portal_site, portal
jotter
breviary
wordbook
desk_dictionary, collegiate_dictionary
reckoner, ready_reckoner
document, written_document, papers
album, record_album
concept_album
rock_opera
tribute_album, benefit_album
magazine, mag
colour_supplement
comic_book
news_magazine
pulp, pulp_magazine
slick, slick_magazine, glossy
trade_magazine
movie, film, picture, moving_picture, moving-picture_show, motion_picture, motion-picture_show, picture_show, pic, flick
outtake
shoot-'em-up
spaghetti_Western
encyclical, encyclical_letter
crossword_puzzle, crossword
sign
street_sign
traffic_light, traffic_signal, stoplight
swastika, Hakenkreuz
concert
artwork, art, graphics, nontextual_matter
lobe
book_jacket, dust_cover, dust_jacket, dust_wrapper
cairn
three-day_event
comfort_food
comestible, edible, eatable, pabulum, victual, victuals
tuck
course
dainty, delicacy, goody, kickshaw, treat
dish
fast_food
finger_food
ingesta
kosher
fare
diet
diet
dietary
balanced_diet
bland_diet, ulcer_diet
clear_liquid_diet
diabetic_diet
dietary_supplement
carbohydrate_loading, carbo_loading
fad_diet
gluten-free_diet
high-protein_diet
high-vitamin_diet, vitamin-deficiency_diet
light_diet
liquid_diet
low-calorie_diet
low-fat_diet
low-sodium_diet, low-salt_diet, salt-free_diet
macrobiotic_diet
reducing_diet, obesity_diet
soft_diet, pap, spoon_food
vegetarianism
menu
chow, chuck, eats, grub
board, table
mess
ration
field_ration
K_ration
C-ration
foodstuff, food_product
starches
breadstuff
coloring, colouring, food_coloring, food_colouring, food_color, food_colour
concentrate
tomato_concentrate
meal
kibble
cornmeal, Indian_meal
farina
matzo_meal, matzoh_meal, matzah_meal
oatmeal, rolled_oats
pea_flour
roughage, fiber
bran
flour
plain_flour
wheat_flour
whole_wheat_flour, graham_flour, graham, whole_meal_flour
soybean_meal, soybean_flour, soy_flour
semolina
corn_gluten_feed
nutriment, nourishment, nutrition, sustenance, aliment, alimentation, victuals
commissariat, provisions, provender, viands, victuals
larder
frozen_food, frozen_foods
canned_food, canned_foods, canned_goods, tinned_goods
canned_meat, tinned_meat
Spam
dehydrated_food, dehydrated_foods
square_meal
meal, repast
potluck
refection
refreshment
breakfast
continental_breakfast, petit_dejeuner
brunch
lunch, luncheon, tiffin, dejeuner
business_lunch
high_tea
tea, afternoon_tea, teatime
dinner
supper
buffet
picnic
cookout
barbecue, barbeque
clambake
fish_fry
bite, collation, snack
nosh
nosh-up
ploughman's_lunch
coffee_break, tea_break
banquet, feast, spread
entree, main_course
piece_de_resistance
plate
adobo
side_dish, side_order, entremets
special
casserole
chicken_casserole
chicken_cacciatore, chicken_cacciatora, hunter's_chicken
antipasto
appetizer, appetiser, starter
canape
cocktail
fruit_cocktail
crab_cocktail
shrimp_cocktail
hors_d'oeuvre
relish
dip
bean_dip
cheese_dip
clam_dip
guacamole
soup
soup_du_jour
alphabet_soup
consomme
madrilene
bisque
borsch, borsh, borscht, borsht, borshch, bortsch
broth
barley_water
bouillon
beef_broth, beef_stock
chicken_broth, chicken_stock
broth, stock
stock_cube
chicken_soup
cock-a-leekie, cocky-leeky
gazpacho
gumbo
julienne
marmite
mock_turtle_soup
mulligatawny
oxtail_soup
pea_soup
pepper_pot, Philadelphia_pepper_pot
petite_marmite, minestrone, vegetable_soup
potage, pottage
pottage
turtle_soup, green_turtle_soup
eggdrop_soup
chowder
corn_chowder
clam_chowder
Manhattan_clam_chowder
New_England_clam_chowder
fish_chowder
won_ton, wonton, wonton_soup
split-pea_soup
green_pea_soup, potage_St._Germain
lentil_soup
Scotch_broth
vichyssoise
stew
bigos
Brunswick_stew
burgoo
burgoo
olla_podrida, Spanish_burgoo
mulligan_stew, mulligan, Irish_burgoo
purloo, chicken_purloo, poilu
goulash, Hungarian_goulash, gulyas
hotchpotch
hot_pot, hotpot
beef_goulash
pork-and-veal_goulash
porkholt
Irish_stew
oyster_stew
lobster_stew
lobscouse, lobscuse, scouse
fish_stew
bouillabaisse
matelote
paella
fricassee
chicken_stew
turkey_stew
beef_stew
ragout
ratatouille
salmi
pot-au-feu
slumgullion
smorgasbord
viand
ready-mix
brownie_mix
cake_mix
lemonade_mix
self-rising_flour, self-raising_flour
choice_morsel, tidbit, titbit
savory, savoury
calf's-foot_jelly
caramel, caramelized_sugar
lump_sugar
cane_sugar
castor_sugar, caster_sugar
powdered_sugar
granulated_sugar
icing_sugar
corn_sugar
brown_sugar
demerara, demerara_sugar
sweet, confection
confectionery
confiture
sweetmeat
candy, confect
candy_bar
carob_bar
hardbake
hard_candy
barley-sugar, barley_candy
brandyball
jawbreaker
lemon_drop
sourball
patty
peppermint_patty
bonbon
brittle, toffee, toffy
peanut_brittle
chewing_gum, gum
gum_ball
bubble_gum
butterscotch
candied_fruit, succade, crystallized_fruit
candied_apple, candy_apple, taffy_apple, caramel_apple, toffee_apple
crystallized_ginger
grapefruit_peel
lemon_peel
orange_peel
candied_citrus_peel
candy_cane
candy_corn
caramel
center, centre
comfit
cotton_candy, spun_sugar, candyfloss
dragee
dragee
fondant
fudge
chocolate_fudge
divinity, divinity_fudge
penuche, penoche, panoche, panocha
gumdrop
jujube
honey_crisp
mint, mint_candy
horehound
peppermint, peppermint_candy
jelly_bean, jelly_egg
kiss, candy_kiss
molasses_kiss
meringue_kiss
chocolate_kiss
licorice, liquorice
Life_Saver
lollipop, sucker, all-day_sucker
lozenge
cachou
cough_drop, troche, pastille, pastil
marshmallow
marzipan, marchpane
nougat
nougat_bar
nut_bar
peanut_bar
popcorn_ball
praline
rock_candy
rock_candy, rock
sugar_candy
sugarplum
taffy
molasses_taffy
truffle, chocolate_truffle
Turkish_Delight
dessert, sweet, afters
ambrosia, nectar
ambrosia
baked_Alaska
blancmange
charlotte
compote, fruit_compote
dumpling
flan
frozen_dessert
junket
mousse
mousse
pavlova
peach_melba
whip
prune_whip
pudding
pudding, pud
syllabub, sillabub
tiramisu
trifle
tipsy_cake
jello, Jell-O
apple_dumpling
ice, frappe
water_ice, sorbet
ice_cream, icecream
ice-cream_cone
chocolate_ice_cream
Neapolitan_ice_cream
peach_ice_cream
sherbert, sherbet
strawberry_ice_cream
tutti-frutti
vanilla_ice_cream
ice_lolly, lolly, lollipop, popsicle
ice_milk
frozen_yogurt
snowball
snowball
parfait
ice-cream_sundae, sundae
split
banana_split
frozen_pudding
frozen_custard, soft_ice_cream
pudding
flummery
fish_mousse
chicken_mousse
chocolate_mousse
plum_pudding, Christmas_pudding
carrot_pudding
corn_pudding
steamed_pudding
duff, plum_duff
vanilla_pudding
chocolate_pudding
brown_Betty
Nesselrode, Nesselrode_pudding
pease_pudding
custard
creme_caramel
creme_anglais
creme_brulee
fruit_custard
tapioca
tapioca_pudding
roly-poly, roly-poly_pudding
suet_pudding
Bavarian_cream
maraschino, maraschino_cherry
nonpareil
zabaglione, sabayon
garnish
pastry, pastry_dough
turnover
apple_turnover
knish
pirogi, piroshki, pirozhki
samosa
timbale
puff_paste, pate_feuillete
phyllo
puff_batter, pouf_paste, pate_a_choux
ice-cream_cake, icebox_cake
doughnut, donut, sinker
fish_cake, fish_ball
fish_stick, fish_finger
conserve, preserve, conserves, preserves
apple_butter
chowchow
jam
lemon_curd, lemon_cheese
strawberry_jam, strawberry_preserves
jelly
apple_jelly
crabapple_jelly
grape_jelly
marmalade
orange_marmalade
gelatin, jelly
gelatin_dessert
buffalo_wing
barbecued_wing
mess
mince
puree
barbecue, barbeque
biryani, biriani
escalope_de_veau_Orloff
saute
patty, cake
veal_parmesan, veal_parmigiana
veal_cordon_bleu
margarine, margarin, oleo, oleomargarine, marge
mincemeat
stuffing, dressing
turkey_stuffing
oyster_stuffing, oyster_dressing
forcemeat, farce
bread, breadstuff, staff_of_life
anadama_bread
bap
barmbrack
breadstick, bread-stick
grissino
brown_bread, Boston_brown_bread
bun, roll
tea_bread
caraway_seed_bread
challah, hallah
cinnamon_bread
cracked-wheat_bread
cracker
crouton
dark_bread, whole_wheat_bread, whole_meal_bread, brown_bread
English_muffin
flatbread
garlic_bread
gluten_bread
graham_bread
Host
flatbrod
bannock
chapatti, chapati
pita, pocket_bread
loaf_of_bread, loaf
French_loaf
matzo, matzoh, matzah, unleavened_bread
nan, naan
onion_bread
raisin_bread
quick_bread
banana_bread
date_bread
date-nut_bread
nut_bread
oatcake
Irish_soda_bread
skillet_bread, fry_bread
rye_bread
black_bread, pumpernickel
Jewish_rye_bread, Jewish_rye
limpa
Swedish_rye_bread, Swedish_rye
salt-rising_bread
simnel
sour_bread, sourdough_bread
toast
wafer
white_bread, light_bread
baguet, baguette
French_bread
Italian_bread
cornbread
corn_cake
skillet_corn_bread
ashcake, ash_cake, corn_tash
hoecake
cornpone, pone
corn_dab, corn_dodger, dodger
hush_puppy, hushpuppy
johnnycake, johnny_cake, journey_cake
Shawnee_cake
spoon_bread, batter_bread
cinnamon_toast
orange_toast
Melba_toast
zwieback, rusk, Brussels_biscuit, twice-baked_bread
frankfurter_bun, hotdog_bun
hamburger_bun, hamburger_roll
muffin, gem
bran_muffin
corn_muffin
Yorkshire_pudding
popover
scone
drop_scone, griddlecake, Scotch_pancake
cross_bun, hot_cross_bun
brioche
crescent_roll, croissant
hard_roll, Vienna_roll
soft_roll
kaiser_roll
Parker_House_roll
clover-leaf_roll
onion_roll
bialy, bialystoker
sweet_roll, coffee_roll
bear_claw, bear_paw
cinnamon_roll, cinnamon_bun, cinnamon_snail
honey_bun, sticky_bun, caramel_bun, schnecken
pinwheel_roll
danish, danish_pastry
bagel, beigel
onion_bagel
biscuit
rolled_biscuit
baking-powder_biscuit
buttermilk_biscuit, soda_biscuit
shortcake
hardtack, pilot_biscuit, pilot_bread, sea_biscuit, ship_biscuit
saltine
soda_cracker
oyster_cracker
water_biscuit
graham_cracker
pretzel
soft_pretzel
sandwich
sandwich_plate
butty
ham_sandwich
chicken_sandwich
club_sandwich, three-decker, triple-decker
open-face_sandwich, open_sandwich
hamburger, beefburger, burger
cheeseburger
tunaburger
hotdog, hot_dog, red_hot
Sloppy_Joe
bomber, grinder, hero, hero_sandwich, hoagie, hoagy, Cuban_sandwich, Italian_sandwich, poor_boy, sub, submarine, submarine_sandwich, torpedo, wedge, zep
gyro
bacon-lettuce-tomato_sandwich, BLT
Reuben
western, western_sandwich
wrap
spaghetti
hasty_pudding
gruel
congee, jook
skilly
edible_fruit
vegetable, veggie, veg
julienne, julienne_vegetable
raw_vegetable, rabbit_food
crudites
celery_stick
legume
pulse
potherb
greens, green, leafy_vegetable
chop-suey_greens
bean_curd, tofu
solanaceous_vegetable
root_vegetable
potato, white_potato, Irish_potato, murphy, spud, tater
baked_potato
french_fries, french-fried_potatoes, fries, chips
home_fries, home-fried_potatoes
jacket_potato
mashed_potato
potato_skin, potato_peel, potato_peelings
Uruguay_potato
yam
sweet_potato
yam
snack_food
chip, crisp, potato_chip, Saratoga_chip
corn_chip
tortilla_chip
nacho
eggplant, aubergine, mad_apple
pieplant, rhubarb
cruciferous_vegetable
mustard, mustard_greens, leaf_mustard, Indian_mustard
cabbage, chou
kale, kail, cole
collards, collard_greens
Chinese_cabbage, celery_cabbage, Chinese_celery
bok_choy, bok_choi
head_cabbage
red_cabbage
savoy_cabbage, savoy
broccoli
cauliflower
brussels_sprouts
broccoli_rabe, broccoli_raab
squash
summer_squash
yellow_squash
crookneck, crookneck_squash, summer_crookneck
zucchini, courgette
marrow, vegetable_marrow
cocozelle
pattypan_squash
spaghetti_squash
winter_squash
acorn_squash
butternut_squash
hubbard_squash
turban_squash
buttercup_squash
cushaw
winter_crookneck_squash
cucumber, cuke
gherkin
artichoke, globe_artichoke
artichoke_heart
Jerusalem_artichoke, sunchoke
asparagus
bamboo_shoot
sprout
bean_sprout
alfalfa_sprout
beet, beetroot
beet_green
sugar_beet
mangel-wurzel
chard, Swiss_chard, spinach_beet, leaf_beet
pepper
sweet_pepper
bell_pepper
green_pepper
globe_pepper
pimento, pimiento
hot_pepper
chili, chili_pepper, chilli, chilly, chile
jalapeno, jalapeno_pepper
chipotle
cayenne, cayenne_pepper
tabasco, red_pepper
onion
Bermuda_onion
green_onion, spring_onion, scallion
Vidalia_onion
Spanish_onion
purple_onion, red_onion
leek
shallot
salad_green, salad_greens
lettuce
butterhead_lettuce
buttercrunch
Bibb_lettuce
Boston_lettuce
crisphead_lettuce, iceberg_lettuce, iceberg
cos, cos_lettuce, romaine, romaine_lettuce
leaf_lettuce, loose-leaf_lettuce
celtuce
bean, edible_bean
goa_bean
lentil
pea
green_pea, garden_pea
marrowfat_pea
snow_pea, sugar_pea
sugar_snap_pea
split-pea
chickpea, garbanzo
cajan_pea, pigeon_pea, dahl
field_pea
mushy_peas
black-eyed_pea, cowpea
common_bean
kidney_bean
navy_bean, pea_bean, white_bean
pinto_bean
frijole
black_bean, turtle_bean
fresh_bean
flageolet, haricot
green_bean
snap_bean, snap
string_bean
Kentucky_wonder, Kentucky_wonder_bean
scarlet_runner, scarlet_runner_bean, runner_bean, English_runner_bean
haricot_vert, haricots_verts, French_bean
wax_bean, yellow_bean
shell_bean
lima_bean
Fordhooks
sieva_bean, butter_bean, butterbean, civet_bean
fava_bean, broad_bean
soy, soybean, soya, soya_bean
green_soybean
field_soybean
cardoon
carrot
carrot_stick
celery
pascal_celery, Paschal_celery
celeriac, celery_root
chicory, curly_endive
radicchio
coffee_substitute
chicory, chicory_root
Postum
chicory_escarole, endive, escarole
Belgian_endive, French_endive, witloof
corn, edible_corn
sweet_corn, green_corn
hominy
lye_hominy
pearl_hominy
popcorn
cress
watercress
garden_cress
winter_cress
dandelion_green
gumbo, okra
kohlrabi, turnip_cabbage
lamb's-quarter, pigweed, wild_spinach
wild_spinach
tomato
beefsteak_tomato
cherry_tomato
plum_tomato
tomatillo, husk_tomato, Mexican_husk_tomato
mushroom
stuffed_mushroom
salsify
oyster_plant, vegetable_oyster
scorzonera, black_salsify
parsnip
pumpkin
radish
turnip
white_turnip
rutabaga, swede, swedish_turnip, yellow_turnip
turnip_greens
sorrel, common_sorrel
French_sorrel
spinach
taro, taro_root, cocoyam, dasheen, edda
truffle, earthnut
edible_nut
bunya_bunya
peanut, earthnut, goober, goober_pea, groundnut, monkey_nut
freestone
cling, clingstone
windfall
apple
crab_apple, crabapple
eating_apple, dessert_apple
Baldwin
Cortland
Cox's_Orange_Pippin
Delicious
Golden_Delicious, Yellow_Delicious
Red_Delicious
Empire
Grimes'_golden
Jonathan
McIntosh
Macoun
Northern_Spy
Pearmain
Pippin
Prima
Stayman
Winesap
Stayman_Winesap
cooking_apple
Bramley's_Seedling
Granny_Smith
Lane's_Prince_Albert
Newtown_Wonder
Rome_Beauty
berry
bilberry, whortleberry, European_blueberry
huckleberry
blueberry
wintergreen, boxberry, checkerberry, teaberry, spiceberry
cranberry
lingonberry, mountain_cranberry, cowberry, lowbush_cranberry
currant
gooseberry
black_currant
red_currant
blackberry
boysenberry
dewberry
loganberry
raspberry
saskatoon, serviceberry, shadberry, juneberry
strawberry
sugarberry, hackberry
persimmon
acerola, barbados_cherry, surinam_cherry, West_Indian_cherry
carambola, star_fruit
ceriman, monstera
carissa_plum, natal_plum
citrus, citrus_fruit, citrous_fruit
orange
temple_orange
mandarin, mandarin_orange
clementine
satsuma
tangerine
tangelo, ugli, ugli_fruit
bitter_orange, Seville_orange, sour_orange
sweet_orange
Jaffa_orange
navel_orange
Valencia_orange
kumquat
lemon
lime
key_lime
grapefruit
pomelo, shaddock
citrange
citron
almond
Jordan_almond
apricot
peach
nectarine
pitahaya
plum
damson, damson_plum
greengage, greengage_plum
beach_plum
sloe
Victoria_plum
dried_fruit
dried_apricot
prune
raisin
seedless_raisin, sultana
seeded_raisin
currant
fig
pineapple, ananas
anchovy_pear, river_pear
banana
passion_fruit
granadilla
sweet_calabash
bell_apple, sweet_cup, water_lemon, yellow_granadilla
breadfruit
jackfruit, jak, jack
cacao_bean, cocoa_bean
cocoa
canistel, eggfruit
melon
melon_ball
muskmelon, sweet_melon
cantaloup, cantaloupe
winter_melon
honeydew, honeydew_melon
Persian_melon
net_melon, netted_melon, nutmeg_melon
casaba, casaba_melon
watermelon
cherry
sweet_cherry, black_cherry
bing_cherry
heart_cherry, oxheart, oxheart_cherry
blackheart, blackheart_cherry
capulin, Mexican_black_cherry
sour_cherry
amarelle
morello
cocoa_plum, coco_plum, icaco
gherkin
grape
fox_grape
Concord_grape
Catawba
muscadine, bullace_grape
scuppernong
slipskin_grape
vinifera_grape
emperor
muscat, muscatel, muscat_grape
ribier
sultana
Tokay
flame_tokay
Thompson_Seedless
custard_apple
cherimoya, cherimolla
soursop, guanabana
sweetsop, annon, sugar_apple
ilama
pond_apple
papaw, pawpaw
papaya
kai_apple
ketembilla, kitembilla, kitambilla
ackee, akee
durian
feijoa, pineapple_guava
genip, Spanish_lime
genipap, genipap_fruit
kiwi, kiwi_fruit, Chinese_gooseberry
loquat, Japanese_plum
mangosteen
mango
sapodilla, sapodilla_plum, sapota
sapote, mammee, marmalade_plum
tamarind, tamarindo
avocado, alligator_pear, avocado_pear, aguacate
date
elderberry
guava
mombin
hog_plum, yellow_mombin
hog_plum, wild_plum
jaboticaba
jujube, Chinese_date, Chinese_jujube
litchi, litchi_nut, litchee, lichi, leechee, lichee, lychee
longanberry, dragon's_eye
mamey, mammee, mammee_apple
marang
medlar
medlar
mulberry
olive
black_olive, ripe_olive
green_olive
pear
bosc
anjou
bartlett, bartlett_pear
seckel, seckel_pear
plantain
plumcot
pomegranate
prickly_pear
Barbados_gooseberry, blade_apple
quandong, quandang, quantong, native_peach
quandong_nut
quince
rambutan, rambotan
pulasan, pulassan
rose_apple
sorb, sorb_apple
sour_gourd, monkey_bread
edible_seed
pumpkin_seed
betel_nut, areca_nut
beechnut
walnut
black_walnut
English_walnut
brazil_nut, brazil
butternut
souari_nut
cashew, cashew_nut
chestnut
chincapin, chinkapin, chinquapin
hazelnut, filbert, cobnut, cob
coconut, cocoanut
coconut_milk, coconut_water
grugru_nut
hickory_nut
cola_extract
macadamia_nut
pecan
pine_nut, pignolia, pinon_nut
pistachio, pistachio_nut
sunflower_seed
anchovy_paste
rollmops
feed, provender
cattle_cake
creep_feed
fodder
feed_grain
eatage, forage, pasture, pasturage, grass
silage, ensilage
oil_cake
oil_meal
alfalfa
broad_bean, horse_bean
hay
timothy
stover
grain, food_grain, cereal
grist
groats
millet
barley, barleycorn
pearl_barley
buckwheat
bulgur, bulghur, bulgur_wheat
wheat, wheat_berry
cracked_wheat
stodge
wheat_germ
oat
rice
brown_rice
white_rice, polished_rice
wild_rice, Indian_rice
paddy
slop, slops, swill, pigswill, pigwash
mash
chicken_feed, scratch
cud, rechewed_food
bird_feed, bird_food, birdseed
petfood, pet-food, pet_food
dog_food
cat_food
canary_seed
salad
tossed_salad
green_salad
Caesar_salad
salmagundi
salad_nicoise
combination_salad
chef's_salad
potato_salad
pasta_salad
macaroni_salad
fruit_salad
Waldorf_salad
crab_Louis
herring_salad
tuna_fish_salad, tuna_salad
chicken_salad
coleslaw, slaw
aspic
molded_salad
tabbouleh, tabooli
ingredient, fixings
flavorer, flavourer, flavoring, flavouring, seasoner, seasoning
bouillon_cube
condiment
herb
fines_herbes
spice
spearmint_oil
lemon_oil
wintergreen_oil, oil_of_wintergreen
salt, table_salt, common_salt
celery_salt
onion_salt
seasoned_salt
sour_salt
five_spice_powder
allspice
cinnamon
stick_cinnamon
clove
cumin, cumin_seed
fennel
ginger, gingerroot
ginger, powdered_ginger
mace
nutmeg
pepper, peppercorn
black_pepper
white_pepper
sassafras
basil, sweet_basil
bay_leaf
borage
hyssop
caraway
chervil
chives
comfrey, healing_herb
coriander, Chinese_parsley, cilantro
coriander, coriander_seed
costmary
fennel, common_fennel
fennel, Florence_fennel, finocchio
fennel_seed
fenugreek, fenugreek_seed
garlic, ail
clove, garlic_clove
garlic_chive
lemon_balm
lovage
marjoram, oregano
mint
mustard_seed
mustard, table_mustard
Chinese_mustard
nasturtium
parsley
salad_burnet
rosemary
rue
sage
clary_sage
savory, savoury
summer_savory, summer_savoury
winter_savory, winter_savoury
sweet_woodruff, waldmeister
sweet_cicely
tarragon, estragon
thyme
turmeric
caper
catsup, ketchup, cetchup, tomato_ketchup
cardamom, cardamon, cardamum
cayenne, cayenne_pepper, red_pepper
chili_powder
chili_sauce
chutney, Indian_relish
steak_sauce
taco_sauce
salsa
mint_sauce
cranberry_sauce
curry_powder
curry
lamb_curry
duck_sauce, hoisin_sauce
horseradish
marinade
paprika
Spanish_paprika
pickle
dill_pickle
bread_and_butter_pickle
pickle_relish
piccalilli
sweet_pickle
applesauce, apple_sauce
soy_sauce, soy
Tabasco, Tabasco_sauce
tomato_paste
angelica
angelica
almond_extract
anise, aniseed, anise_seed
Chinese_anise, star_anise, star_aniseed
juniper_berries
saffron
sesame_seed, benniseed
caraway_seed
poppy_seed
dill, dill_weed
dill_seed
celery_seed
lemon_extract
monosodium_glutamate, MSG
vanilla_bean
vinegar, acetum
cider_vinegar
wine_vinegar
sauce
anchovy_sauce
hot_sauce
hard_sauce
horseradish_sauce, sauce_Albert
bolognese_pasta_sauce
carbonara
tomato_sauce
tartare_sauce, tartar_sauce
wine_sauce
marchand_de_vin, mushroom_wine_sauce
bread_sauce
plum_sauce
peach_sauce
apricot_sauce
pesto
ravigote, ravigotte
remoulade_sauce
dressing, salad_dressing
sauce_Louis
bleu_cheese_dressing, blue_cheese_dressing
blue_cheese_dressing, Roquefort_dressing
French_dressing, vinaigrette, sauce_vinaigrette
Lorenzo_dressing
anchovy_dressing
Italian_dressing
half-and-half_dressing
mayonnaise, mayo
green_mayonnaise, sauce_verte
aioli, aioli_sauce, garlic_sauce
Russian_dressing, Russian_mayonnaise
salad_cream
Thousand_Island_dressing
barbecue_sauce
hollandaise
bearnaise
Bercy, Bercy_butter
bordelaise
bourguignon, bourguignon_sauce, Burgundy_sauce
brown_sauce, sauce_Espagnole
Espagnole, sauce_Espagnole
Chinese_brown_sauce, brown_sauce
blanc
cheese_sauce
chocolate_sauce, chocolate_syrup
hot-fudge_sauce, fudge_sauce
cocktail_sauce, seafood_sauce
Colbert, Colbert_butter
white_sauce, bechamel_sauce, bechamel
cream_sauce
Mornay_sauce
demiglace, demi-glaze
gravy, pan_gravy
gravy
spaghetti_sauce, pasta_sauce
marinara
mole
hunter's_sauce, sauce_chausseur
mushroom_sauce
mustard_sauce
Nantua, shrimp_sauce
Hungarian_sauce, paprika_sauce
pepper_sauce, Poivrade
roux
Smitane
Soubise, white_onion_sauce
Lyonnaise_sauce, brown_onion_sauce
veloute
allemande, allemande_sauce
caper_sauce
poulette
curry_sauce
Worcester_sauce, Worcestershire, Worcestershire_sauce
coconut_milk, coconut_cream
egg, eggs
egg_white, white, albumen, ovalbumin
egg_yolk, yolk
boiled_egg, coddled_egg
hard-boiled_egg, hard-cooked_egg
Easter_egg
Easter_egg
chocolate_egg
candy_egg
poached_egg, dropped_egg
scrambled_eggs
deviled_egg, stuffed_egg
shirred_egg, baked_egg, egg_en_cocotte
omelet, omelette
firm_omelet
French_omelet
fluffy_omelet
western_omelet
souffle
fried_egg
dairy_product
milk
milk
sour_milk
soya_milk, soybean_milk, soymilk
formula
pasteurized_milk
cows'_milk
yak's_milk
goats'_milk
acidophilus_milk
raw_milk
scalded_milk
homogenized_milk
certified_milk
powdered_milk, dry_milk, dried_milk, milk_powder
nonfat_dry_milk
evaporated_milk
condensed_milk
skim_milk, skimmed_milk
semi-skimmed_milk
whole_milk
low-fat_milk
buttermilk
cream
clotted_cream, Devonshire_cream
double_creme, heavy_whipping_cream
half-and-half
heavy_cream
light_cream, coffee_cream, single_cream
sour_cream, soured_cream
whipping_cream, light_whipping_cream
butter
clarified_butter, drawn_butter
ghee
brown_butter, beurre_noisette
Meuniere_butter, lemon_butter
yogurt, yoghurt, yoghourt
blueberry_yogurt
raita
whey
curd
curd
clabber
cheese
paring
cream_cheese
double_cream
mascarpone
triple_cream, triple_creme
cottage_cheese, pot_cheese, farm_cheese, farmer's_cheese
process_cheese, processed_cheese
bleu, blue_cheese
Stilton
Roquefort
gorgonzola
Danish_blue
Bavarian_blue
Brie
brick_cheese
Camembert
cheddar, cheddar_cheese, Armerican_cheddar, American_cheese
rat_cheese, store_cheese
Cheshire_cheese
double_Gloucester
Edam
goat_cheese, chevre
Gouda, Gouda_cheese
grated_cheese
hand_cheese
Liederkranz
Limburger
mozzarella
Muenster
Parmesan
quark_cheese, quark
ricotta
string_cheese
Swiss_cheese
Emmenthal, Emmental, Emmenthaler, Emmentaler
Gruyere
sapsago
Velveeta
nut_butter
peanut_butter
marshmallow_fluff
onion_butter
pimento_butter
shrimp_butter
lobster_butter
yak_butter
spread, paste
cheese_spread
anchovy_butter
fishpaste
garlic_butter
miso
wasabi
snail_butter
hummus, humus, hommos, hoummos, humous
pate
duck_pate
foie_gras, pate_de_foie_gras
tapenade
tahini
sweetening, sweetener
aspartame
honey
saccharin
sugar, refined_sugar
syrup, sirup
sugar_syrup
molasses
sorghum, sorghum_molasses
treacle, golden_syrup
grenadine
maple_syrup
corn_syrup
miraculous_food, manna, manna_from_heaven
batter
dough
bread_dough
pancake_batter
fritter_batter
coq_au_vin
chicken_provencale
chicken_and_rice
moo_goo_gai_pan
arroz_con_pollo
bacon_and_eggs
barbecued_spareribs, spareribs
beef_Bourguignonne, boeuf_Bourguignonne
beef_Wellington, filet_de_boeuf_en_croute
bitok
boiled_dinner, New_England_boiled_dinner
Boston_baked_beans
bubble_and_squeak
pasta
cannelloni
carbonnade_flamande, Belgian_beef_stew
cheese_souffle
chicken_Marengo
chicken_cordon_bleu
Maryland_chicken
chicken_paprika, chicken_paprikash
chicken_Tetrazzini
Tetrazzini
chicken_Kiev
chili, chili_con_carne
chili_dog
chop_suey
chow_mein
codfish_ball, codfish_cake
coquille
coquilles_Saint-Jacques
croquette
cottage_pie
rissole
dolmas, stuffed_grape_leaves
egg_foo_yong, egg_fu_yung
egg_roll, spring_roll
eggs_Benedict
enchilada
falafel, felafel
fish_and_chips
fondue, fondu
cheese_fondue
chocolate_fondue
fondue, fondu
beef_fondue, boeuf_fondu_bourguignon
French_toast
fried_rice, Chinese_fried_rice
frittata
frog_legs
galantine
gefilte_fish, fish_ball
haggis
ham_and_eggs
hash
corned_beef_hash
jambalaya
kabob, kebab, shish_kebab
kedgeree
souvlaki, souvlakia
lasagna, lasagne
seafood_Newburg
lobster_Newburg, lobster_a_la_Newburg
shrimp_Newburg
Newburg_sauce
lobster_thermidor
lutefisk, lutfisk
macaroni_and_cheese
macedoine
meatball
porcupine_ball, porcupines
Swedish_meatball
meat_loaf, meatloaf
moussaka
osso_buco
marrow, bone_marrow
pheasant_under_glass
pigs_in_blankets
pilaf, pilaff, pilau, pilaw
bulgur_pilaf
pizza, pizza_pie
sausage_pizza
pepperoni_pizza
cheese_pizza
anchovy_pizza
Sicilian_pizza
poi
pork_and_beans
porridge
oatmeal, burgoo
loblolly
potpie
rijsttaffel, rijstaffel, rijstafel
risotto, Italian_rice
roulade
fish_loaf
salmon_loaf
Salisbury_steak
sauerbraten
sauerkraut
scallopine, scallopini
veal_scallopini
scampi
Scotch_egg
Scotch_woodcock
scrapple
spaghetti_and_meatballs
Spanish_rice
steak_tartare, tartar_steak, cannibal_mound
pepper_steak
steak_au_poivre, peppered_steak, pepper_steak
beef_Stroganoff
stuffed_cabbage
kishke, stuffed_derma
stuffed_peppers
stuffed_tomato, hot_stuffed_tomato
stuffed_tomato, cold_stuffed_tomato
succotash
sukiyaki
sashimi
sushi
Swiss_steak
tamale
tamale_pie
tempura
teriyaki
terrine
Welsh_rarebit, Welsh_rabbit, rarebit
schnitzel, Wiener_schnitzel
taco
chicken_taco
burrito
beef_burrito
quesadilla
tostada
bean_tostada
refried_beans, frijoles_refritos
beverage, drink, drinkable, potable
wish-wash
concoction, mixture, intermixture
mix, premix
filling
lekvar
potion
elixir
elixir_of_life
philter, philtre, love-potion, love-philter, love-philtre
alcohol, alcoholic_drink, alcoholic_beverage, intoxicant, inebriant
proof_spirit
home_brew, homebrew
hooch, hootch
kava, kavakava
aperitif
brew, brewage
beer
draft_beer, draught_beer
suds
Munich_beer, Munchener
bock, bock_beer
lager, lager_beer
light_beer
Oktoberfest, Octoberfest
Pilsner, Pilsener
shebeen
Weissbier, white_beer, wheat_beer
Weizenbock
malt
wort
malt, malt_liquor
ale
bitter
Burton
pale_ale
porter, porter's_beer
stout
Guinness
kvass
mead
metheglin
hydromel
oenomel
near_beer
ginger_beer
sake, saki, rice_beer
wine, vino
vintage
red_wine
white_wine
blush_wine, pink_wine, rose, rose_wine
altar_wine, sacramental_wine
sparkling_wine
champagne, bubbly
cold_duck
Burgundy, Burgundy_wine
Beaujolais
Medoc
Canary_wine
Chablis, white_Burgundy
Montrachet
Chardonnay, Pinot_Chardonnay
Pinot_noir
Pinot_blanc
Bordeaux, Bordeaux_wine
claret, red_Bordeaux
Chianti
Cabernet, Cabernet_Sauvignon
Merlot
Sauvignon_blanc
California_wine
Cotes_de_Provence
dessert_wine
Dubonnet
jug_wine
macon, maconnais
Moselle
Muscadet
plonk
retsina
Rhine_wine, Rhenish, hock
Riesling
liebfraumilch
Rhone_wine
Rioja
sack
Saint_Emilion
Soave
zinfandel
Sauterne, Sauternes
straw_wine
table_wine
Tokay
vin_ordinaire
vermouth
sweet_vermouth, Italian_vermouth
dry_vermouth, French_vermouth
Chenin_blanc
Verdicchio
Vouvray
Yquem
generic, generic_wine
varietal, varietal_wine
fortified_wine
Madeira
malmsey
port, port_wine
sherry
Marsala
muscat, muscatel, muscadel, muscadelle
liquor, spirits, booze, hard_drink, hard_liquor, John_Barleycorn, strong_drink
neutral_spirits, ethyl_alcohol
aqua_vitae, ardent_spirits
eau_de_vie
moonshine, bootleg, corn_liquor
bathtub_gin
aquavit, akvavit
arrack, arak
bitters
brandy
applejack
Calvados
Armagnac
Cognac
grappa
kirsch
slivovitz
gin
sloe_gin
geneva, Holland_gin, Hollands
grog
ouzo
rum
demerara, demerara_rum
Jamaica_rum
schnapps, schnaps
pulque
mescal
tequila
vodka
whiskey, whisky
blended_whiskey, blended_whisky
bourbon
corn_whiskey, corn_whisky, corn
firewater
Irish, Irish_whiskey, Irish_whisky
poteen
rye, rye_whiskey, rye_whisky
Scotch, Scotch_whiskey, Scotch_whisky, malt_whiskey, malt_whisky, Scotch_malt_whiskey, Scotch_malt_whisky
sour_mash, sour_mash_whiskey
liqueur, cordial
absinth, absinthe
amaretto
anisette, anisette_de_Bordeaux
benedictine
Chartreuse
coffee_liqueur
creme_de_cacao
creme_de_menthe
creme_de_fraise
Drambuie
Galliano
orange_liqueur
curacao, curacoa
triple_sec
Grand_Marnier
kummel
maraschino, maraschino_liqueur
pastis
Pernod
pousse-cafe
Kahlua
ratafia, ratafee
sambuca
mixed_drink
cocktail
Dom_Pedro
highball
mixer
bishop
Bloody_Mary
Virgin_Mary, bloody_shame
bullshot
cobbler
collins, Tom_Collins
cooler
refresher
smoothie
daiquiri, rum_cocktail
strawberry_daiquiri
NADA_daiquiri
spritzer
flip
gimlet
gin_and_tonic
grasshopper
Harvey_Wallbanger
julep, mint_julep
manhattan
Rob_Roy
margarita
martini
gin_and_it
vodka_martini
old_fashioned
pink_lady
Sazerac
screwdriver
sidecar
Scotch_and_soda
sling
brandy_sling
gin_sling
rum_sling
sour
whiskey_sour, whisky_sour
stinger
swizzle
hot_toddy, toddy
zombie, zombi
fizz
Irish_coffee
cafe_au_lait
cafe_noir, demitasse
decaffeinated_coffee, decaf
drip_coffee
espresso
caffe_latte, latte
cappuccino, cappuccino_coffee, coffee_cappuccino
iced_coffee, ice_coffee
instant_coffee
mocha, mocha_coffee
mocha
cassareep
Turkish_coffee
chocolate_milk
cider, cyder
hard_cider
scrumpy
sweet_cider
mulled_cider
perry
rotgut
slug
cocoa, chocolate, hot_chocolate, drinking_chocolate
criollo
juice
fruit_juice, fruit_crush
nectar
apple_juice
cranberry_juice
grape_juice
must
grapefruit_juice
orange_juice
frozen_orange_juice, orange-juice_concentrate
pineapple_juice
lemon_juice
lime_juice
papaya_juice
tomato_juice
carrot_juice
V-8_juice
koumiss, kumis
fruit_drink, ade
lemonade
limeade
orangeade
malted_milk
mate
mulled_wine
negus
soft_drink
pop, soda, soda_pop, soda_water, tonic
birch_beer
bitter_lemon
cola, dope
cream_soda
egg_cream
ginger_ale, ginger_pop
orange_soda
phosphate
Coca_Cola, Coke
Pepsi, Pepsi_Cola
root_beer
sarsaparilla
tonic, tonic_water, quinine_water
coffee_bean, coffee_berry, coffee
coffee, java
cafe_royale, coffee_royal
fruit_punch
milk_punch
mimosa, buck's_fizz
pina_colada
punch
cup
champagne_cup
claret_cup
wassail
planter's_punch
White_Russian
fish_house_punch
May_wine
eggnog
cassiri
spruce_beer
rickey
gin_rickey
tea, tea_leaf
tea_bag
tea
tea-like_drink
cambric_tea
cuppa, cupper
herb_tea, herbal_tea, herbal
tisane
camomile_tea
ice_tea, iced_tea
sun_tea
black_tea
congou, congo, congou_tea, English_breakfast_tea
Darjeeling
orange_pekoe, pekoe
souchong, soochong
green_tea
hyson
oolong
water
bottled_water
branch_water
spring_water
sugar_water
drinking_water
ice_water
soda_water, carbonated_water, club_soda, seltzer, sparkling_water
mineral_water
seltzer
Vichy_water
perishable, spoilable
couscous
ramekin, ramequin
multivitamin, multivitamin_pill
vitamin_pill
soul_food
mold, mould
people
collection, aggregation, accumulation, assemblage
book, rule_book
library
baseball_club, ball_club, club, nine
crowd
class, form, grade, course
core, nucleus, core_group
concert_band, military_band
dance
wedding, wedding_party
chain, concatenation
power_breakfast
aerie, aery, eyrie, eyry
agora
amusement_park, funfair, pleasure_ground
aphelion
apron
interplanetary_space
interstellar_space
intergalactic_space
bush
semidesert
beam-ends
bridgehead
bus_stop
campsite, campground, camping_site, camping_ground, bivouac, encampment, camping_area
detention_basin
cemetery, graveyard, burial_site, burial_ground, burying_ground, memorial_park, necropolis
trichion, crinion
city, metropolis, urban_center
business_district, downtown
outskirts
borough
cow_pasture
crest
eparchy, exarchate
suburb, suburbia, suburban_area
stockbroker_belt
crawlspace, crawl_space
sheikdom, sheikhdom
residence, abode
domicile, legal_residence
dude_ranch
farmland, farming_area
midfield
firebreak, fireguard
flea_market
battlefront, front, front_line
garbage_heap, junk_heap, rubbish_heap, scrapheap, trash_heap, junk_pile, trash_pile, refuse_heap
benthos, benthic_division, benthonic_zone
goldfield
grainfield, grain_field
half-mast, half-staff
hemline
heronry
hipline
hipline
hole-in-the-wall
junkyard
isoclinic_line, isoclinal
littoral, litoral, littoral_zone, sands
magnetic_pole
grassland
mecca
observer's_meridian
prime_meridian
nombril
no-parking_zone
outdoors, out-of-doors, open_air, open
fairground
pasture, pastureland, grazing_land, lea, ley
perihelion
periselene, perilune
locus_of_infection
kasbah, casbah
waterfront
resort, resort_hotel, holiday_resort
resort_area, playground, vacation_spot
rough
ashram
harborage, harbourage
scrubland
weald
wold
schoolyard
showplace
bedside
sideline, out_of_bounds
ski_resort
soil_horizon
geological_horizon
coal_seam
coalface
field
oilfield
Temperate_Zone
terreplein
three-mile_limit
desktop
top
kampong, campong
subtropics, semitropics
barrio
veld, veldt
vertex, peak, apex, acme
waterline, water_line, water_level
high-water_mark
low-water_mark
continental_divide
zodiac
Aegean_island
sultanate
Swiss_canton
abyssal_zone
aerie, aery, eyrie, eyry
air_bubble
alluvial_flat, alluvial_plain
alp
Alpine_glacier, Alpine_type_of_glacier
anthill, formicary
aquifer
archipelago
arete
arroyo
ascent, acclivity, rise, raise, climb, upgrade
asterism
asthenosphere
atoll
bank
bank
bar
barbecue_pit
barrier_reef
baryon, heavy_particle
basin
beach
honeycomb
belay
ben
berm
bladder_stone, cystolith
bluff
borrow_pit
brae
bubble
burrow, tunnel
butte
caldera
canyon, canon
canyonside
cave
cavern
chasm
cirque, corrie, cwm
cliff, drop, drop-off
cloud
coast
coastland
col, gap
collector
comet
continental_glacier
coral_reef
cove
crag
crater
cultivated_land, farmland, plowland, ploughland, tilled_land, tillage, tilth
dale
defile, gorge
delta
descent, declivity, fall, decline, declination, declension, downslope
diapir
divot
divot
down
downhill
draw
drey
drumlin
dune, sand_dune
escarpment, scarp
esker
fireball
flare_star
floor
fomite, vehicle
foothill
footwall
foreland
foreshore
gauge_boson
geological_formation, formation
geyser
glacier
glen
gopher_hole
gorge
grotto, grot
growler
gulch, flume
gully
hail
highland, upland
hill
hillside
hole, hollow
hollow, holler
hot_spring, thermal_spring
iceberg, berg
icecap, ice_cap
ice_field
ice_floe, floe
ice_mass
inclined_fault
ion
isthmus
kidney_stone, urinary_calculus, nephrolith, renal_calculus
knoll, mound, hillock, hummock, hammock
kopje, koppie
Kuiper_belt, Edgeworth-Kuiper_belt
lake_bed, lake_bottom
lakefront
lakeside, lakeshore
landfall
landfill
lather
leak
ledge, shelf
lepton
lithosphere, geosphere
lowland
lunar_crater
maar
massif
meander
mesa, table
meteorite
microfossil
midstream
molehill
monocline
mountain, mount
mountainside, versant
mouth
mull
natural_depression, depression
natural_elevation, elevation
nullah
ocean
ocean_floor, sea_floor, ocean_bottom, seabed, sea_bottom, Davy_Jones's_locker, Davy_Jones
oceanfront
outcrop, outcropping, rock_outcrop
oxbow
pallasite
perforation
photosphere
piedmont
Piedmont_glacier, Piedmont_type_of_glacier
pinetum
plage
plain, field, champaign
point
polar_glacier
pothole, chuckhole
precipice
promontory, headland, head, foreland
ptyalith
pulsar
quicksand
rabbit_burrow, rabbit_hole
radiator
rainbow
range, mountain_range, range_of_mountains, chain, mountain_chain, chain_of_mountains
rangeland
ravine
reef
ridge
ridge, ridgeline
rift_valley
riparian_forest
ripple_mark
riverbank, riverside
riverbed, river_bottom
rock, stone
roof
saltpan
sandbank
sandbar, sand_bar
sandpit
sanitary_landfill
sawpit
scablands
seashore, coast, seacoast, sea-coast
seaside, seaboard
seif_dune
shell
shiner
shoal
shore
shoreline
sinkhole, sink, swallow_hole
ski_slope
sky
slope, incline, side
snowcap
snowdrift
snowfield
soapsuds, suds, lather
spit, tongue
spoor
spume
star
steep
steppe
strand
streambed, creek_bed
sun, Sun
supernova
swale
swamp, swampland
swell
tableland, plateau
talus, scree
tangle
tar_pit
terrace, bench
tidal_basin
tideland
tor
tor
Trapezium
troposphere
tundra
twinkler
uphill
urolith
valley, vale
vehicle-borne_transmission
vein, mineral_vein
volcanic_crater, crater
volcano
wadi
wall
warren, rabbit_warren
wasp's_nest, wasps'_nest, hornet's_nest, hornets'_nest
watercourse
waterside
water_table, water_level, groundwater_level
whinstone, whin
wormcast
xenolith
Circe
gryphon, griffin, griffon
spiritual_leader
messiah, christ
Rhea_Silvia, Rea_Silvia
number_one
adventurer, venturer
anomaly, unusual_person
appointee, appointment
argonaut
Ashkenazi
benefactor, helper
color-blind_person
commoner, common_man, common_person
conservator
contrarian
contadino
contestant
cosigner, cosignatory
discussant
enologist, oenologist, fermentologist
entertainer
eulogist, panegyrist
ex-gambler
experimenter
experimenter
exponent
ex-president
face
female, female_person
finisher
inhabitant, habitant, dweller, denizen, indweller
native, indigen, indigene, aborigine, aboriginal
native
juvenile, juvenile_person
lover
male, male_person
mediator, go-between, intermediator, intermediary, intercessor
mediatrix
national, subject
peer, equal, match, compeer
prize_winner, lottery_winner
recipient, receiver
religionist
sensualist
traveler, traveller
unwelcome_person, persona_non_grata
unskilled_person
worker
wrongdoer, offender
Black_African
Afrikaner, Afrikander, Boer
Aryan
Black, Black_person, blackamoor, Negro, Negroid
Black_woman
mulatto
White, White_person, Caucasian
Circassian
Semite
Chaldean, Chaldaean, Chaldee
Elamite
white_man
WASP, white_Anglo-Saxon_Protestant
gook, slant-eye
Mongol, Mongolian
Tatar, Tartar, Mongol_Tatar
Nahuatl
Aztec
Olmec
Biloxi
Blackfoot
Brule
Caddo
Cheyenne
Chickasaw
Cocopa, Cocopah
Comanche
Creek
Delaware
Diegueno
Esselen
Eyeish
Havasupai
Hunkpapa
Iowa, Ioway
Kalapooia, Kalapuya, Calapooya, Calapuya
Kamia
Kekchi
Kichai
Kickapoo
Kiliwa, Kiliwi
Malecite
Maricopa
Mohican, Mahican
Muskhogean, Muskogean
Navaho, Navajo
Nootka
Oglala, Ogalala
Osage
Oneida
Paiute, Piute
Passamaquody
Penobscot
Penutian
Potawatomi
Powhatan
kachina
Salish
Shahaptian, Sahaptin, Sahaptino
Shasta
Shawnee
Sihasapa
Teton, Lakota, Teton_Sioux, Teton_Dakota
Taracahitian
Tarahumara
Tuscarora
Tutelo
Yana
Yavapai
Yokuts
Yuma
Gadaba
Kolam
Kui
Toda
Tulu
Gujarati, Gujerati
Kashmiri
Punjabi, Panjabi
Slav
Anabaptist
Adventist, Second_Adventist
gentile, non-Jew, goy
gentile
Catholic
Old_Catholic
Uniat, Uniate, Uniate_Christian
Copt
Jewess
Jihadist
Buddhist
Zen_Buddhist
Mahayanist
swami
Hare_Krishna
Shintoist
Eurafrican
Eurasian
Gael
Frank
Afghan, Afghanistani
Albanian
Algerian
Altaic
Andorran
Angolan
Anguillan
Austrian
Bahamian
Bahraini, Bahreini
Basotho
Herero
Luba, Chiluba
Barbadian
Bolivian
Bornean
Carioca
Tupi
Bruneian
Bulgarian
Byelorussian, Belorussian, White_Russian
Cameroonian
Canadian
French_Canadian
Central_American
Chilean
Congolese
Cypriot, Cypriote, Cyprian
Dane
Djiboutian
Britisher, Briton, Brit
English_person
Englishwoman
Anglo-Saxon
Angle
West_Saxon
Lombard, Langobard
limey, John_Bull
Cantabrigian
Cornishman
Cornishwoman
Lancastrian
Lancastrian
Geordie
Oxonian
Ethiopian
Amhara
Eritrean
Finn
Komi
Livonian
Lithuanian
Selkup, Ostyak-Samoyed
Parisian
Parisienne
Creole
Creole
Gabonese
Greek, Hellene
Dorian
Athenian
Laconian
Guyanese
Haitian
Malay, Malayan
Moro
Netherlander, Dutchman, Hollander
Icelander
Iraqi, Iraki
Irishman
Irishwoman
Dubliner
Italian
Roman
Sabine
Japanese, Nipponese
Jordanian
Korean
Kenyan
Lao, Laotian
Lapp, Lapplander, Sami, Saami, Same, Saame
Latin_American, Latino
Lebanese
Levantine
Liberian
Luxemburger, Luxembourger
Macedonian
Sabahan
Mexican
Chicano
Mexican-American, Mexicano
Namibian
Nauruan
Gurkha
New_Zealander, Kiwi
Nicaraguan
Nigerian
Hausa, Haussa
North_American
Nova_Scotian, bluenose
Omani
Pakistani
Brahui
South_American_Indian
Carib, Carib_Indian
Filipino
Polynesian
Qatari, Katari
Romanian, Rumanian
Muscovite
Georgian
Sarawakian
Scandinavian, Norse, Northman
Senegalese
Slovene
South_African
South_American
Sudanese
Syrian
Tahitian
Tanzanian
Tibetan
Togolese
Tuareg
Turki
Chuvash
Turkoman, Turkmen, Turcoman
Uzbek, Uzbeg, Uzbak, Usbek, Usbeg
Ugandan
Ukranian
Yakut
Tungus, Evenk
Igbo
American
Anglo-American
Alaska_Native, Alaskan_Native, Native_Alaskan
Arkansan, Arkansawyer
Carolinian
Coloradan
Connecticuter
Delawarean, Delawarian
Floridian
German_American
Illinoisan
Mainer, Down_Easter
Marylander
Minnesotan, Gopher
Nebraskan, Cornhusker
New_Hampshirite, Granite_Stater
New_Jerseyan, New_Jerseyite, Garden_Stater
New_Yorker
North_Carolinian, Tarheel
Oregonian, Beaver
Pennsylvanian, Keystone_Stater
Texan
Utahan
Uruguayan
Vietnamese, Annamese
Gambian
East_German
Berliner
Prussian
Ghanian
Guinean
Papuan
Walloon
Yemeni
Yugoslav, Jugoslav, Yugoslavian, Jugoslavian
Serbian, Serb
Xhosa
Zairese, Zairean
Zimbabwean
Zulu
Gemini, Twin
Sagittarius, Archer
Pisces, Fish
abbe
abbess, mother_superior, prioress
abnegator
abridger, abbreviator
abstractor, abstracter
absconder
absolver
abecedarian
aberrant
abettor, abetter
abhorrer
abomination
abseiler, rappeller
abstainer, ascetic
academic_administrator
academician
accessory_before_the_fact
companion
accompanist, accompanyist
accomplice, confederate
account_executive, account_representative, registered_representative, customer's_broker, customer's_man
accused
accuser
acid_head
acquaintance, friend
acquirer
aerialist
action_officer
active
active_citizen
actor, histrion, player, thespian, role_player
actor, doer, worker
addict, nut, freak, junkie, junky
adducer
adjuster, adjustor, claims_adjuster, claims_adjustor, claim_agent
adjutant, aide, aide-de-camp
adjutant_general
admirer, adorer
adoptee
adulterer, fornicator
adulteress, fornicatress, hussy, jade, loose_woman, slut, strumpet, trollop
advertiser, advertizer, adman
advisee
advocate, advocator, proponent, exponent
aeronautical_engineer
affiliate
affluent
aficionado
buck_sergeant
agent-in-place
aggravator, annoyance
agitator, fomenter
agnostic
agnostic, doubter
agonist
agony_aunt
agriculturist, agriculturalist, cultivator, grower, raiser
air_attache
air_force_officer, commander
airhead
air_traveler, air_traveller
alarmist
albino
alcoholic, alky, dipsomaniac, boozer, lush, soaker, souse
alderman
alexic
alienee, grantee
alienor
aliterate, aliterate_person
algebraist
allegorizer, allegoriser
alliterator
almoner, medical_social_worker
alpinist
altar_boy
alto
ambassador, embassador
ambassador
ambusher
amicus_curiae, friend_of_the_court
amoralist
amputee
analogist
analphabet, analphabetic
analyst
industry_analyst
market_strategist
anarchist, nihilist, syndicalist
anathema, bete_noire
ancestor, ascendant, ascendent, antecedent, root
anchor, anchorman, anchorperson
ancient
anecdotist, raconteur
angler, troller
animator
animist
annotator
announcer
announcer
anti
anti-American
anti-Semite, Jew-baiter
Anzac
ape-man
aphakic
appellant, plaintiff_in_error
appointee
apprehender
April_fool
aspirant, aspirer, hopeful, wannabe, wannabee
appreciator
appropriator
Arabist
archaist
archbishop
archer, bowman
architect, designer
archivist
archpriest, hierarch, high_priest, prelate, primate
Aristotelian, Aristotelean, Peripatetic
armiger
army_attache
army_engineer, military_engineer
army_officer
arranger, adapter, transcriber
arrival, arriver, comer
arthritic
articulator
artilleryman, cannoneer, gunner, machine_gunner
artist's_model, sitter
assayer
assemblyman
assemblywoman
assenter
asserter, declarer, affirmer, asseverator, avower
assignee
assistant, helper, help, supporter
assistant_professor
associate
associate
associate_professor
astronaut, spaceman, cosmonaut
cosmographer, cosmographist
atheist
athlete, jock
attendant, attender, tender
attorney_general
auditor
augur, auspex
aunt, auntie, aunty
au_pair_girl
authoritarian, dictator
authority
authorizer, authoriser
automobile_mechanic, auto-mechanic, car-mechanic, mechanic, grease_monkey
aviator, aeronaut, airman, flier, flyer
aviatrix, airwoman, aviatress
ayah
babu, baboo
baby, babe, sister
baby
baby_boomer, boomer
baby_farmer
back
backbencher
backpacker, packer
backroom_boy, brain_truster
backscratcher
bad_person
baggage
bag_lady
bailee
bailiff
bailor
bairn
baker, bread_maker
balancer
balker, baulker, noncompliant
ball-buster, ball-breaker
ball_carrier, runner
ballet_dancer
ballet_master
ballet_mistress
balletomane
ball_hawk
balloonist
ballplayer, baseball_player
bullfighter, toreador
banderillero
matador
picador
bandsman
banker
bank_robber
bankrupt, insolvent
bantamweight
barmaid
baron, big_businessman, business_leader, king, magnate, mogul, power, top_executive, tycoon
baron
baron
bartender, barman, barkeep, barkeeper, mixologist
baseball_coach, baseball_manager
base_runner, runner
basketball_player, basketeer, cager
basketweaver, basketmaker
Basket_Maker
bass, basso
bastard, by-blow, love_child, illegitimate_child, illegitimate, whoreson
bat_boy
bather
batman
baton_twirler, twirler
Bavarian
beadsman, bedesman
beard
beatnik, beat
beauty_consultant
Bedouin, Beduin
bedwetter, bed_wetter, wetter
beekeeper, apiarist, apiculturist
beer_drinker, ale_drinker
beggarman
beggarwoman
beldam, beldame
theist
believer, truster
bell_founder
benedick, benedict
berserker, berserk
besieger
best, topper
betrothed
Big_Brother
bigot
big_shot, big_gun, big_wheel, big_cheese, big_deal, big_enchilada, big_fish, head_honcho
big_sister
billiard_player
biochemist
biographer
bird_fancier
birth
birth-control_campaigner, birth-control_reformer
bisexual, bisexual_person
black_belt
blackmailer, extortioner, extortionist
Black_Muslim
blacksmith
blade
bleacher
blind_date
bluecoat
bluestocking, bas_bleu
boatbuilder
boatman, boater, waterman
boatswain, bos'n, bo's'n, bosun, bo'sun
bobby
bodyguard, escort
boffin
Bolshevik, Marxist, red, bolshie, bolshy
Bolshevik, Bolshevist
bombshell
bondman, bondsman
bondwoman, bondswoman, bondmaid
bondwoman, bondswoman, bondmaid
bond_servant
book_agent
bookbinder
bookkeeper
bookmaker
bookworm
booster, shoplifter, lifter
bootblack, shoeblack
bootlegger, moonshiner
bootmaker, boot_maker
borderer
border_patrolman
botanist, phytologist, plant_scientist
bottom_feeder
boulevardier
bounty_hunter
bounty_hunter
Bourbon
bowler
slugger, slogger
cub, lad, laddie, sonny, sonny_boy
Boy_Scout
boy_scout
boy_wonder
bragger, braggart, boaster, blowhard, line-shooter, vaunter
brahman, brahmin
brawler
breadwinner
breaststroker
breeder, stock_breeder
brick
bride
bridesmaid, maid_of_honor
bridge_agent
broadcast_journalist
Brother
brother-in-law
browser
Brummie, Brummy
buddy, brother, chum, crony, pal, sidekick
bull
bully
bunny, bunny_girl
burglar
bursar
busboy, waiter's_assistant
business_editor
business_traveler
buster
busybody, nosy-parker, nosey-parker, quidnunc
buttinsky
cabinetmaker, furniture_maker
caddie, golf_caddie
cadet, plebe
caller, caller-out
call_girl
calligrapher, calligraphist
campaigner, candidate, nominee
camper
camp_follower
candidate, prospect
canonist
capitalist
captain, headwaiter, maitre_d'hotel, maitre_d'
captain, senior_pilot
captain
captain, chieftain
captive
captive
cardinal
cardiologist, heart_specialist, heart_surgeon
card_player
cardsharp, card_sharp, cardsharper, card_sharper, sharper, sharpie, sharpy, card_shark
careerist
career_man
caregiver
caretaker
caretaker
caricaturist
carillonneur
caroler, caroller
carpenter
carper, niggler
Cartesian
cashier
casualty, injured_party
casualty
casuist, sophist
catechist
catechumen, neophyte
caterer
Catholicos
cat_fancier
Cavalier, Royalist
cavalryman, trooper
caveman, cave_man, cave_dweller, troglodyte
celebrant
celebrant, celebrator, celebrater
celebrity, famous_person
cellist, violoncellist
censor
censor
centenarian
centrist, middle_of_the_roader, moderate, moderationist
centurion
certified_public_accountant, CPA
chachka, tsatske, tshatshke, tchotchke, tchotchkeleh
chambermaid, fille_de_chambre
chameleon
champion, champ, title-holder
chandler
prison_chaplain
charcoal_burner
charge_d'affaires
charioteer
charmer, beguiler
chartered_accountant
chartist, technical_analyst
charwoman, char, cleaning_woman, cleaning_lady, woman
male_chauvinist, sexist
cheapskate, tightwad
Chechen
checker
cheerer
cheerleader
cheerleader
Cheops, Khufu
chess_master
chief_executive_officer, CEO, chief_operating_officer
chief_of_staff
chief_petty_officer
Chief_Secretary
child, kid, youngster, minor, shaver, nipper, small_fry, tiddler, tike, tyke, fry, nestling
child, kid
child, baby
child_prodigy, infant_prodigy, wonder_child
chimneysweeper, chimneysweep, sweep
chiropractor
chit
choker
choragus
choreographer
chorus_girl, showgirl, chorine
chosen
cicerone
cigar_smoker
cipher, cypher, nobody, nonentity
circus_acrobat
citizen
city_editor
city_father
city_man
city_slicker, city_boy
civic_leader, civil_leader
civil_rights_leader, civil_rights_worker, civil_rights_activist
cleaner
clergyman, reverend, man_of_the_cloth
cleric, churchman, divine, ecclesiastic
clerk
clever_Dick, clever_clogs
climatologist
climber
clinician
closer, finisher
closet_queen
clown, buffoon, goof, goofball, merry_andrew
clown, buffoon
coach, private_instructor, tutor
coach, manager, handler
pitching_coach
coachman
coal_miner, collier, pitman
coastguardsman
cobber
cobbler, shoemaker
codger, old_codger
co-beneficiary
cog
cognitive_neuroscientist
coiffeur
coiner
collaborator, cooperator, partner, pardner
colleen
college_student, university_student
collegian, college_man, college_boy
colonial
colonialist
colonizer, coloniser
coloratura, coloratura_soprano
color_guard
colossus, behemoth, giant, heavyweight, titan
comedian
comedienne
comer
commander
commander_in_chief, generalissimo
commanding_officer, commandant, commander
commissar, political_commissar
commissioned_officer
commissioned_military_officer
commissioner
commissioner
committee_member
committeewoman
commodore
communicant
communist, commie
Communist
commuter
compere
complexifier
compulsive
computational_linguist
computer_scientist
computer_user
Comrade
concert-goer, music_lover
conciliator, make-peace, pacifier, peacemaker, reconciler
conductor
confectioner, candymaker
Confederate
confessor
confidant, intimate
Confucian, Confucianist
rep
conqueror, vanquisher
Conservative
Nonconformist, chapelgoer
Anglican
consignee
consigner, consignor
constable
constructivist
contractor
contralto
contributor
control_freak
convalescent
convener
convict, con, inmate, yard_bird, yardbird
copilot, co-pilot
copycat, imitator, emulator, ape, aper
coreligionist
cornerback
corporatist
correspondent, letter_writer
cosmetician
cosmopolitan, cosmopolite
Cossack
cost_accountant
co-star
costumier, costumer, costume_designer
cotter, cottier
cotter, cottar
counselor, counsellor
counterterrorist
counterspy, mole
countess
compromiser
countrywoman
county_agent, agricultural_agent, extension_agent
courtier
cousin, first_cousin, cousin-german, full_cousin
cover_girl, pin-up, lovely
cow
craftsman, artisan, journeyman, artificer
craftsman, crafter
crapshooter
crazy, loony, looney, nutcase, weirdo
creature, wight
creditor
creep, weirdo, weirdie, weirdy, spook
criminologist
critic
Croesus
cross-examiner, cross-questioner
crossover_voter, crossover
croupier
crown_prince
crown_princess
cryptanalyst, cryptographer, cryptologist
Cub_Scout
cuckold
cultist
curandera
curate, minister_of_religion, minister, parson, pastor, rector
curator, conservator
customer_agent
cutter, carver
cyberpunk
cyborg, bionic_man, bionic_woman
cymbalist
Cynic
cytogeneticist
cytologist
czar
czar, tsar, tzar
dad, dada, daddy, pa, papa, pappa, pop
dairyman
Dalai_Lama, Grand_Lama
dallier, dillydallier, dilly-dallier, mope, lounger
dancer, professional_dancer, terpsichorean
dancer, social_dancer
clog_dancer
dancing-master, dance_master
dark_horse
darling, favorite, favourite, pet, dearie, deary, ducky
date, escort
daughter, girl
dawdler, drone, laggard, lagger, trailer, poke
day_boarder
day_laborer, day_labourer
deacon, Protestant_deacon
deaconess
deadeye
deipnosophist
dropout
deadhead
deaf_person
debtor, debitor
deckhand, roustabout
defamer, maligner, slanderer, vilifier, libeler, backbiter, traducer
defense_contractor
deist, freethinker
delegate
deliveryman, delivery_boy, deliverer
demagogue, demagog, rabble-rouser
demigod, superman, Ubermensch
demographer, demographist, population_scientist
demonstrator, protester
den_mother
department_head
depositor
deputy
dermatologist, skin_doctor
descender
designated_hitter
designer, intriguer
desk_clerk, hotel_desk_clerk, hotel_clerk
desk_officer
desk_sergeant, deskman, station_keeper
detainee, political_detainee
detective, investigator, tec, police_detective
detective
detractor, disparager, depreciator, knocker
developer
deviationist
devisee
devisor
devourer
dialectician
diarist, diary_keeper, journalist
dietician, dietitian, nutritionist
diocesan
director, theater_director, theatre_director
director
dirty_old_man
disbeliever, nonbeliever, unbeliever
disk_jockey, disc_jockey, dj
dispatcher
distortionist
distributor, distributer
district_attorney, DA
district_manager
diver, plunger
divorcee, grass_widow
ex-wife, ex
divorce_lawyer
docent
doctor, doc, physician, MD, Dr., medico
dodo, fogy, fogey, fossil
doge
dog_in_the_manger
dogmatist, doctrinaire
dolichocephalic
domestic_partner, significant_other, spousal_equivalent, spouse_equivalent
Dominican
dominus, dominie, domine, dominee
don, father
Donatist
donna
dosser, street_person
double, image, look-alike
double-crosser, double-dealer, two-timer, betrayer, traitor
down-and-out
doyenne
draftsman, drawer
dramatist, playwright
dreamer
dressmaker, modiste, needlewoman, seamstress, sempstress
dressmaker's_model
dribbler, driveller, slobberer, drooler
dribbler
drinker, imbiber, toper, juicer
drinker
drug_addict, junkie, junky
drug_user, substance_abuser, user
Druid
drum_majorette, majorette
drummer
drunk
drunkard, drunk, rummy, sot, inebriate, wino
Druze, Druse
dry, prohibitionist
dry_nurse
duchess
duke
duffer
dunker
Dutch_uncle
dyspeptic
eager_beaver, busy_bee, live_wire, sharpie, sharpy
earl
earner, wage_earner
eavesdropper
eccentric, eccentric_person, flake, oddball, geek
eclectic, eclecticist
econometrician, econometrist
economist, economic_expert
ectomorph
editor, editor_in_chief
egocentric, egoist
egotist, egoist, swellhead
ejaculator
elder
elder_statesman
elected_official
electrician, lineman, linesman
elegist
elocutionist
emancipator, manumitter
embryologist
emeritus
emigrant, emigre, emigree, outgoer
emissary, envoy
empress
employee
employer
enchantress, witch
enchantress, temptress, siren, Delilah, femme_fatale
encyclopedist, encyclopaedist
endomorph
enemy, foe, foeman, opposition
energizer, energiser, vitalizer, vitaliser, animator
end_man
end_man, corner_man
endorser, indorser
enjoyer
enlisted_woman
enophile, oenophile
entrant
entrant
entrepreneur, enterpriser
envoy, envoy_extraordinary, minister_plenipotentiary
enzymologist
eparch
epidemiologist
epigone, epigon
epileptic
Episcopalian
equerry
equerry
erotic
escapee
escapist, dreamer, wishful_thinker
Eskimo, Esquimau, Inuit
espionage_agent
esthetician, aesthetician
etcher
ethnologist
Etonian
etymologist
evangelist, revivalist, gospeler, gospeller
Evangelist
event_planner
examiner, inspector
examiner, tester, quizzer
exarch
executant
executive_secretary
executive_vice_president
executrix
exegete
exhibitor, exhibitioner, shower
exhibitionist, show-off
exile, expatriate, expat
existentialist, existentialist_philosopher, existential_philosopher
exorcist, exorciser
ex-spouse
extern, medical_extern
extremist
extrovert, extravert
eyewitness
facilitator
fairy_godmother
falangist, phalangist
falconer, hawker
falsifier
familiar
fan, buff, devotee, lover
fanatic, fiend
fancier, enthusiast
farm_boy
farmer, husbandman, granger, sodbuster
farmhand, fieldhand, field_hand, farm_worker
fascist
fascista
fatalist, determinist, predestinarian, predestinationist
father, male_parent, begetter
Father, Padre
father-figure
father-in-law
Fauntleroy, Little_Lord_Fauntleroy
Fauve, fauvist
favorite_son
featherweight
federalist
fellow_traveler, fellow_traveller
female_aristocrat
female_offspring
female_child, girl, little_girl
fence
fiance, groom-to-be
fielder, fieldsman
field_judge
fighter_pilot
filer
film_director, director
finder
fire_chief, fire_marshal
fire-eater, fire-swallower
fire-eater, hothead
fireman, firefighter, fire_fighter, fire-eater
fire_marshall
fire_walker
first_baseman, first_sacker
firstborn, eldest
first_lady
first_lieutenant, 1st_lieutenant
first_offender
first_sergeant, sergeant_first_class
fishmonger, fishwife
flagellant
flag_officer
flak_catcher, flak, flack_catcher, flack
flanker_back, flanker
flapper
flatmate
flatterer, adulator
flibbertigibbet, foolish_woman
flight_surgeon
floorwalker, shopwalker
flop, dud, washout
Florentine
flower_girl
flower_girl
flutist, flautist, flute_player
fly-by-night
flyweight
flyweight
foe, enemy
folk_dancer
folk_poet
follower
football_hero
football_player, footballer
footman
forefather, father, sire
foremother
foreign_agent
foreigner, outsider
boss
foreman
forester, tree_farmer, arboriculturist
forewoman
forger, counterfeiter
forward
foster-brother, foster_brother
foster-father, foster_father
foster-mother, foster_mother
foster-sister, foster_sister
foster-son, foster_son
founder, beginner, founding_father, father
foundress
four-minute_man
framer
Francophobe
freak, monster, monstrosity, lusus_naturae
free_agent, free_spirit, freewheeler
free_agent
freedom_rider
free-liver
freeloader
free_trader
Freudian
friar, mendicant
monk, monastic
frontierswoman
front_man, front, figurehead, nominal_head, straw_man, strawman
frotteur
fucker
fucker
fuddy-duddy
fullback
funambulist, tightrope_walker
fundamentalist
fundraiser
futurist
gadgeteer
gagman, gagster, gagwriter
gagman, standup_comedian
gainer, weight_gainer
gal
galoot
gambist
gambler
gamine
garbage_man, garbageman, garbage_collector, garbage_carter, garbage_hauler, refuse_collector, dustman
gardener
garment_cutter
garroter, garrotter, strangler, throttler, choker
gasman
gastroenterologist
gatherer
gawker
gendarme
general, full_general
generator, source, author
geneticist
genitor
gent
geologist
geophysicist
ghostwriter, ghost
Gibson_girl
girl, miss, missy, young_lady, young_woman, fille
girlfriend, girl, lady_friend
girlfriend
girl_wonder
Girondist, Girondin
gitano
gladiator
glassblower
gleaner
goat_herder, goatherd
godchild
godfather
godparent
godson
gofer
goffer, gopher
goldsmith, goldworker, gold-worker
golfer, golf_player, linksman
gondolier, gondoliere
good_guy
good_old_boy, good_ole_boy, good_ol'_boy
good_Samaritan
gossip_columnist
gouger
governor_general
grabber
grader
graduate_nurse, trained_nurse
grammarian, syntactician
granddaughter
grande_dame
grandfather, gramps, granddad, grandad, granddaddy, grandpa
Grand_Inquisitor
grandma, grandmother, granny, grannie, gran, nan, nanna
grandmaster
grandparent
grantee
granter
grass_widower, divorced_man
great-aunt, grandaunt
great_grandchild
great_granddaughter
great_grandmother
great_grandparent
great_grandson
great-nephew, grandnephew
great-niece, grandniece
Green_Beret
grenadier, grenade_thrower
greeter, saluter, welcomer
gringo
grinner
grocer
groom, bridegroom
groom, bridegroom
grouch, grump, crank, churl, crosspatch
group_captain
grunter
prison_guard, jailer, jailor, gaoler, screw, turnkey
guard
guesser
guest, invitee
guest
guest_of_honor
guest_worker, guestworker
guide
guitarist, guitar_player
gunnery_sergeant
guru
guru
guvnor
guy, cat, hombre, bozo
gymnast
gym_rat
gynecologist, gynaecologist, woman's_doctor
Gypsy, Gipsy, Romany, Rommany, Romani, Roma, Bohemian
hack, drudge, hacker
hacker, cyber-terrorist, cyberpunk
haggler
hairdresser, hairstylist, stylist, styler
hakim, hakeem
Hakka
halberdier
halfback
half_blood
hand
animal_trainer, handler
handyman, jack_of_all_trades, odd-job_man
hang_glider
hardliner
harlequin
harmonizer, harmoniser
hash_head
hatchet_man, iceman
hater
hatmaker, hatter, milliner, modiste
headman, tribal_chief, chieftain, chief
headmaster, schoolmaster, master
head_nurse
hearer, listener, auditor, attender
heartbreaker
heathen, pagan, gentile, infidel
heavyweight
heavy
heckler, badgerer
hedger
hedger, equivocator, tergiversator
hedonist, pagan, pleasure_seeker
heir, inheritor, heritor
heir_apparent
heiress, inheritress, inheritrix
heir_presumptive
hellion, heller, devil
helmsman, steersman, steerer
hire
hematologist, haematologist
hemiplegic
herald, trumpeter
herbalist, herb_doctor
herder, herdsman, drover
hermaphrodite, intersex, gynandromorph, androgyne, epicene, epicene_person
heroine
heroin_addict
hero_worshiper, hero_worshipper
Herr
highbinder
highbrow
high_commissioner
highflier, highflyer
Highlander, Scottish_Highlander, Highland_Scot
high-muck-a-muck, pooh-bah
high_priest
highjacker, hijacker
hireling, pensionary
historian, historiographer
hitchhiker
hitter, striker
hobbyist
holdout
holdover, hangover
holdup_man, stickup_man
homeboy
homeboy
home_buyer
homegirl
homeless, homeless_person
homeopath, homoeopath
honest_woman
honor_guard, guard_of_honor
hooker
hoper
hornist
horseman, equestrian, horseback_rider
horse_trader
horsewoman
horse_wrangler, wrangler
horticulturist, plantsman
hospital_chaplain
host, innkeeper, boniface
host
hostess
hotelier, hotelkeeper, hotel_manager, hotelman, hosteller
housekeeper
housemaster
housemate
house_physician, resident, resident_physician
house_sitter
housing_commissioner
huckster, cheap-jack
hugger
humanist, humanitarian
humanitarian, do-gooder, improver
hunk
huntress
ex-husband, ex
hydrologist
hyperope
hypertensive
hypnotist, hypnotizer, hypnotiser, mesmerist, mesmerizer
hypocrite, dissembler, dissimulator, phony, phoney, pretender
iceman
iconoclast
ideologist, ideologue
idol, matinee_idol
idolizer, idoliser
imam, imaum
imperialist
important_person, influential_person, personage
inamorato
incumbent, officeholder
incurable
inductee
industrialist
infanticide
inferior
infernal
infielder
infiltrator
informer, betrayer, rat, squealer, blabber
ingenue
ingenue
polymath
in-law, relative-in-law
inquiry_agent
inspector
inspector_general
instigator, initiator
insurance_broker, insurance_agent, general_agent, underwriter
insurgent, insurrectionist, freedom_fighter, rebel
intelligence_analyst
interior_designer, designer, interior_decorator, house_decorator, room_decorator, decorator
interlocutor, conversational_partner
interlocutor, middleman
International_Grandmaster
internationalist
internist
interpreter, translator
interpreter
intervenor
introvert
invader, encroacher
invalidator, voider, nullifier
investigator
investor
invigilator
irreligionist
Ivy_Leaguer
Jack_of_all_trades
Jacksonian
Jane_Doe
janissary
Jat
Javanese, Javan
Jekyll_and_Hyde
jester, fool, motley_fool
Jesuit
jezebel
jilt
jobber, middleman, wholesaler
job_candidate
Job's_comforter
jockey
John_Doe
journalist
judge, justice, jurist
judge_advocate
juggler
Jungian
junior
junior
Junior, Jr, Jnr
junior_lightweight
junior_middleweight
jurist, legal_expert
juror, juryman, jurywoman
justice_of_the_peace
justiciar, justiciary
kachina
keyboardist
Khedive
kingmaker
king, queen, world-beater
King's_Counsel
Counsel_to_the_Crown
kin, kinsperson, family
enate, matrikin, matrilineal_kin, matrisib, matrilineal_sib
kink
kinswoman
kisser, osculator
kitchen_help
kitchen_police, KP
Klansman, Ku_Kluxer, Kluxer
kleptomaniac
kneeler
knight
knocker
knower, apprehender
know-it-all, know-all
kolkhoznik
Kshatriya
labor_coach, birthing_coach, doula, monitrice
laborer, manual_laborer, labourer, jack
Labourite
lady
lady-in-waiting
lady's_maid
lama
lamb, dear
lame_duck
lamplighter
land_agent
landgrave
landlubber, lubber, landsman
landlubber, landsman, landman
landowner, landholder, property_owner
landscape_architect, landscape_gardener, landscaper, landscapist
langlaufer
languisher
lapidary, lapidarist
lass, lassie, young_girl, jeune_fille
Latin
Latin
latitudinarian
Jehovah's_Witness
law_agent
lawgiver, lawmaker
lawman, law_officer, peace_officer
law_student
lawyer, attorney
lay_reader
lazybones
leaker
leaseholder, lessee
lector, lecturer, reader
lector, reader
lecturer
left-hander, lefty, southpaw
legal_representative
legate, official_emissary
legatee
legionnaire, legionary
letterman
liberator
licenser
licentiate
lieutenant
lieutenant_colonel, light_colonel
lieutenant_commander
lieutenant_junior_grade, lieutenant_JG
life
lifeguard, lifesaver
life_tenant
light_flyweight
light_heavyweight, cruiserweight
light_heavyweight
light-o'-love, light-of-love
lightweight
lightweight
lightweight
lilliputian
limnologist
lineman
line_officer
lion-hunter
lisper
lister
literary_critic
literate, literate_person
litigant, litigator
litterer, litterbug, litter_lout
little_brother
little_sister
lobbyist
locksmith
locum_tenens, locum
Lord, noble, nobleman
loser
loser, also-ran
failure, loser, nonstarter, unsuccessful_person
Lothario
loudmouth, blusterer
lowerclassman, underclassman
Lowlander, Scottish_Lowlander, Lowland_Scot
loyalist, stalwart
Luddite
lumberman, lumberjack, logger, feller, faller
lumper
bedlamite
pyromaniac
lutist, lutanist, lutenist
Lutheran
lyricist, lyrist
macebearer, mace, macer
machinist, mechanic, shop_mechanic
madame
maenad
maestro, master
magdalen
magician, prestidigitator, conjurer, conjuror, illusionist
magus
maharani, maharanee
mahatma
maid, maiden
maid, maidservant, housemaid, amah
major
major
major-domo, seneschal
maker, shaper
malahini
malcontent
malik
malingerer, skulker, shammer
Malthusian
adonis
man
man
manageress
mandarin
maneuverer, manoeuvrer
maniac
Manichaean, Manichean, Manichee
manicurist
manipulator
man-at-arms
man_of_action, man_of_deeds
man_of_letters
manufacturer, producer
marcher, parader
marchioness, marquise
margrave
margrave
Marine, devil_dog, leatherneck, shipboard_soldier
marquess
marquis, marquess
marshal, marshall
martinet, disciplinarian, moralist
mascot
masochist
mason, stonemason
masquerader, masker, masquer
masseur
masseuse
master
master, captain, sea_captain, skipper
master-at-arms
master_of_ceremonies, emcee, host
masturbator, onanist
matchmaker, matcher, marriage_broker
mate, first_mate
mate
mate
mater
material
materialist
matriarch, materfamilias
matriarch
matriculate
matron
mayor, city_manager
mayoress
mechanical_engineer
medalist, medallist, medal_winner
medical_officer, medic
medical_practitioner, medical_man
medical_scientist
medium, spiritualist, sensitive
megalomaniac
melancholic, melancholiac
Melkite, Melchite
melter
nonmember
board_member
clansman, clanswoman, clan_member
memorizer, memoriser
Mendelian
mender, repairer, fixer
Mesoamerican
messmate
mestiza
meteorologist
meter_maid
Methodist
Metis
metropolitan
mezzo-soprano, mezzo
microeconomist, microeconomic_expert
middle-aged_man
middlebrow
middleweight
midwife, accoucheuse
mikado, tenno
Milanese
miler
miles_gloriosus
military_attache
military_chaplain, padre, Holy_Joe, sky_pilot
military_leader
military_officer, officer
military_policeman, MP
mill_agent
mill-hand, factory_worker
millionairess
millwright
minder
mining_engineer
minister, government_minister
ministrant
minor_leaguer, bush_leaguer
Minuteman
misanthrope, misanthropist
misfit
mistress
mistress, kept_woman, fancy_woman
mixed-blood
model, poser
class_act
modeler, modeller
modifier
molecular_biologist
Monegasque, Monacan
monetarist
moneygrubber
moneymaker
Mongoloid
monolingual
monologist
moonlighter
moralist
morosoph
morris_dancer
mortal_enemy
mortgagee, mortgage_holder
mortician, undertaker, funeral_undertaker, funeral_director
moss-trooper
mother, female_parent
mother
mother
mother_figure
mother_hen
mother-in-law
mother's_boy, mamma's_boy, mama's_boy
mother's_daughter
motorcycle_cop, motorcycle_policeman, speed_cop
motorcyclist
Mound_Builder
mountebank, charlatan
mourner, griever, sorrower, lamenter
mouthpiece, mouth
mover
moviegoer, motion-picture_fan
muffin_man
mugwump, independent, fencesitter
Mullah, Mollah, Mulla
muncher
murderess
murder_suspect
musher
musician, instrumentalist, player
musicologist
music_teacher
musketeer
Muslimah
mutilator, maimer, mangler
mutineer
mute, deaf-mute, deaf-and-dumb_person
mutterer, mumbler, murmurer
muzzler
Mycenaen
mycologist
myope
myrmidon
mystic, religious_mystic
mythologist
naif
nailer
namby-pamby
name_dropper
namer
nan
nanny, nursemaid, nurse
narc, nark, narcotics_agent
narcissist, narcist
nark, copper's_nark
nationalist
nautch_girl
naval_commander
Navy_SEAL, SEAL
obstructionist, obstructor, obstructer, resister, thwarter
Nazarene
Nazarene, Ebionite
Nazi, German_Nazi
nebbish, nebbech
necker
neonate, newborn, newborn_infant, newborn_baby
nephew
neurobiologist
neurologist, brain_doctor
neurosurgeon, brain_surgeon
neutral
neutralist
newcomer, fledgling, fledgeling, starter, neophyte, freshman, newbie, entrant
newcomer
New_Dealer
newspaper_editor
newsreader, news_reader
Newtonian
niece
niggard, skinflint, scrooge, churl
night_porter
night_rider, nightrider
NIMBY
niqaabi
nitpicker
Nobelist, Nobel_Laureate
NOC
noncandidate
noncommissioned_officer, noncom, enlisted_officer
nondescript
nondriver
nonparticipant
nonperson, unperson
nonresident
nonsmoker
Northern_Baptist
noticer
novelist
novitiate, novice
nuclear_chemist, radiochemist
nudger
nullipara
number_theorist
nurse
nursling, nurseling, suckling
nymph, houri
nymphet
nympholept
nymphomaniac, nympho
oarswoman
oboist
obscurantist
observer, commentator
obstetrician, accoucheur
occupier
occultist
wine_lover
offerer, offeror
office-bearer
office_boy
officeholder, officer
officiant
Federal, Fed, federal_official
oilman
oil_tycoon
old-age_pensioner
old_boy
old_lady
old_man
oldster, old_person, senior_citizen, golden_ager
old-timer, oldtimer, gaffer, old_geezer, antique
old_woman
oligarch
Olympian
omnivore
oncologist
onlooker, looker-on
onomancer
operator
opportunist, self-seeker
optimist
Orangeman
orator, speechmaker, rhetorician, public_speaker, speechifier
orderly, hospital_attendant
orderly
orderly_sergeant
ordinand
ordinary
organ-grinder
organist
organization_man
organizer, organiser, arranger
organizer, organiser, labor_organizer
originator, conceiver, mastermind
ornithologist, bird_watcher
orphan
orphan
osteopath, osteopathist
out-and-outer
outdoorswoman
outfielder
outfielder
right_fielder
right-handed_pitcher, right-hander
outlier
owner-occupier
oyabun
packrat
padrone
padrone
page, pageboy
painter
Paleo-American, Paleo-Amerind, Paleo-Indian
paleontologist, palaeontologist, fossilist
pallbearer, bearer
palmist, palmister, chiromancer
pamperer, spoiler, coddler, mollycoddler
Panchen_Lama
panelist, panellist
panhandler
paparazzo
paperboy
paperhanger, paperer
paperhanger
papoose, pappoose
pardoner
paretic
parishioner
park_commissioner
Parliamentarian, Member_of_Parliament
parliamentary_agent
parodist, lampooner
parricide
parrot
partaker, sharer
part-timer
party
party_man, party_liner
passenger, rider
passer
paster
pater
patient
patriarch
patriarch
patriarch, paterfamilias
patriot, nationalist
patron, sponsor, supporter
patternmaker
pawnbroker
payer, remunerator
peacekeeper
peasant
pedant, bookworm, scholastic
peddler, pedlar, packman, hawker, pitchman
pederast, paederast, child_molester
penologist
pentathlete
Pentecostal, Pentecostalist
percussionist
periodontist
peshmerga
personality
personal_representative
personage
persona_grata
persona_non_grata
personification
perspirer, sweater
pervert, deviant, deviate, degenerate
pessimist
pest, blighter, cuss, pesterer, gadfly
Peter_Pan
petitioner, suppliant, supplicant, requester
petit_juror, petty_juror
pet_sitter, critter_sitter
petter, fondler
Pharaoh, Pharaoh_of_Egypt
pharmacist, druggist, chemist, apothecary, pill_pusher, pill_roller
philanthropist, altruist
philatelist, stamp_collector
philosopher
phonetician
phonologist
photojournalist
photometrist, photometrician
physical_therapist, physiotherapist
physicist
piano_maker
picker, chooser, selector
picnicker, picknicker
pilgrim
pill
pillar, mainstay
pill_head
pilot
Piltdown_man, Piltdown_hoax
pimp, procurer, panderer, pander, pandar, fancy_man, ponce
pipe_smoker
pip-squeak, squirt, small_fry
pisser, urinator
pitcher, hurler, twirler
pitchman
placeman, placeseeker
placer_miner
plagiarist, plagiarizer, plagiariser, literary_pirate, pirate
plainsman
planner, contriver, deviser
planter, plantation_owner
plasterer
platinum_blond, platinum_blonde
platitudinarian
playboy, man-about-town, Corinthian
player, participant
playmate, playfellow
pleaser
pledger
plenipotentiary
plier, plyer
plodder, slowpoke, stick-in-the-mud, slowcoach
plodder, slogger
plotter, mapper
plumber, pipe_fitter
pluralist
pluralist
poet
pointsman
point_woman
policyholder
political_prisoner
political_scientist
politician, politico, pol, political_leader
politician
pollster, poll_taker, headcounter, canvasser
polluter, defiler
pool_player
portraitist, portrait_painter, portrayer, limner
poseuse
positivist, rationalist
postdoc, post_doc
poster_girl
postulator
private_citizen
problem_solver, solver, convergent_thinker
pro-lifer
prosthetist
postulant
potboy, potman
poultryman, poulterer
power_user
power_worker, power-station_worker
practitioner, practician
prayer, supplicant
preceptor, don
predecessor
preemptor, pre-emptor
preemptor, pre-emptor
premature_baby, preterm_baby, premature_infant, preterm_infant, preemie, premie
presbyter
presenter, sponsor
presentist
preserver
president
President_of_the_United_States, United_States_President, President, Chief_Executive
president, prexy
press_agent, publicity_man, public_relations_man, PR_man
press_photographer
priest
prima_ballerina
prima_donna, diva
prima_donna
primigravida, gravida_I
primordial_dwarf, hypoplastic_dwarf, true_dwarf, normal_dwarf
prince_charming
prince_consort
princeling
Prince_of_Wales
princess
princess_royal
principal, dealer
principal, school_principal, head_teacher, head
print_seller
prior
private, buck_private, common_soldier
probationer, student_nurse
processor
process-server
proconsul
proconsul
proctologist
proctor, monitor
procurator
procurer, securer
profit_taker
programmer, computer_programmer, coder, software_engineer
promiser, promisor
promoter, booster, plugger
promulgator
propagandist
propagator, disseminator
property_man, propman, property_master
prophetess
prophet
prosecutor, public_prosecutor, prosecuting_officer, prosecuting_attorney
prospector
protectionist
protegee
protozoologist
provost_marshal
pruner, trimmer
psalmist
psephologist
psychiatrist, head-shrinker, shrink
psychic
psycholinguist
psychophysicist
publican, tavern_keeper
pudge
puerpera
punching_bag
punter
punter
puppeteer
puppy, pup
purchasing_agent
puritan
Puritan
pursuer
pusher, shover
pusher, drug_peddler, peddler, drug_dealer, drug_trafficker
pusher, thruster
putz
Pygmy, Pigmy
qadi
quadriplegic
quadruplet, quad
quaker, trembler
quarter
quarterback, signal_caller, field_general
quartermaster
quartermaster_general
Quebecois
queen, queen_regnant, female_monarch
Queen_of_England
queen
queen
queen_consort
queen_mother
Queen's_Counsel
question_master, quizmaster
quick_study, sponge
quietist
quitter
rabbi
racist, racialist
radiobiologist
radiologic_technologist
radiologist, radiotherapist
rainmaker
raiser
raja, rajah
rake, rakehell, profligate, rip, blood, roue
ramrod
ranch_hand
ranker
ranter, raver
rape_suspect
rapper
rapporteur
rare_bird, rara_avis
ratepayer
raw_recruit
reader
reading_teacher
realist
real_estate_broker, real_estate_agent, estate_agent, land_agent, house_agent
rear_admiral
receiver
reciter
recruit, enlistee
recruit, military_recruit
recruiter
recruiting-sergeant
redcap
redhead, redheader, red-header, carrottop
redneck, cracker
reeler
reenactor
referral
referee, ref
refiner
Reform_Jew
registered_nurse, RN
registrar
Regius_professor
reliever, allayer, comforter
anchorite, hermit
religious_leader
remover
Renaissance_man, generalist
renegade
rentier
repairman, maintenance_man, service_man
reporter, newsman, newsperson
newswoman
representative
reprobate, miscreant
rescuer, recoverer, saver
reservist
resident_commissioner
respecter
restaurateur, restauranter
restrainer, controller
retailer, retail_merchant
retiree, retired_person
returning_officer
revenant
revisionist
revolutionist, revolutionary, subversive, subverter
rheumatologist
Rhodesian_man, Homo_rhodesiensis
rhymer, rhymester, versifier, poetizer, poetiser
rich_person, wealthy_person, have
rider
riding_master
rifleman
right-hander, right_hander, righthander
right-hand_man, chief_assistant, man_Friday
ringer
ringleader
roadman, road_mender
roarer, bawler, bellower, screamer, screecher, shouter, yeller
rocket_engineer, rocket_scientist
rocket_scientist
rock_star
Romanov, Romanoff
romanticist, romantic
ropemaker, rope-maker, roper
roper
roper
ropewalker, ropedancer
rosebud
Rosicrucian
Mountie
Rough_Rider
roundhead
civil_authority, civil_officer
runner
runner
runner
running_back
rusher
rustic
saboteur, wrecker, diversionist
sadist
sailing_master, navigator
sailor, crewman
salesgirl, saleswoman, saleslady
salesman
salesperson, sales_representative, sales_rep
salvager, salvor
sandwichman
sangoma
sannup
sapper
Sassenach
satrap
saunterer, stroller, ambler
Savoyard
sawyer
scalper
scandalmonger
scapegrace, black_sheep
scene_painter
schemer, plotter
schizophrenic
schlemiel, shlemiel
schlockmeister, shlockmeister
scholar, scholarly_person, bookman, student
scholiast
schoolchild, school-age_child, pupil
schoolfriend
Schoolman, medieval_Schoolman
schoolmaster
schoolmate, classmate, schoolfellow, class_fellow
scientist
scion
scoffer, flouter, mocker, jeerer
scofflaw
scorekeeper, scorer
scorer
scourer
scout, talent_scout
scoutmaster
scrambler
scratcher
screen_actor, movie_actor
scrutineer, canvasser
scuba_diver
sculptor, sculpturer, carver, statue_maker
Sea_Scout
seasonal_worker, seasonal
seasoner
second_baseman, second_sacker
second_cousin
seconder
second_fiddle, second_banana
second-in-command
second_lieutenant, 2nd_lieutenant
second-rater, mediocrity
secretary
Secretary_of_Agriculture, Agriculture_Secretary
Secretary_of_Health_and_Human_Services
Secretary_of_State
Secretary_of_the_Interior, Interior_Secretary
sectarian, sectary, sectarist
section_hand
secularist
security_consultant
seeded_player, seed
seeder, cloud_seeder
seeker, searcher, quester
segregate
segregator, segregationist
selectman
selectwoman
selfish_person
self-starter
seller, marketer, vender, vendor, trafficker
selling_agent
semanticist, semiotician
semifinalist
seminarian, seminarist
senator
sendee
senior
senior_vice_president
separatist, separationist
septuagenarian
serf, helot, villein
spree_killer
serjeant-at-law, serjeant, sergeant-at-law, sergeant
server
serviceman, military_man, man, military_personnel
settler, colonist
settler
sex_symbol
sexton, sacristan
shaheed
Shakespearian, Shakespearean
shanghaier, seizer
sharecropper, cropper, sharecrop_farmer
shaver
Shavian
sheep
sheik, tribal_sheik, sheikh, tribal_sheikh, Arab_chief
shelver
shepherd
ship-breaker
shipmate
shipowner
shipping_agent
shirtmaker
shogun
shopaholic
shop_girl
shop_steward, steward
shot_putter
shrew, termagant
shuffler
shyster, pettifogger
sibling, sib
sick_person, diseased_person, sufferer
sightreader
signaler, signaller
signer
signor, signior
signora
signore
signorina
silent_partner, sleeping_partner
addle-head, addlehead, loon, birdbrain
simperer
singer, vocalist, vocalizer, vocaliser
Sinologist
sipper
sirrah
Sister
sister, sis
waverer, vacillator, hesitator, hesitater
sitar_player
sixth-former
skateboarder
skeptic, sceptic, doubter
sketcher
skidder
skier
skinny-dipper
skin-diver, aquanaut
skinhead
slasher
slattern, slut, slovenly_woman, trollop
sleeper, slumberer
sleeper
sleeping_beauty
sleuth, sleuthhound
slob, sloven, pig, slovenly_person
sloganeer
slopseller, slop-seller
smasher, stunner, knockout, beauty, ravisher, sweetheart, peach, lulu, looker, mantrap, dish
smirker
smith, metalworker
smoothie, smoothy, sweet_talker, charmer
smuggler, runner, contrabandist, moon_curser, moon-curser
sneezer
snob, prig, snot, snoot
snoop, snooper
snorer
sob_sister
soccer_player
social_anthropologist, cultural_anthropologist
social_climber, climber
socialist
socializer, socialiser
social_scientist
social_secretary
Socinian
sociolinguist
sociologist
soda_jerk, soda_jerker
sodalist
sodomite, sodomist, sod, bugger
soldier
son, boy
songster
songstress
songwriter, songster, ballad_maker
sorcerer, magician, wizard, necromancer, thaumaturge, thaumaturgist
sorehead
soul_mate
Southern_Baptist
sovereign, crowned_head, monarch
spacewalker
Spanish_American, Hispanic_American, Hispanic
sparring_partner, sparring_mate
spastic
speaker, talker, utterer, verbalizer, verbaliser
native_speaker
Speaker
speechwriter
specialist, medical_specialist
specifier
spectator, witness, viewer, watcher, looker
speech_therapist
speedskater, speed_skater
spellbinder
sphinx
spinster, old_maid
split_end
sport, sportsman, sportswoman
sport, summercater
sporting_man, outdoor_man
sports_announcer, sportscaster, sports_commentator
sports_editor
sprog
square_dancer
square_shooter, straight_shooter, straight_arrow
squatter
squire
squire
staff_member, staffer
staff_sergeant
stage_director
stainer
stakeholder
stalker
stalking-horse
stammerer, stutterer
stamper, stomper, tramper, trampler
standee
stand-in, substitute, relief, reliever, backup, backup_man, fill-in
star, principal, lead
starlet
starter, dispatcher
statesman, solon, national_leader
state_treasurer
stationer, stationery_seller
stenographer, amanuensis, shorthand_typist
stentor
stepbrother, half-brother, half_brother
stepmother
stepparent
stevedore, loader, longshoreman, docker, dockhand, dock_worker, dockworker, dock-walloper, lumper
steward
steward, flight_attendant
steward
stickler
stiff
stifler, smotherer
stipendiary, stipendiary_magistrate
stitcher
stockjobber
stock_trader
stockist
stoker, fireman
stooper
store_detective
strafer
straight_man, second_banana
stranger, alien, unknown
stranger
strategist, strategian
straw_boss, assistant_foreman
streetwalker, street_girl, hooker, hustler, floozy, floozie, slattern
stretcher-bearer, litter-bearer
struggler
stud, he-man, macho-man
student, pupil, educatee
stumblebum, palooka
stylist
subaltern
subcontractor
subduer, surmounter, overcomer
subject, case, guinea_pig
subordinate, subsidiary, underling, foot_soldier
substitute, reserve, second-stringer
successor, heir
successor, replacement
succorer, succourer
Sufi
suffragan, suffragan_bishop
suffragette
sugar_daddy
suicide_bomber
suitor, suer, wooer
sumo_wrestler
sunbather
sundowner
super_heavyweight
superior, higher-up, superordinate
supermom
supernumerary, spear_carrier, extra
supremo
surgeon, operating_surgeon, sawbones
Surgeon_General
Surgeon_General
surpriser
surveyor
surveyor
survivor, subsister
sutler, victualer, victualler, provisioner
sweeper
sweetheart, sweetie, steady, truelove
swinger, tramp
switcher, whipper
swot, grind, nerd, wonk, dweeb
sycophant, toady, crawler, lackey, ass-kisser
sylph
sympathizer, sympathiser, well-wisher
symphonist
syncopator
syndic
tactician
tagger
tailback
tallyman, tally_clerk
tallyman
tanker, tank_driver
tapper, wiretapper, phone_tapper
Tartuffe, Tartufe
Tarzan
taster, taste_tester, taste-tester, sampler
tax_assessor, assessor
taxer
taxi_dancer
taxonomist, taxonomer, systematist
teacher, instructor
teaching_fellow
tearaway
technical_sergeant
technician
Ted, Teddy_boy
teetotaler, teetotaller, teetotalist
television_reporter, television_newscaster, TV_reporter, TV_newsman
temporizer, temporiser
tempter
term_infant
toiler
tenant, renter
tenant
tenderfoot
tennis_player
tennis_pro, professional_tennis_player
tenor_saxophonist, tenorist
termer
terror, scourge, threat
tertigravida, gravida_III
testator, testate
testatrix
testee, examinee
test-tube_baby
Texas_Ranger, Ranger
thane
theatrical_producer
theologian, theologist, theologizer, theologiser
theorist, theoretician, theorizer, theoriser, idealogue
theosophist
therapist, healer
Thessalonian
thinker, creative_thinker, mind
thinker
thrower
thurifer
ticket_collector, ticket_taker
tight_end
tiler
timekeeper, timer
Timorese
tinkerer, fiddler
tinsmith, tinner
tinter
tippler, social_drinker
tipster, tout
T-man
toastmaster, symposiarch
toast_mistress
tobogganist
tomboy, romp, hoyden
toolmaker
torchbearer
Tory
Tory
tosser
tosser, jerk-off, wanker
totalitarian
tourist, tourer, holidaymaker
tout, touter
tout, ticket_tout
tovarich, tovarisch
towhead
town_clerk
town_crier, crier
townsman, towner
toxicologist
track_star
trader, bargainer, dealer, monger
trade_unionist, unionist, union_member
traditionalist, diehard
traffic_cop
tragedian
tragedian
tragedienne
trail_boss
trainer
traitor, treasonist
traitress
transactor
transcriber
transfer, transferee
transferee
translator, transcriber
transvestite, cross-dresser
traveling_salesman, travelling_salesman, commercial_traveler, commercial_traveller, roadman, bagman
traverser
trawler
Treasury, First_Lord_of_the_Treasury
trencher
trend-setter, taste-maker, fashion_arbiter
tribesman
trier, attempter, essayer
trifler
trooper
trooper, state_trooper
Trotskyite, Trotskyist, Trot
truant, hooky_player
trumpeter, cornetist
trusty
Tudor
tumbler
tutee
twin
two-timer
Tyke
tympanist, timpanist
typist
tyrant, autocrat, despot
umpire, ump
understudy, standby
undesirable
unicyclist
unilateralist
Unitarian
Arminian
universal_donor
UNIX_guru
Unknown_Soldier
upsetter
upstager
upstart, parvenu, nouveau-riche, arriviste
upstart
urchin
urologist
usherette
usher, doorkeeper
usurper, supplanter
utility_man
utilizer, utiliser
Utopian
uxoricide
vacationer, vacationist
valedictorian, valedictory_speaker
valley_girl
vaulter, pole_vaulter, pole_jumper
vegetarian
vegan
venerator
venture_capitalist
venturer, merchant-venturer
vermin, varmint
very_important_person, VIP, high-up, dignitary, panjandrum, high_muckamuck
vibist, vibraphonist
vicar
vicar
vicar-general
vice_chancellor
vicegerent
vice_president, V.P.
vice-regent
victim, dupe
Victorian
victualer, victualler
vigilante, vigilance_man
villager
vintager
vintner, wine_merchant
violator, debaucher, ravisher
violator, lawbreaker, law_offender
violist
virago
virologist
Visayan, Bisayan
viscountess
viscount
Visigoth
visionary
visiting_fireman
visiting_professor
visualizer, visualiser
vixen, harpy, hellcat
vizier
voicer
volunteer, unpaid_worker
volunteer, military_volunteer, voluntary
votary
votary
vouchee
vower
voyager
voyeur, Peeping_Tom, peeper
vulcanizer, vulcaniser
waffler
Wagnerian
waif, street_child
wailer
waiter, server
waitress
walking_delegate
walk-on
wallah
wally
waltzer
wanderer, roamer, rover, bird_of_passage
Wandering_Jew
wanton
warrantee
warrantee
washer
washerman, laundryman
washwoman, washerwoman, laundrywoman, laundress
wassailer, carouser
wastrel, waster
Wave
weatherman, weather_forecaster
weekend_warrior
weeder
welder
welfare_case, charity_case
westerner
West-sider
wetter
whaler
Whig
whiner, complainer, moaner, sniveller, crybaby, bellyacher, grumbler, squawker
whipper-in
whisperer
whiteface
Carmelite, White_Friar
Augustinian
white_hope, great_white_hope
white_supremacist
whoremaster, whoremonger
whoremaster, whoremonger, john, trick
widow, widow_woman
wife, married_woman
wiggler, wriggler, squirmer
wimp, chicken, crybaby
wing_commander
winger
winner
winner, victor
window_dresser, window_trimmer
winker
wiper
wireman, wirer
wise_guy, smart_aleck, wiseacre, wisenheimer, weisenheimer
witch_doctor
withdrawer
withdrawer
woman, adult_female
woman
wonder_boy, golden_boy
wonderer
working_girl
workman, workingman, working_man, working_person
workmate
worldling
worshiper, worshipper
worthy
wrecker
wright
write-in_candidate, write-in
writer, author
Wykehamist
yakuza
yard_bird, yardbird
yardie
yardman
yardmaster, trainmaster, train_dispatcher
yenta
yogi
young_buck, young_man
young_Turk
Young_Turk
Zionist
zoo_keeper
Genet, Edmund_Charles_Edouard_Genet, Citizen_Genet
Kennan, George_F._Kennan, George_Frost_Kennan
Munro, H._H._Munro, Hector_Hugh_Munro, Saki
Popper, Karl_Popper, Sir_Karl_Raimund_Popper
Stoker, Bram_Stoker, Abraham_Stoker
Townes, Charles_Townes, Charles_Hard_Townes
dust_storm, duster, sandstorm, sirocco
parhelion, mock_sun, sundog
snow, snowfall
facula
wave
microflora
wilding
semi-climber
volva
basidiocarp
domatium
apomict
aquatic
bryophyte, nonvascular_plant
acrocarp, acrocarpous_moss
sphagnum, sphagnum_moss, peat_moss, bog_moss
liverwort, hepatic
hepatica, Marchantia_polymorpha
pecopteris
pteridophyte, nonflowering_plant
fern
fern_ally
spore
carpospore
chlamydospore
conidium, conidiospore
oospore
tetraspore
zoospore
cryptogam
spermatophyte, phanerogam, seed_plant
seedling
annual
biennial
perennial
hygrophyte
gymnosperm
gnetum, Gnetum_gnemon
Catha_edulis
ephedra, joint_fir
mahuang, Ephedra_sinica
welwitschia, Welwitschia_mirabilis
cycad
sago_palm, Cycas_revoluta
false_sago, fern_palm, Cycas_circinalis
zamia
coontie, Florida_arrowroot, Seminole_bread, Zamia_pumila
ceratozamia
dioon
encephalartos
kaffir_bread, Encephalartos_caffer
macrozamia
burrawong, Macrozamia_communis, Macrozamia_spiralis
pine, pine_tree, true_pine
pinon, pinyon
nut_pine
pinon_pine, Mexican_nut_pine, Pinus_cembroides
Rocky_mountain_pinon, Pinus_edulis
single-leaf, single-leaf_pine, single-leaf_pinyon, Pinus_monophylla
bishop_pine, bishop's_pine, Pinus_muricata
California_single-leaf_pinyon, Pinus_californiarum
Parry's_pinyon, Pinus_quadrifolia, Pinus_parryana
spruce_pine, Pinus_glabra
black_pine, Pinus_nigra
pitch_pine, northern_pitch_pine, Pinus_rigida
pond_pine, Pinus_serotina
stone_pine, umbrella_pine, European_nut_pine, Pinus_pinea
Swiss_pine, Swiss_stone_pine, arolla_pine, cembra_nut_tree, Pinus_cembra
cembra_nut, cedar_nut
Swiss_mountain_pine, mountain_pine, dwarf_mountain_pine, mugho_pine, mugo_pine, Pinus_mugo
ancient_pine, Pinus_longaeva
white_pine
American_white_pine, eastern_white_pine, weymouth_pine, Pinus_strobus
western_white_pine, silver_pine, mountain_pine, Pinus_monticola
southwestern_white_pine, Pinus_strobiformis
limber_pine, Pinus_flexilis
whitebark_pine, whitebarked_pine, Pinus_albicaulis
yellow_pine
ponderosa, ponderosa_pine, western_yellow_pine, bull_pine, Pinus_ponderosa
Jeffrey_pine, Jeffrey's_pine, black_pine, Pinus_jeffreyi
shore_pine, lodgepole, lodgepole_pine, spruce_pine, Pinus_contorta
Sierra_lodgepole_pine, Pinus_contorta_murrayana
loblolly_pine, frankincense_pine, Pinus_taeda
jack_pine, Pinus_banksiana
swamp_pine
longleaf_pine, pitch_pine, southern_yellow_pine, Georgia_pine, Pinus_palustris
shortleaf_pine, short-leaf_pine, shortleaf_yellow_pine, Pinus_echinata
red_pine, Canadian_red_pine, Pinus_resinosa
Scotch_pine, Scots_pine, Scotch_fir, Pinus_sylvestris
scrub_pine, Virginia_pine, Jersey_pine, Pinus_virginiana
Monterey_pine, Pinus_radiata
bristlecone_pine, Rocky_Mountain_bristlecone_pine, Pinus_aristata
table-mountain_pine, prickly_pine, hickory_pine, Pinus_pungens
knobcone_pine, Pinus_attenuata
Japanese_red_pine, Japanese_table_pine, Pinus_densiflora
Japanese_black_pine, black_pine, Pinus_thunbergii
Torrey_pine, Torrey's_pine, soledad_pine, grey-leaf_pine, sabine_pine, Pinus_torreyana
larch, larch_tree
American_larch, tamarack, black_larch, Larix_laricina
western_larch, western_tamarack, Oregon_larch, Larix_occidentalis
subalpine_larch, Larix_lyallii
European_larch, Larix_decidua
Siberian_larch, Larix_siberica, Larix_russica
golden_larch, Pseudolarix_amabilis
fir, fir_tree, true_fir
silver_fir
amabilis_fir, white_fir, Pacific_silver_fir, red_silver_fir, Christmas_tree, Abies_amabilis
European_silver_fir, Christmas_tree, Abies_alba
white_fir, Colorado_fir, California_white_fir, Abies_concolor, Abies_lowiana
balsam_fir, balm_of_Gilead, Canada_balsam, Abies_balsamea
Fraser_fir, Abies_fraseri
lowland_fir, lowland_white_fir, giant_fir, grand_fir, Abies_grandis
Alpine_fir, subalpine_fir, Abies_lasiocarpa
Santa_Lucia_fir, bristlecone_fir, Abies_bracteata, Abies_venusta
cedar, cedar_tree, true_cedar
cedar_of_Lebanon, Cedrus_libani
deodar, deodar_cedar, Himalayan_cedar, Cedrus_deodara
Atlas_cedar, Cedrus_atlantica
spruce
Norway_spruce, Picea_abies
weeping_spruce, Brewer's_spruce, Picea_breweriana
Engelmann_spruce, Engelmann's_spruce, Picea_engelmannii
white_spruce, Picea_glauca
black_spruce, Picea_mariana, spruce_pine
Siberian_spruce, Picea_obovata
Sitka_spruce, Picea_sitchensis
oriental_spruce, Picea_orientalis
Colorado_spruce, Colorado_blue_spruce, silver_spruce, Picea_pungens
red_spruce, eastern_spruce, yellow_spruce, Picea_rubens
hemlock, hemlock_tree
eastern_hemlock, Canadian_hemlock, spruce_pine, Tsuga_canadensis
Carolina_hemlock, Tsuga_caroliniana
mountain_hemlock, black_hemlock, Tsuga_mertensiana
western_hemlock, Pacific_hemlock, west_coast_hemlock, Tsuga_heterophylla
douglas_fir
green_douglas_fir, douglas_spruce, douglas_pine, douglas_hemlock, Oregon_fir, Oregon_pine, Pseudotsuga_menziesii
big-cone_spruce, big-cone_douglas_fir, Pseudotsuga_macrocarpa
Cathaya
cedar, cedar_tree
cypress, cypress_tree
gowen_cypress, Cupressus_goveniana
pygmy_cypress, Cupressus_pigmaea, Cupressus_goveniana_pigmaea
Santa_Cruz_cypress, Cupressus_abramsiana, Cupressus_goveniana_abramsiana
Arizona_cypress, Cupressus_arizonica
Guadalupe_cypress, Cupressus_guadalupensis
Monterey_cypress, Cupressus_macrocarpa
Mexican_cypress, cedar_of_Goa, Portuguese_cypress, Cupressus_lusitanica
Italian_cypress, Mediterranean_cypress, Cupressus_sempervirens
King_William_pine, Athrotaxis_selaginoides
Chilean_cedar, Austrocedrus_chilensis
incense_cedar, red_cedar, Calocedrus_decurrens, Libocedrus_decurrens
southern_white_cedar, coast_white_cedar, Atlantic_white_cedar, white_cypress, white_cedar, Chamaecyparis_thyoides
Oregon_cedar, Port_Orford_cedar, Lawson's_cypress, Lawson's_cedar, Chamaecyparis_lawsoniana
yellow_cypress, yellow_cedar, Nootka_cypress, Alaska_cedar, Chamaecyparis_nootkatensis
Japanese_cedar, Japan_cedar, sugi, Cryptomeria_japonica
juniper_berry
incense_cedar
kawaka, Libocedrus_plumosa
pahautea, Libocedrus_bidwillii, mountain_pine
metasequoia, dawn_redwood, Metasequoia_glyptostrodoides
arborvitae
western_red_cedar, red_cedar, canoe_cedar, Thuja_plicata
American_arborvitae, northern_white_cedar, white_cedar, Thuja_occidentalis
Oriental_arborvitae, Thuja_orientalis, Platycladus_orientalis
hiba_arborvitae, Thujopsis_dolobrata
keteleeria
Wollemi_pine
araucaria
monkey_puzzle, chile_pine, Araucaria_araucana
norfolk_island_pine, Araucaria_heterophylla, Araucaria_excelsa
new_caledonian_pine, Araucaria_columnaris
bunya_bunya, bunya_bunya_tree, Araucaria_bidwillii
hoop_pine, Moreton_Bay_pine, Araucaria_cunninghamii
kauri_pine, dammar_pine
kauri, kaury, Agathis_australis
amboina_pine, amboyna_pine, Agathis_dammara, Agathis_alba
dundathu_pine, queensland_kauri, smooth_bark_kauri, Agathis_robusta
red_kauri, Agathis_lanceolata
plum-yew
California_nutmeg, nutmeg-yew, Torreya_californica
stinking_cedar, stinking_yew, Torrey_tree, Torreya_taxifolia
celery_pine
celery_top_pine, celery-topped_pine, Phyllocladus_asplenifolius
tanekaha, Phyllocladus_trichomanoides
Alpine_celery_pine, Phyllocladus_alpinus
yellowwood, yellowwood_tree
gymnospermous_yellowwood
podocarp
yacca, yacca_podocarp, Podocarpus_coriaceus
brown_pine, Rockingham_podocarp, Podocarpus_elatus
cape_yellowwood, African_yellowwood, Podocarpus_elongatus
South-African_yellowwood, Podocarpus_latifolius
alpine_totara, Podocarpus_nivalis
totara, Podocarpus_totara
common_yellowwood, bastard_yellowwood, Afrocarpus_falcata
kahikatea, New_Zealand_Dacryberry, New_Zealand_white_pine, Dacrycarpus_dacrydioides, Podocarpus_dacrydioides
rimu, imou_pine, red_pine, Dacrydium_cupressinum
tarwood, tar-wood, Dacrydium_colensoi
common_sickle_pine, Falcatifolium_falciforme
yellow-leaf_sickle_pine, Falcatifolium_taxoides
tarwood, tar-wood, New_Zealand_mountain_pine, Halocarpus_bidwilli, Dacrydium_bidwilli
westland_pine, silver_pine, Lagarostrobus_colensoi
huon_pine, Lagarostrobus_franklinii, Dacrydium_franklinii
Chilean_rimu, Lepidothamnus_fonkii
mountain_rimu, Lepidothamnus_laxifolius, Dacridium_laxifolius
nagi, Nageia_nagi
miro, black_pine, Prumnopitys_ferruginea, Podocarpus_ferruginea
matai, black_pine, Prumnopitys_taxifolia, Podocarpus_spicata
plum-fruited_yew, Prumnopitys_andina, Prumnopitys_elegans
Prince_Albert_yew, Prince_Albert's_yew, Saxe-gothea_conspicua
Sundacarpus_amara, Prumnopitys_amara, Podocarpus_amara
Japanese_umbrella_pine, Sciadopitys_verticillata
yew
Old_World_yew, English_yew, Taxus_baccata
Pacific_yew, California_yew, western_yew, Taxus_brevifolia
Japanese_yew, Taxus_cuspidata
Florida_yew, Taxus_floridana
New_Caledonian_yew, Austrotaxus_spicata
white-berry_yew, Pseudotaxus_chienii
ginkgo, gingko, maidenhair_tree, Ginkgo_biloba
angiosperm, flowering_plant
dicot, dicotyledon, magnoliopsid, exogen
monocot, monocotyledon, liliopsid, endogen
floret, floweret
flower
bloomer
wildflower, wild_flower
apetalous_flower
inflorescence
rosebud
gynostegium
pollinium
pistil
gynobase
gynophore
stylopodium
carpophore
cornstalk, corn_stalk
petiolule
mericarp
micropyle
germ_tube
pollen_tube
gemma
galbulus
nectary, honey_gland
pericarp, seed_vessel
epicarp, exocarp
mesocarp
pip
silique, siliqua
cataphyll
perisperm
monocarp, monocarpic_plant, monocarpous_plant
sporophyte
gametophyte
megasporangium, macrosporangium
microspore
microsporangium
microsporophyll
archespore, archesporium
bonduc_nut, nicker_nut, nicker_seed
Job's_tears
oilseed, oil-rich_seed
castor_bean
cottonseed
candlenut
peach_pit
hypanthium, floral_cup, calyx_tube
petal, flower_petal
corolla
lip
perianth, chlamys, floral_envelope, perigone, perigonium
thistledown
custard_apple, custard_apple_tree
cherimoya, cherimoya_tree, Annona_cherimola
ilama, ilama_tree, Annona_diversifolia
soursop, prickly_custard_apple, soursop_tree, Annona_muricata
bullock's_heart, bullock's_heart_tree, bullock_heart, Annona_reticulata
sweetsop, sweetsop_tree, Annona_squamosa
pond_apple, pond-apple_tree, Annona_glabra
pawpaw, papaw, papaw_tree, Asimina_triloba
ilang-ilang, ylang-ylang, Cananga_odorata
lancewood, lancewood_tree, Oxandra_lanceolata
Guinea_pepper, negro_pepper, Xylopia_aethiopica
barberry
American_barberry, Berberis_canadensis
common_barberry, European_barberry, Berberis_vulgaris
Japanese_barberry, Berberis_thunbergii
Oregon_grape, Oregon_holly_grape, hollygrape, mountain_grape, holly-leaves_barberry, Mahonia_aquifolium
Oregon_grape, Mahonia_nervosa
mayapple, May_apple, wild_mandrake, Podophyllum_peltatum
May_apple
allspice
Carolina_allspice, strawberry_shrub, strawberry_bush, sweet_shrub, Calycanthus_floridus
spicebush, California_allspice, Calycanthus_occidentalis
katsura_tree, Cercidiphyllum_japonicum
laurel
true_laurel, bay, bay_laurel, bay_tree, Laurus_nobilis
camphor_tree, Cinnamomum_camphora
cinnamon, Ceylon_cinnamon, Ceylon_cinnamon_tree, Cinnamomum_zeylanicum
cassia, cassia-bark_tree, Cinnamomum_cassia
cassia_bark, Chinese_cinnamon
Saigon_cinnamon, Cinnamomum_loureirii
cinnamon_bark
spicebush, spice_bush, American_spicebush, Benjamin_bush, Lindera_benzoin, Benzoin_odoriferum
avocado, avocado_tree, Persea_Americana
laurel-tree, red_bay, Persea_borbonia
sassafras, sassafras_tree, Sassafras_albidum
California_laurel, California_bay_tree, Oregon_myrtle, pepperwood, spice_tree, sassafras_laurel, California_olive, mountain_laurel, Umbellularia_californica
anise_tree
purple_anise, Illicium_floridanum
star_anise, Illicium_anisatum
star_anise, Chinese_anise, Illicium_verum
magnolia
southern_magnolia, evergreen_magnolia, large-flowering_magnolia, bull_bay, Magnolia_grandiflora
umbrella_tree, umbrella_magnolia, elkwood, elk-wood, Magnolia_tripetala
earleaved_umbrella_tree, Magnolia_fraseri
cucumber_tree, Magnolia_acuminata
large-leaved_magnolia, large-leaved_cucumber_tree, great-leaved_macrophylla, Magnolia_macrophylla
saucer_magnolia, Chinese_magnolia, Magnolia_soulangiana
star_magnolia, Magnolia_stellata
sweet_bay, swamp_bay, swamp_laurel, Magnolia_virginiana
manglietia, genus_Manglietia
tulip_tree, tulip_poplar, yellow_poplar, canary_whitewood, Liriodendron_tulipifera
moonseed
common_moonseed, Canada_moonseed, yellow_parilla, Menispermum_canadense
Carolina_moonseed, Cocculus_carolinus
nutmeg, nutmeg_tree, Myristica_fragrans
water_nymph, fragrant_water_lily, pond_lily, Nymphaea_odorata
European_white_lily, Nymphaea_alba
southern_spatterdock, Nuphar_sagittifolium
lotus, Indian_lotus, sacred_lotus, Nelumbo_nucifera
water_chinquapin, American_lotus, yanquapin, Nelumbo_lutea
water-shield, fanwort, Cabomba_caroliniana
water-shield, Brasenia_schreberi, water-target
peony, paeony
buttercup, butterflower, butter-flower, crowfoot, goldcup, kingcup
meadow_buttercup, tall_buttercup, tall_crowfoot, tall_field_buttercup, Ranunculus_acris
water_crowfoot, water_buttercup, Ranunculus_aquatilis
lesser_celandine, pilewort, Ranunculus_ficaria
lesser_spearwort, Ranunculus_flammula
greater_spearwort, Ranunculus_lingua
western_buttercup, Ranunculus_occidentalis
creeping_buttercup, creeping_crowfoot, Ranunculus_repens
cursed_crowfoot, celery-leaved_buttercup, Ranunculus_sceleratus
aconite
monkshood, helmetflower, helmet_flower, Aconitum_napellus
wolfsbane, wolfbane, wolf's_bane, Aconitum_lycoctonum
baneberry, cohosh, herb_Christopher
baneberry
red_baneberry, redberry, red-berry, snakeberry, Actaea_rubra
pheasant's-eye, Adonis_annua
anemone, windflower
Alpine_anemone, mountain_anemone, Anemone_tetonensis
Canada_anemone, Anemone_Canadensis
thimbleweed, Anemone_cylindrica
wood_anemone, Anemone_nemorosa
wood_anemone, snowdrop, Anemone_quinquefolia
longheaded_thimbleweed, Anemone_riparia
snowdrop_anemone, snowdrop_windflower, Anemone_sylvestris
Virginia_thimbleweed, Anemone_virginiana
rue_anemone, Anemonella_thalictroides
columbine, aquilegia, aquilege
meeting_house, honeysuckle, Aquilegia_canadensis
blue_columbine, Aquilegia_caerulea, Aquilegia_scopulorum_calcarea
granny's_bonnets, Aquilegia_vulgaris
marsh_marigold, kingcup, meadow_bright, May_blob, cowslip, water_dragon, Caltha_palustris
American_bugbane, summer_cohosh, Cimicifuga_americana
black_cohosh, black_snakeroot, rattle-top, Cimicifuga_racemosa
fetid_bugbane, foetid_bugbane, Cimicifuga_foetida
clematis
pine_hyacinth, Clematis_baldwinii, Viorna_baldwinii
blue_jasmine, blue_jessamine, curly_clematis, marsh_clematis, Clematis_crispa
golden_clematis, Clematis_tangutica
scarlet_clematis, Clematis_texensis
leather_flower, Clematis_versicolor
leather_flower, vase-fine, vase_vine, Clematis_viorna
virgin's_bower, old_man's_beard, devil's_darning_needle, Clematis_virginiana
purple_clematis, purple_virgin's_bower, mountain_clematis, Clematis_verticillaris
goldthread, golden_thread, Coptis_groenlandica, Coptis_trifolia_groenlandica
rocket_larkspur, Consolida_ambigua, Delphinium_ajacis
delphinium
larkspur
winter_aconite, Eranthis_hyemalis
lenten_rose, black_hellebore, Helleborus_orientalis
green_hellebore, Helleborus_viridis
hepatica, liverleaf
goldenseal, golden_seal, yellow_root, turmeric_root, Hydrastis_Canadensis
false_rue_anemone, false_rue, Isopyrum_biternatum
giant_buttercup, Laccopetalum_giganteum
nigella
love-in-a-mist, Nigella_damascena
fennel_flower, Nigella_hispanica
black_caraway, nutmeg_flower, Roman_coriander, Nigella_sativa
pasqueflower, pasque_flower
meadow_rue
false_bugbane, Trautvetteria_carolinensis
globeflower, globe_flower
winter's_bark, winter's_bark_tree, Drimys_winteri
pepper_shrub, Pseudowintera_colorata, Wintera_colorata
sweet_gale, Scotch_gale, Myrica_gale
wax_myrtle
bay_myrtle, puckerbush, Myrica_cerifera
bayberry, candleberry, swamp_candleberry, waxberry, Myrica_pensylvanica
sweet_fern, Comptonia_peregrina, Comptonia_asplenifolia
corkwood, corkwood_tree, Leitneria_floridana
jointed_rush, Juncus_articulatus
toad_rush, Juncus_bufonius
slender_rush, Juncus_tenuis
zebrawood, zebrawood_tree
Connarus_guianensis
legume, leguminous_plant
legume
peanut
granadilla_tree, granadillo, Brya_ebenus
arariba, Centrolobium_robustum
tonka_bean, coumara_nut
courbaril, Hymenaea_courbaril
melilotus, melilot, sweet_clover
darling_pea, poison_bush
smooth_darling_pea, Swainsona_galegifolia
clover, trefoil
alpine_clover, Trifolium_alpinum
hop_clover, shamrock, lesser_yellow_trefoil, Trifolium_dubium
crimson_clover, Italian_clover, Trifolium_incarnatum
red_clover, purple_clover, Trifolium_pratense
buffalo_clover, Trifolium_reflexum, Trifolium_stoloniferum
white_clover, dutch_clover, shamrock, Trifolium_repens
mimosa
acacia
shittah, shittah_tree
wattle
black_wattle, Acacia_auriculiformis
gidgee, stinking_wattle, Acacia_cambegei
catechu, Jerusalem_thorn, Acacia_catechu
silver_wattle, mimosa, Acacia_dealbata
huisache, cassie, mimosa_bush, sweet_wattle, sweet_acacia, scented_wattle, flame_tree, Acacia_farnesiana
lightwood, Acacia_melanoxylon
golden_wattle, Acacia_pycnantha
fever_tree, Acacia_xanthophloea
coralwood, coral-wood, red_sandalwood, Barbados_pride, peacock_flower_fence, Adenanthera_pavonina
albizzia, albizia
silk_tree, Albizia_julibrissin, Albizzia_julibrissin
siris, siris_tree, Albizia_lebbeck, Albizzia_lebbeck
rain_tree, saman, monkeypod, monkey_pod, zaman, zamang, Albizia_saman
calliandra
conacaste, elephant's_ear, Enterolobium_cyclocarpa
inga
ice-cream_bean, Inga_edulis
guama, Inga_laurina
lead_tree, white_popinac, Leucaena_glauca, Leucaena_leucocephala
wild_tamarind, Lysiloma_latisiliqua, Lysiloma_bahamensis
sabicu, Lysiloma_sabicu
nitta_tree
Parkia_javanica
manila_tamarind, camachile, huamachil, wild_tamarind, Pithecellobium_dulce
cat's-claw, catclaw, black_bead, Pithecellodium_unguis-cati
honey_mesquite, Western_honey_mesquite, Prosopis_glandulosa
algarroba, algarrobilla, algarobilla
screw_bean, screwbean, tornillo, screwbean_mesquite, Prosopis_pubescens
screw_bean
dogbane
Indian_hemp, rheumatism_weed, Apocynum_cannabinum
bushman's_poison, ordeal_tree, Acocanthera_oppositifolia, Acocanthera_venenata
impala_lily, mock_azalia, desert_rose, kudu_lily, Adenium_obesum, Adenium_multiflorum
allamanda
common_allamanda, golden_trumpet, Allamanda_cathartica
dita, dita_bark, devil_tree, Alstonia_scholaris
Nepal_trumpet_flower, Easter_lily_vine, Beaumontia_grandiflora
carissa
hedge_thorn, natal_plum, Carissa_bispinosa
natal_plum, amatungulu, Carissa_macrocarpa, Carissa_grandiflora
periwinkle, rose_periwinkle, Madagascar_periwinkle, old_maid, Cape_periwinkle, red_periwinkle, cayenne_jasmine, Catharanthus_roseus, Vinca_rosea
ivory_tree, conessi, kurchi, kurchee, Holarrhena_pubescens, Holarrhena_antidysenterica
white_dipladenia, Mandevilla_boliviensis, Dipladenia_boliviensis
Chilean_jasmine, Mandevilla_laxa
oleander, rose_bay, Nerium_oleander
frangipani, frangipanni
West_Indian_jasmine, pagoda_tree, Plumeria_alba
rauwolfia, rauvolfia
snakewood, Rauwolfia_serpentina
Strophanthus_kombe
yellow_oleander, Thevetia_peruviana, Thevetia_neriifolia
myrtle, Vinca_minor
large_periwinkle, Vinca_major
arum, aroid
cuckoopint, lords-and-ladies, jack-in-the-pulpit, Arum_maculatum
black_calla, Arum_palaestinum
calamus
alocasia, elephant's_ear, elephant_ear
giant_taro, Alocasia_macrorrhiza
amorphophallus
pungapung, telingo_potato, elephant_yam, Amorphophallus_paeonifolius, Amorphophallus_campanulatus
devil's_tongue, snake_palm, umbrella_arum, Amorphophallus_rivieri
anthurium, tailflower, tail-flower
flamingo_flower, flamingo_plant, Anthurium_andraeanum, Anthurium_scherzerianum
jack-in-the-pulpit, Indian_turnip, wake-robin, Arisaema_triphyllum, Arisaema_atrorubens
friar's-cowl, Arisarum_vulgare
caladium
Caladium_bicolor
wild_calla, water_arum, Calla_palustris
taro, taro_plant, dalo, dasheen, Colocasia_esculenta
taro, cocoyam, dasheen, eddo
cryptocoryne, water_trumpet
dracontium
golden_pothos, pothos, ivy_arum, Epipremnum_aureum, Scindapsus_aureus
skunk_cabbage, Lysichiton_americanum
monstera
ceriman, Monstera_deliciosa
nephthytis
Nephthytis_afzelii
arrow_arum
green_arrow_arum, tuckahoe, Peltandra_virginica
philodendron
pistia, water_lettuce, water_cabbage, Pistia_stratiotes, Pistia_stratoites
pothos
spathiphyllum, peace_lily, spathe_flower
skunk_cabbage, polecat_weed, foetid_pothos, Symplocarpus_foetidus
yautia, tannia, spoonflower, malanga, Xanthosoma_sagittifolium, Xanthosoma_atrovirens
calla_lily, calla, arum_lily, Zantedeschia_aethiopica
pink_calla, Zantedeschia_rehmanii
golden_calla
duckweed
common_duckweed, lesser_duckweed, Lemna_minor
star-duckweed, Lemna_trisulca
great_duckweed, water_flaxseed, Spirodela_polyrrhiza
watermeal
common_wolffia, Wolffia_columbiana
aralia
American_angelica_tree, devil's_walking_stick, Hercules'-club, Aralia_spinosa
American_spikenard, petty_morel, life-of-man, Aralia_racemosa
bristly_sarsaparilla, bristly_sarsparilla, dwarf_elder, Aralia_hispida
Japanese_angelica_tree, Aralia_elata
Chinese_angelica, Chinese_angelica_tree, Aralia_stipulata
ivy, common_ivy, English_ivy, Hedera_helix
puka, Meryta_sinclairii
ginseng, nin-sin, Panax_ginseng, Panax_schinseng, Panax_pseudoginseng
ginseng
umbrella_tree, Schefflera_actinophylla, Brassaia_actinophylla
birthwort, Aristolochia_clematitis
Dutchman's-pipe, pipe_vine, Aristolochia_macrophylla, Aristolochia_durior
Virginia_snakeroot, Virginia_serpentaria, Virginia_serpentary, Aristolochia_serpentaria
Canada_ginger, black_snakeroot, Asarum_canadense
heartleaf, heart-leaf, Asarum_virginicum
heartleaf, heart-leaf, Asarum_shuttleworthii
asarabacca, Asarum_europaeum
caryophyllaceous_plant
corn_cockle, corn_campion, crown-of-the-field, Agrostemma_githago
sandwort
mountain_sandwort, mountain_starwort, mountain_daisy, Arenaria_groenlandica
pine-barren_sandwort, longroot, Arenaria_caroliniana
seabeach_sandwort, Arenaria_peploides
rock_sandwort, Arenaria_stricta
thyme-leaved_sandwort, Arenaria_serpyllifolia
mouse-ear_chickweed, mouse_eared_chickweed, mouse_ear, clammy_chickweed, chickweed
snow-in-summer, love-in-a-mist, Cerastium_tomentosum
Alpine_mouse-ear, Arctic_mouse-ear, Cerastium_alpinum
pink, garden_pink
sweet_William, Dianthus_barbatus
carnation, clove_pink, gillyflower, Dianthus_caryophyllus
china_pink, rainbow_pink, Dianthus_chinensis
Japanese_pink, Dianthus_chinensis_heddewigii
maiden_pink, Dianthus_deltoides
cheddar_pink, Diangus_gratianopolitanus
button_pink, Dianthus_latifolius
cottage_pink, grass_pink, Dianthus_plumarius
fringed_pink, Dianthus_supurbus
drypis
baby's_breath, babies'-breath, Gypsophila_paniculata
coral_necklace, Illecebrum_verticullatum
lychnis, catchfly
ragged_robin, cuckoo_flower, Lychnis_flos-cuculi, Lychins_floscuculi
scarlet_lychnis, maltese_cross, Lychins_chalcedonica
mullein_pink, rose_campion, gardener's_delight, dusty_miller, Lychnis_coronaria
sandwort, Moehringia_lateriflora
sandwort, Moehringia_mucosa
soapwort, hedge_pink, bouncing_Bet, bouncing_Bess, Saponaria_officinalis
knawel, knawe, Scleranthus_annuus
silene, campion, catchfly
moss_campion, Silene_acaulis
wild_pink, Silene_caroliniana
red_campion, red_bird's_eye, Silene_dioica, Lychnis_dioica
white_campion, evening_lychnis, white_cockle, bladder_campion, Silene_latifolia, Lychnis_alba
fire_pink, Silene_virginica
bladder_campion, Silene_uniflora, Silene_vulgaris
corn_spurry, corn_spurrey, Spergula_arvensis
sand_spurry, sea_spurry, Spergularia_rubra
chickweed
common_chickweed, Stellaria_media
cowherb, cow_cockle, Vaccaria_hispanica, Vaccaria_pyramidata, Saponaria_vaccaria
Hottentot_fig, Hottentot's_fig, sour_fig, Carpobrotus_edulis, Mesembryanthemum_edule
livingstone_daisy, Dorotheanthus_bellidiformis
fig_marigold, pebble_plant
ice_plant, icicle_plant, Mesembryanthemum_crystallinum
New_Zealand_spinach, Tetragonia_tetragonioides, Tetragonia_expansa
amaranth
amaranth
tumbleweed, Amaranthus_albus, Amaranthus_graecizans
prince's-feather, gentleman's-cane, prince's-plume, red_amaranth, purple_amaranth, Amaranthus_cruentus, Amaranthus_hybridus_hypochondriacus, Amaranthus_hybridus_erythrostachys
pigweed, Amaranthus_hypochondriacus
thorny_amaranth, Amaranthus_spinosus
alligator_weed, alligator_grass, Alternanthera_philoxeroides
cockscomb, common_cockscomb, Celosia_cristata, Celosia_argentea_cristata
cottonweed
globe_amaranth, bachelor's_button, Gomphrena_globosa
bloodleaf
saltwort, Batis_maritima
lamb's-quarters, pigweed, wild_spinach, Chenopodium_album
good-king-henry, allgood, fat_hen, wild_spinach, Chenopodium_bonus-henricus
Jerusalem_oak, feather_geranium, Mexican_tea, Chenopodium_botrys, Atriplex_mexicana
oak-leaved_goosefoot, oakleaf_goosefoot, Chenopodium_glaucum
sowbane, red_goosefoot, Chenopodium_hybridum
nettle-leaved_goosefoot, nettleleaf_goosefoot, Chenopodium_murale
red_goosefoot, French_spinach, Chenopodium_rubrum
stinking_goosefoot, Chenopodium_vulvaria
orach, orache
saltbush
garden_orache, mountain_spinach, Atriplex_hortensis
desert_holly, Atriplex_hymenelytra
quail_bush, quail_brush, white_thistle, Atriplex_lentiformis
beet, common_beet, Beta_vulgaris
beetroot, Beta_vulgaris_rubra
chard, Swiss_chard, spinach_beet, leaf_beet, chard_plant, Beta_vulgaris_cicla
mangel-wurzel, mangold-wurzel, mangold, Beta_vulgaris_vulgaris
winged_pigweed, tumbleweed, Cycloloma_atriplicifolium
halogeton, Halogeton_glomeratus
glasswort, samphire, Salicornia_europaea
saltwort, barilla, glasswort, kali, kelpwort, Salsola_kali, Salsola_soda
Russian_thistle, Russian_tumbleweed, Russian_cactus, tumbleweed, Salsola_kali_tenuifolia
greasewood, black_greasewood, Sarcobatus_vermiculatus
scarlet_musk_flower, Nyctaginia_capitata
sand_verbena
sweet_sand_verbena, Abronia_fragrans
yellow_sand_verbena, Abronia_latifolia
beach_pancake, Abronia_maritima
beach_sand_verbena, pink_sand_verbena, Abronia_umbellata
desert_sand_verbena, Abronia_villosa
trailing_four_o'clock, trailing_windmills, Allionia_incarnata
bougainvillea
umbrellawort
four_o'clock
common_four-o'clock, marvel-of-Peru, Mirabilis_jalapa, Mirabilis_uniflora
California_four_o'clock, Mirabilis_laevis, Mirabilis_californica
sweet_four_o'clock, maravilla, Mirabilis_longiflora
desert_four_o'clock, Colorado_four_o'clock, maravilla, Mirabilis_multiflora
mountain_four_o'clock, Mirabilis_oblongifolia
cockspur, Pisonia_aculeata
rattail_cactus, rat's-tail_cactus, Aporocactus_flagelliformis
saguaro, sahuaro, Carnegiea_gigantea
night-blooming_cereus
echinocactus, barrel_cactus
hedgehog_cactus
golden_barrel_cactus, Echinocactus_grusonii
hedgehog_cereus
rainbow_cactus
epiphyllum, orchid_cactus
barrel_cactus
night-blooming_cereus
chichipe, Lemaireocereus_chichipe
mescal, mezcal, peyote, Lophophora_williamsii
mescal_button, sacred_mushroom, magic_mushroom
mammillaria
feather_ball, Mammillaria_plumosa
garambulla, garambulla_cactus, Myrtillocactus_geometrizans
Knowlton's_cactus, Pediocactus_knowltonii
nopal
prickly_pear, prickly_pear_cactus
cholla, Opuntia_cholla
nopal, Opuntia_lindheimeri
tuna, Opuntia_tuna
Barbados_gooseberry, Barbados-gooseberry_vine, Pereskia_aculeata
mistletoe_cactus
Christmas_cactus, Schlumbergera_buckleyi, Schlumbergera_baridgesii
night-blooming_cereus
crab_cactus, Thanksgiving_cactus, Zygocactus_truncatus, Schlumbergera_truncatus
pokeweed
Indian_poke, Phytolacca_acinosa
poke, pigeon_berry, garget, scoke, Phytolacca_americana
ombu, bella_sombra, Phytolacca_dioica
bloodberry, blood_berry, rougeberry, rouge_plant, Rivina_humilis
portulaca
rose_moss, sun_plant, Portulaca_grandiflora
common_purslane, pussley, pussly, verdolagas, Portulaca_oleracea
rock_purslane
red_maids, redmaids, Calandrinia_ciliata
Carolina_spring_beauty, Claytonia_caroliniana
spring_beauty, Clatonia_lanceolata
Virginia_spring_beauty, Claytonia_virginica
siskiyou_lewisia, Lewisia_cotyledon
bitterroot, Lewisia_rediviva
broad-leaved_montia, Montia_cordifolia
blinks, blinking_chickweed, water_chickweed, Montia_lamprosperma
toad_lily, Montia_chamissoi
winter_purslane, miner's_lettuce, Cuban_spinach, Montia_perfoliata
flame_flower, flame-flower, flameflower, Talinum_aurantiacum
pigmy_talinum, Talinum_brevifolium
jewels-of-opar, Talinum_paniculatum
caper
native_pomegranate, Capparis_arborea
caper_tree, Jamaica_caper_tree, Capparis_cynophallophora
caper_tree, bay-leaved_caper, Capparis_flexuosa
common_caper, Capparis_spinosa
spiderflower, cleome
Rocky_Mountain_bee_plant, stinking_clover, Cleome_serrulata
clammyweed, Polanisia_graveolens, Polanisia_dodecandra
crucifer, cruciferous_plant
cress, cress_plant
watercress
stonecress, stone_cress
garlic_mustard, hedge_garlic, sauce-alone, jack-by-the-hedge, Alliaria_officinalis
alyssum, madwort
rose_of_Jericho, resurrection_plant, Anastatica_hierochuntica
Arabidopsis_thaliana, mouse-ear_cress
Arabidopsis_lyrata
rock_cress, rockcress
sicklepod, Arabis_Canadensis
tower_mustard, tower_cress, Turritis_glabra, Arabis_glabra
horseradish, horseradish_root
winter_cress, St._Barbara's_herb, scurvy_grass
yellow_rocket, rockcress, rocket_cress, Barbarea_vulgaris, Sisymbrium_barbarea
hoary_alison, hoary_alyssum, Berteroa_incana
buckler_mustard, Biscutalla_laevigata
wild_cabbage, Brassica_oleracea
cabbage, cultivated_cabbage, Brassica_oleracea
head_cabbage, head_cabbage_plant, Brassica_oleracea_capitata
savoy_cabbage
brussels_sprout, Brassica_oleracea_gemmifera
cauliflower, Brassica_oleracea_botrytis
broccoli, Brassica_oleracea_italica
collard
kohlrabi, Brassica_oleracea_gongylodes
turnip_plant
turnip, white_turnip, Brassica_rapa
rutabaga, turnip_cabbage, swede, Swedish_turnip, rutabaga_plant, Brassica_napus_napobrassica
broccoli_raab, broccoli_rabe, Brassica_rapa_ruvo
mustard
chinese_mustard, indian_mustard, leaf_mustard, gai_choi, Brassica_juncea
bok_choy, bok_choi, pakchoi, pak_choi, Chinese_white_cabbage, Brassica_rapa_chinensis
rape, colza, Brassica_napus
rapeseed
shepherd's_purse, shepherd's_pouch, Capsella_bursa-pastoris
lady's_smock, cuckooflower, cuckoo_flower, meadow_cress, Cardamine_pratensis
coral-root_bittercress, coralroot, coralwort, Cardamine_bulbifera, Dentaria_bulbifera
crinkleroot, crinkle-root, crinkle_root, pepper_root, toothwort, Cardamine_diphylla, Dentaria_diphylla
American_watercress, mountain_watercress, Cardamine_rotundifolia
spring_cress, Cardamine_bulbosa
purple_cress, Cardamine_douglasii
wallflower, Cheiranthus_cheiri, Erysimum_cheiri
prairie_rocket
scurvy_grass, common_scurvy_grass, Cochlearia_officinalis
sea_kale, sea_cole, Crambe_maritima
tansy_mustard, Descurainia_pinnata
draba
wallflower
prairie_rocket
Siberian_wall_flower, Erysimum_allionii, Cheiranthus_allionii
western_wall_flower, Erysimum_asperum, Cheiranthus_asperus, Erysimum_arkansanum
wormseed_mustard, Erysimum_cheiranthoides
heliophila
damask_violet, Dame's_violet, sweet_rocket, Hesperis_matronalis
tansy-leaved_rocket, Hugueninia_tanacetifolia, Sisymbrium_tanacetifolia
candytuft
woad
dyer's_woad, Isatis_tinctoria
bladderpod
sweet_alyssum, sweet_alison, Lobularia_maritima
Malcolm_stock, stock
Virginian_stock, Virginia_stock, Malcolmia_maritima
stock, gillyflower
brompton_stock, Matthiola_incana
bladderpod
chamois_cress, Pritzelago_alpina, Lepidium_alpina
radish_plant, radish
jointed_charlock, wild_radish, wild_rape, runch, Raphanus_raphanistrum
radish, Raphanus_sativus
radish, daikon, Japanese_radish, Raphanus_sativus_longipinnatus
marsh_cress, yellow_watercress, Rorippa_islandica
great_yellowcress, Rorippa_amphibia, Nasturtium_amphibium
schizopetalon, Schizopetalon_walkeri
field_mustard, wild_mustard, charlock, chadlock, Brassica_kaber, Sinapis_arvensis
hedge_mustard, Sisymbrium_officinale
desert_plume, prince's-plume, Stanleya_pinnata, Cleome_pinnata
pennycress
field_pennycress, French_weed, fanweed, penny_grass, stinkweed, mithridate_mustard, Thlaspi_arvense
fringepod, lacepod
bladderpod
wasabi
poppy
Iceland_poppy, Papaver_alpinum
western_poppy, Papaver_californicum
prickly_poppy, Papaver_argemone
Iceland_poppy, arctic_poppy, Papaver_nudicaule
oriental_poppy, Papaver_orientale
corn_poppy, field_poppy, Flanders_poppy, Papaver_rhoeas
opium_poppy, Papaver_somniferum
prickly_poppy, argemone, white_thistle, devil's_fig
Mexican_poppy, Argemone_mexicana
bocconia, tree_celandine, Bocconia_frutescens
celandine, greater_celandine, swallowwort, swallow_wort, Chelidonium_majus
corydalis
climbing_corydalis, Corydalis_claviculata, Fumaria_claviculata
California_poppy, Eschscholtzia_californica
horn_poppy, horned_poppy, yellow_horned_poppy, sea_poppy, Glaucium_flavum
golden_cup, Mexican_tulip_poppy, Hunnemania_fumariifolia
plume_poppy, bocconia, Macleaya_cordata
blue_poppy, Meconopsis_betonicifolia
Welsh_poppy, Meconopsis_cambrica
creamcups, Platystemon_californicus
matilija_poppy, California_tree_poppy, Romneya_coulteri
wind_poppy, flaming_poppy, Stylomecon_heterophyllum, Papaver_heterophyllum
celandine_poppy, wood_poppy, Stylophorum_diphyllum
climbing_fumitory, Allegheny_vine, Adlumia_fungosa, Fumaria_fungosa
bleeding_heart, lyreflower, lyre-flower, Dicentra_spectabilis
Dutchman's_breeches, Dicentra_cucullaria
squirrel_corn, Dicentra_canadensis
composite, composite_plant
compass_plant, compass_flower
everlasting, everlasting_flower
achillea
yarrow, milfoil, Achillea_millefolium
pink-and-white_everlasting, pink_paper_daisy, Acroclinium_roseum
white_snakeroot, white_sanicle, Ageratina_altissima, Eupatorium_rugosum
ageratum
common_ageratum, Ageratum_houstonianum
sweet_sultan, Amberboa_moschata, Centaurea_moschata
ragweed, ambrosia, bitterweed
common_ragweed, Ambrosia_artemisiifolia
great_ragweed, Ambrosia_trifida
western_ragweed, perennial_ragweed, Ambrosia_psilostachya
ammobium
winged_everlasting, Ammobium_alatum
pellitory, pellitory-of-Spain, Anacyclus_pyrethrum
pearly_everlasting, cottonweed, Anaphalis_margaritacea
andryala
plantain-leaved_pussytoes
field_pussytoes
solitary_pussytoes
mountain_everlasting
mayweed, dog_fennel, stinking_mayweed, stinking_chamomile, Anthemis_cotula
yellow_chamomile, golden_marguerite, dyers'_chamomile, Anthemis_tinctoria
corn_chamomile, field_chamomile, corn_mayweed, Anthemis_arvensis
woolly_daisy, dwarf_daisy, Antheropeas_wallacei, Eriophyllum_wallacei
burdock, clotbur
great_burdock, greater_burdock, cocklebur, Arctium_lappa
African_daisy
blue-eyed_African_daisy, Arctotis_stoechadifolia, Arctotis_venusta
marguerite, marguerite_daisy, Paris_daisy, Chrysanthemum_frutescens, Argyranthemum_frutescens
silversword, Argyroxiphium_sandwicense
arnica
heartleaf_arnica, Arnica_cordifolia
Arnica_montana
lamb_succory, dwarf_nipplewort, Arnoseris_minima
artemisia
mugwort
sweet_wormwood, Artemisia_annua
field_wormwood, Artemisia_campestris
tarragon, estragon, Artemisia_dracunculus
sand_sage, silvery_wormwood, Artemisia_filifolia
wormwood_sage, prairie_sagewort, Artemisia_frigida
western_mugwort, white_sage, cudweed, prairie_sage, Artemisia_ludoviciana, Artemisia_gnaphalodes
Roman_wormwood, Artemis_pontica
bud_brush, bud_sagebrush, Artemis_spinescens
common_mugwort, Artemisia_vulgaris
aster
wood_aster
whorled_aster, Aster_acuminatus
heath_aster, Aster_arenosus
heart-leaved_aster, Aster_cordifolius
white_wood_aster, Aster_divaricatus
bushy_aster, Aster_dumosus
heath_aster, Aster_ericoides
white_prairie_aster, Aster_falcatus
stiff_aster, Aster_linarifolius
goldilocks, goldilocks_aster, Aster_linosyris, Linosyris_vulgaris
large-leaved_aster, Aster_macrophyllus
New_England_aster, Aster_novae-angliae
Michaelmas_daisy, New_York_aster, Aster_novi-belgii
upland_white_aster, Aster_ptarmicoides
Short's_aster, Aster_shortii
sea_aster, sea_starwort, Aster_tripolium
prairie_aster, Aster_turbinellis
annual_salt-marsh_aster
aromatic_aster
arrow_leaved_aster
azure_aster
bog_aster
crooked-stemmed_aster
Eastern_silvery_aster
flat-topped_white_aster
late_purple_aster
panicled_aster
perennial_salt_marsh_aster
purple-stemmed_aster
rough-leaved_aster
rush_aster
Schreiber's_aster
small_white_aster
smooth_aster
southern_aster
starved_aster, calico_aster
tradescant's_aster
wavy-leaved_aster
Western_silvery_aster
willow_aster
ayapana, Ayapana_triplinervis, Eupatorium_aya-pana
mule_fat, Baccharis_viminea
balsamroot
daisy
common_daisy, English_daisy, Bellis_perennis
bur_marigold, burr_marigold, beggar-ticks, beggar's-ticks, sticktight
Spanish_needles, Bidens_bipinnata
tickseed_sunflower, Bidens_coronata, Bidens_trichosperma
European_beggar-ticks, trifid_beggar-ticks, trifid_bur_marigold, Bidens_tripartita
slender_knapweed
false_chamomile
Swan_River_daisy, Brachycome_Iberidifolia
woodland_oxeye, Buphthalmum_salicifolium
Indian_plantain
calendula
common_marigold, pot_marigold, ruddles, Scotch_marigold, Calendula_officinalis
China_aster, Callistephus_chinensis
thistle
welted_thistle, Carduus_crispus
musk_thistle, nodding_thistle, Carduus_nutans
carline_thistle
stemless_carline_thistle, Carlina_acaulis
common_carline_thistle, Carlina_vulgaris
safflower, false_saffron, Carthamus_tinctorius
safflower_seed
catananche
blue_succory, cupid's_dart, Catananche_caerulea
centaury
dusty_miller, Centaurea_cineraria, Centaurea_gymnocarpa
cornflower, bachelor's_button, bluebottle, Centaurea_cyanus
star-thistle, caltrop, Centauria_calcitrapa
knapweed
sweet_sultan, Centaurea_imperialis
great_knapweed, greater_knapweed, Centaurea_scabiosa
Barnaby's_thistle, yellow_star-thistle, Centaurea_solstitialis
chamomile, camomile, Chamaemelum_nobilis, Anthemis_nobilis
chaenactis
chrysanthemum
corn_marigold, field_marigold, Chrysanthemum_segetum
crown_daisy, Chrysanthemum_coronarium
chop-suey_greens, tong_ho, shun_giku, Chrysanthemum_coronarium_spatiosum
golden_aster
Maryland_golden_aster, Chrysopsis_mariana
goldenbush
rabbit_brush, rabbit_bush, Chrysothamnus_nauseosus
chicory, succory, chicory_plant, Cichorium_intybus
endive, witloof, Cichorium_endivia
chicory, chicory_root
plume_thistle, plumed_thistle
Canada_thistle, creeping_thistle, Cirsium_arvense
field_thistle, Cirsium_discolor
woolly_thistle, Cirsium_flodmanii
European_woolly_thistle, Cirsium_eriophorum
melancholy_thistle, Cirsium_heterophylum, Cirsium_helenioides
brook_thistle, Cirsium_rivulare
bull_thistle, boar_thistle, spear_thistle, Cirsium_vulgare, Cirsium_lanceolatum
blessed_thistle, sweet_sultan, Cnicus_benedictus
mistflower, mist-flower, ageratum, Conoclinium_coelestinum, Eupatorium_coelestinum
horseweed, Canadian_fleabane, fleabane, Conyza_canadensis, Erigeron_canadensis
coreopsis, tickseed, tickweed, tick-weed
giant_coreopsis, Coreopsis_gigantea
sea_dahlia, Coreopsis_maritima
calliopsis, Coreopsis_tinctoria
cosmos, cosmea
brass_buttons, Cotula_coronopifolia
billy_buttons
hawk's-beard, hawk's-beards
artichoke, globe_artichoke, artichoke_plant, Cynara_scolymus
cardoon, Cynara_cardunculus
dahlia, Dahlia_pinnata
German_ivy, Delairea_odorata, Senecio_milkanioides
florist's_chrysanthemum, florists'_chrysanthemum, mum, Dendranthema_grandifloruom, Chrysanthemum_morifolium
cape_marigold, sun_marigold, star_of_the_veldt
leopard's-bane, leopardbane
coneflower
globe_thistle
elephant's-foot
tassel_flower, Emilia_sagitta
brittlebush, brittle_bush, incienso, Encelia_farinosa
sunray, Enceliopsis_nudicaulis
engelmannia
fireweed, Erechtites_hieracifolia
fleabane
blue_fleabane, Erigeron_acer
daisy_fleabane, Erigeron_annuus
orange_daisy, orange_fleabane, Erigeron_aurantiacus
spreading_fleabane, Erigeron_divergens
seaside_daisy, beach_aster, Erigeron_glaucous
Philadelphia_fleabane, Erigeron_philadelphicus
robin's_plantain, Erigeron_pulchellus
showy_daisy, Erigeron_speciosus
woolly_sunflower
golden_yarrow, Eriophyllum_lanatum
dog_fennel, Eupatorium_capillifolium
Joe-Pye_weed, spotted_Joe-Pye_weed, Eupatorium_maculatum
boneset, agueweed, thoroughwort, Eupatorium_perfoliatum
Joe-Pye_weed, purple_boneset, trumpet_weed, marsh_milkweed, Eupatorium_purpureum
blue_daisy, blue_marguerite, Felicia_amelloides
kingfisher_daisy, Felicia_bergeriana
cotton_rose, cudweed, filago
herba_impia, Filago_germanica
gaillardia
gazania
treasure_flower, Gazania_rigens
African_daisy
Barberton_daisy, Transvaal_daisy, Gerbera_jamesonii
desert_sunflower, Gerea_canescens
cudweed
chafeweed, wood_cudweed, Gnaphalium_sylvaticum
gumweed, gum_plant, tarweed, rosinweed
Grindelia_robusta
curlycup_gumweed, Grindelia_squarrosa
little-head_snakeweed, Gutierrezia_microcephala
rabbitweed, rabbit-weed, snakeweed, broom_snakeweed, broom_snakeroot, turpentine_weed, Gutierrezia_sarothrae
broomweed, broom-weed, Gutierrezia_texana
velvet_plant, purple_velvet_plant, royal_velvet_plant, Gynura_aurantiaca
goldenbush
camphor_daisy, Haplopappus_phyllocephalus
yellow_spiny_daisy, Haplopappus_spinulosus
hoary_golden_bush, Hazardia_cana
sneezeweed
orange_sneezeweed, owlclaws, Helenium_hoopesii
rosilla, Helenium_puberulum
sunflower, helianthus
swamp_sunflower, Helianthus_angustifolius
common_sunflower, mirasol, Helianthus_annuus
giant_sunflower, tall_sunflower, Indian_potato, Helianthus_giganteus
showy_sunflower, Helianthus_laetiflorus
Maximilian's_sunflower, Helianthus_maximilianii
prairie_sunflower, Helianthus_petiolaris
Jerusalem_artichoke, girasol, Jerusalem_artichoke_sunflower, Helianthus_tuberosus
Jerusalem_artichoke
strawflower, golden_everlasting, yellow_paper_daisy, Helichrysum_bracteatum
heliopsis, oxeye
strawflower
hairy_golden_aster, prairie_golden_aster, Heterotheca_villosa, Chrysopsis_villosa
hawkweed
rattlesnake_weed, Hieracium_venosum
alpine_coltsfoot, Homogyne_alpina, Tussilago_alpina
alpine_gold, alpine_hulsea, Hulsea_algida
dwarf_hulsea, Hulsea_nana
cat's-ear, California_dandelion, capeweed, gosmore, Hypochaeris_radicata
inula
marsh_elder, iva
burweed_marsh_elder, false_ragweed, Iva_xanthifolia
krigia
dwarf_dandelion, Krigia_dandelion, Krigia_bulbosa
garden_lettuce, common_lettuce, Lactuca_sativa
cos_lettuce, romaine_lettuce, Lactuca_sativa_longifolia
leaf_lettuce, Lactuca_sativa_crispa
celtuce, stem_lettuce, Lactuca_sativa_asparagina
prickly_lettuce, horse_thistle, Lactuca_serriola, Lactuca_scariola
goldfields, Lasthenia_chrysostoma
tidytips, tidy_tips, Layia_platyglossa
hawkbit
fall_dandelion, arnica_bud, Leontodon_autumnalis
edelweiss, Leontopodium_alpinum
oxeye_daisy, ox-eyed_daisy, marguerite, moon_daisy, white_daisy, Leucanthemum_vulgare, Chrysanthemum_leucanthemum
oxeye_daisy, Leucanthemum_maximum, Chrysanthemum_maximum
shasta_daisy, Leucanthemum_superbum, Chrysanthemum_maximum_maximum
Pyrenees_daisy, Leucanthemum_lacustre, Chrysanthemum_lacustre
north_island_edelweiss, Leucogenes_leontopodium
blazing_star, button_snakeroot, gayfeather, gay-feather, snakeroot
dotted_gayfeather, Liatris_punctata
dense_blazing_star, Liatris_pycnostachya
Texas_star, Lindheimera_texana
African_daisy, yellow_ageratum, Lonas_inodora, Lonas_annua
tahoka_daisy, tansy_leaf_aster, Machaeranthera_tanacetifolia
sticky_aster, Machaeranthera_bigelovii
Mojave_aster, Machaeranthera_tortifoloia
tarweed
sweet_false_chamomile, wild_chamomile, German_chamomile, Matricaria_recutita, Matricaria_chamomilla
pineapple_weed, rayless_chamomile, Matricaria_matricarioides
climbing_hempweed, climbing_boneset, wild_climbing_hempweed, climbing_hemp-vine, Mikania_scandens
mutisia
rattlesnake_root
white_lettuce, cankerweed, Nabalus_alba, Prenanthes_alba
daisybush, daisy-bush, daisy_bush
New_Zealand_daisybush, Olearia_haastii
cotton_thistle, woolly_thistle, Scotch_thistle, Onopordum_acanthium, Onopordon_acanthium
othonna
cascade_everlasting, Ozothamnus_secundiflorus, Helichrysum_secundiflorum
butterweed
American_feverfew, wild_quinine, prairie_dock, Parthenium_integrifolium
cineraria, Pericallis_cruenta, Senecio_cruentus
florest's_cineraria, Pericallis_hybrida
butterbur, bog_rhubarb, Petasites_hybridus, Petasites_vulgaris
winter_heliotrope, sweet_coltsfoot, Petasites_fragrans
sweet_coltsfoot, Petasites_sagitattus
oxtongue, bristly_oxtongue, bitterweed, bugloss, Picris_echioides
hawkweed
mouse-ear_hawkweed, Pilosella_officinarum, Hieracium_pilocella
stevia
rattlesnake_root, Prenanthes_purpurea
fleabane, feabane_mullet, Pulicaria_dysenterica
sheep_plant, vegetable_sheep, Raoulia_lutescens, Raoulia_australis
coneflower
Mexican_hat, Ratibida_columnaris
long-head_coneflower, prairie_coneflower, Ratibida_columnifera
prairie_coneflower, Ratibida_tagetes
Swan_River_everlasting, rhodanthe, Rhodanthe_manglesii, Helipterum_manglesii
coneflower
black-eyed_Susan, Rudbeckia_hirta, Rudbeckia_serotina
cutleaved_coneflower, Rudbeckia_laciniata
golden_glow, double_gold, hortensia, Rudbeckia_laciniata_hortensia
lavender_cotton, Santolina_chamaecyparissus
creeping_zinnia, Sanvitalia_procumbens
golden_thistle
Spanish_oyster_plant, Scolymus_hispanicus
nodding_groundsel, Senecio_bigelovii
dusty_miller, Senecio_cineraria, Cineraria_maritima
butterweed, ragwort, Senecio_glabellus
ragwort, tansy_ragwort, ragweed, benweed, Senecio_jacobaea
arrowleaf_groundsel, Senecio_triangularis
black_salsify, viper's_grass, scorzonera, Scorzonera_hispanica
white-topped_aster
narrow-leaved_white-topped_aster
silver_sage, silver_sagebrush, grey_sage, gray_sage, Seriphidium_canum, Artemisia_cana
sea_wormwood, Seriphidium_maritimum, Artemisia_maritima
sawwort, Serratula_tinctoria
rosinweed, Silphium_laciniatum
milk_thistle, lady's_thistle, Our_Lady's_mild_thistle, holy_thistle, blessed_thistle, Silybum_marianum
goldenrod
silverrod, Solidago_bicolor
meadow_goldenrod, Canadian_goldenrod, Solidago_canadensis
Missouri_goldenrod, Solidago_missouriensis
alpine_goldenrod, Solidago_multiradiata
grey_goldenrod, gray_goldenrod, Solidago_nemoralis
Blue_Mountain_tea, sweet_goldenrod, Solidago_odora
dyer's_weed, Solidago_rugosa
seaside_goldenrod, beach_goldenrod, Solidago_sempervirens
narrow_goldenrod, Solidago_spathulata
Boott's_goldenrod
Elliott's_goldenrod
Ohio_goldenrod
rough-stemmed_goldenrod
showy_goldenrod
tall_goldenrod
zigzag_goldenrod, broad_leaved_goldenrod
sow_thistle, milk_thistle
milkweed, Sonchus_oleraceus
stevia
stokes'_aster, cornflower_aster, Stokesia_laevis
marigold
African_marigold, big_marigold, Aztec_marigold, Tagetes_erecta
French_marigold, Tagetes_patula
painted_daisy, pyrethrum, Tanacetum_coccineum, Chrysanthemum_coccineum
pyrethrum, Dalmatian_pyrethrum, Dalmatia_pyrethrum, Tanacetum_cinerariifolium, Chrysanthemum_cinerariifolium
northern_dune_tansy, Tanacetum_douglasii
feverfew, Tanacetum_parthenium, Chrysanthemum_parthenium
dusty_miller, silver-lace, silver_lace, Tanacetum_ptarmiciflorum, Chrysanthemum_ptarmiciflorum
tansy, golden_buttons, scented_fern, Tanacetum_vulgare
dandelion, blowball
common_dandelion, Taraxacum_ruderalia, Taraxacum_officinale
dandelion_green
Russian_dandelion, kok-saghyz, kok-sagyz, Taraxacum_kok-saghyz
stemless_hymenoxys, Tetraneuris_acaulis, Hymenoxys_acaulis
Mexican_sunflower, tithonia
Easter_daisy, stemless_daisy, Townsendia_Exscapa
yellow_salsify, Tragopogon_dubius
salsify, oyster_plant, vegetable_oyster, Tragopogon_porrifolius
meadow_salsify, goatsbeard, shepherd's_clock, Tragopogon_pratensis
scentless_camomile, scentless_false_camomile, scentless_mayweed, scentless_hayweed, corn_mayweed, Tripleurospermum_inodorum, Matricaria_inodorum
turfing_daisy, Tripleurospermum_tchihatchewii, Matricaria_tchihatchewii
coltsfoot, Tussilago_farfara
ursinia
crownbeard, crown-beard, crown_beard
wingstem, golden_ironweed, yellow_ironweed, golden_honey_plant, Verbesina_alternifolia, Actinomeris_alternifolia
cowpen_daisy, golden_crownbeard, golden_crown_beard, butter_daisy, Verbesina_encelioides, Ximenesia_encelioides
gravelweed, Verbesina_helianthoides
Virginia_crownbeard, frostweed, frost-weed, Verbesina_virginica
ironweed, vernonia
mule's_ears, Wyethia_amplexicaulis
white-rayed_mule's_ears, Wyethia_helianthoides
cocklebur, cockle-bur, cockleburr, cockle-burr
xeranthemum
immortelle, Xeranthemum_annuum
zinnia, old_maid, old_maid_flower
white_zinnia, Zinnia_acerosa
little_golden_zinnia, Zinnia_grandiflora
blazing_star, Mentzelia_livicaulis, Mentzelia_laevicaulis
bartonia, Mentzelia_lindleyi
achene
samara, key_fruit, key
campanula, bellflower
creeping_bellflower, Campanula_rapunculoides
Canterbury_bell, cup_and_saucer, Campanula_medium
tall_bellflower, Campanula_americana
marsh_bellflower, Campanula_aparinoides
clustered_bellflower, Campanula_glomerata
peach_bells, peach_bell, willow_bell, Campanula_persicifolia
chimney_plant, chimney_bellflower, Campanula_pyramidalis
rampion, rampion_bellflower, Campanula_rapunculus
tussock_bellflower, spreading_bellflower, Campanula_carpatica
orchid, orchidaceous_plant
orchis
male_orchis, early_purple_orchid, Orchis_mascula
butterfly_orchid, butterfly_orchis, Orchis_papilionaceae
showy_orchis, purple_orchis, purple-hooded_orchis, Orchis_spectabilis
aerides
angrecum
jewel_orchid
puttyroot, adam-and-eve, Aplectrum_hyemale
arethusa
bog_rose, wild_pink, dragon's_mouth, Arethusa_bulbosa
bletia
Bletilla_striata, Bletia_striata
brassavola
spider_orchid, Brassia_lawrenceana
spider_orchid, Brassia_verrucosa
caladenia
calanthe
grass_pink, Calopogon_pulchellum, Calopogon_tuberosum
calypso, fairy-slipper, Calypso_bulbosa
cattleya
helleborine
red_helleborine, Cephalanthera_rubra
spreading_pogonia, funnel-crest_rosebud_orchid, Cleistes_divaricata, Pogonia_divaricata
rosebud_orchid, Cleistes_rosea, Pogonia_rosea
satyr_orchid, Coeloglossum_bracteatum
frog_orchid, Coeloglossum_viride
coelogyne
coral_root
spotted_coral_root, Corallorhiza_maculata
striped_coral_root, Corallorhiza_striata
early_coral_root, pale_coral_root, Corallorhiza_trifida
swan_orchid, swanflower, swan-flower, swanneck, swan-neck
cymbid, cymbidium
cypripedia
lady's_slipper, lady-slipper, ladies'_slipper, slipper_orchid
moccasin_flower, nerveroot, Cypripedium_acaule
common_lady's-slipper, showy_lady's-slipper, showy_lady_slipper, Cypripedium_reginae, Cypripedium_album
ram's-head, ram's-head_lady's_slipper, Cypripedium_arietinum
yellow_lady's_slipper, yellow_lady-slipper, Cypripedium_calceolus, Cypripedium_parviflorum
large_yellow_lady's_slipper, Cypripedium_calceolus_pubescens
California_lady's_slipper, Cypripedium_californicum
clustered_lady's_slipper, Cypripedium_fasciculatum
mountain_lady's_slipper, Cypripedium_montanum
marsh_orchid
common_spotted_orchid, Dactylorhiza_fuchsii, Dactylorhiza_maculata_fuchsii
dendrobium
disa
phantom_orchid, snow_orchid, Eburophyton_austinae
tulip_orchid, Encyclia_citrina, Cattleya_citrina
butterfly_orchid, Encyclia_tampensis, Epidendrum_tampense
butterfly_orchid, butterfly_orchis, Epidendrum_venosum, Encyclia_venosa
epidendron
helleborine
Epipactis_helleborine
stream_orchid, chatterbox, giant_helleborine, Epipactis_gigantea
tongueflower, tongue-flower
rattlesnake_plantain, helleborine
fragrant_orchid, Gymnadenia_conopsea
short-spurred_fragrant_orchid, Gymnadenia_odoratissima
fringed_orchis, fringed_orchid
frog_orchid
rein_orchid, rein_orchis
bog_rein_orchid, bog_candles, Habenaria_dilatata
white_fringed_orchis, white_fringed_orchid, Habenaria_albiflora
elegant_Habenaria, Habenaria_elegans
purple-fringed_orchid, purple-fringed_orchis, Habenaria_fimbriata
coastal_rein_orchid, Habenaria_greenei
Hooker's_orchid, Habenaria_hookeri
ragged_orchid, ragged_orchis, ragged-fringed_orchid, green_fringed_orchis, Habenaria_lacera
prairie_orchid, prairie_white-fringed_orchis, Habenaria_leucophaea
snowy_orchid, Habenaria_nivea
round-leaved_rein_orchid, Habenaria_orbiculata
purple_fringeless_orchid, purple_fringeless_orchis, Habenaria_peramoena
purple-fringed_orchid, purple-fringed_orchis, Habenaria_psycodes
Alaska_rein_orchid, Habenaria_unalascensis
crested_coral_root, Hexalectris_spicata
Texas_purple_spike, Hexalectris_warnockii
lizard_orchid, Himantoglossum_hircinum
laelia
liparis
twayblade
fen_orchid, fen_orchis, Liparis_loeselii
broad-leaved_twayblade, Listera_convallarioides
lesser_twayblade, Listera_cordata
twayblade, Listera_ovata
green_adder's_mouth, Malaxis-unifolia, Malaxis_ophioglossoides
masdevallia
maxillaria
pansy_orchid
odontoglossum
oncidium, dancing_lady_orchid, butterfly_plant, butterfly_orchid
bee_orchid, Ophrys_apifera
fly_orchid, Ophrys_insectifera, Ophrys_muscifera
spider_orchid
early_spider_orchid, Ophrys_sphegodes
Venus'_slipper, Venus's_slipper, Venus's_shoe
phaius
moth_orchid, moth_plant
butterfly_plant, Phalaenopsis_amabilis
rattlesnake_orchid
lesser_butterfly_orchid, Platanthera_bifolia, Habenaria_bifolia
greater_butterfly_orchid, Platanthera_chlorantha, Habenaria_chlorantha
prairie_white-fringed_orchid, Platanthera_leucophea
tangle_orchid
Indian_crocus
pleurothallis
pogonia
butterfly_orchid
Psychopsis_krameriana, Oncidium_papilio_kramerianum
Psychopsis_papilio, Oncidium_papilio
helmet_orchid, greenhood
foxtail_orchid
orange-blossom_orchid, Sarcochilus_falcatus
sobralia
ladies'_tresses, lady's_tresses
screw_augur, Spiranthes_cernua
hooded_ladies'_tresses, Spiranthes_romanzoffiana
western_ladies'_tresses, Spiranthes_porrifolia
European_ladies'_tresses, Spiranthes_spiralis
stanhopea
stelis
fly_orchid
vanda
blue_orchid, Vanda_caerulea
vanilla
vanilla_orchid, Vanilla_planifolia
yam, yam_plant
yam
white_yam, water_yam, Dioscorea_alata
cinnamon_vine, Chinese_yam, Dioscorea_batata
elephant's-foot, tortoise_plant, Hottentot_bread_vine, Hottentot's_bread_vine, Dioscorea_elephantipes
wild_yam, Dioscorea_paniculata
cush-cush, Dioscorea_trifida
black_bryony, black_bindweed, Tamus_communis
primrose, primula
English_primrose, Primula_vulgaris
cowslip, paigle, Primula_veris
oxlip, paigle, Primula_elatior
Chinese_primrose, Primula_sinensis
polyanthus, Primula_polyantha
pimpernel
scarlet_pimpernel, red_pimpernel, poor_man's_weatherglass, Anagallis_arvensis
bog_pimpernel, Anagallis_tenella
chaffweed, bastard_pimpernel, false_pimpernel
cyclamen, Cyclamen_purpurascens
sowbread, Cyclamen_hederifolium, Cyclamen_neopolitanum
sea_milkwort, sea_trifoly, black_saltwort, Glaux_maritima
featherfoil, feather-foil
water_gillyflower, American_featherfoil, Hottonia_inflata
water_violet, Hottonia_palustris
loosestrife
gooseneck_loosestrife, Lysimachia_clethroides_Duby
yellow_pimpernel, Lysimachia_nemorum
fringed_loosestrife, Lysimachia_ciliatum
moneywort, creeping_Jenny, creeping_Charlie, Lysimachia_nummularia
swamp_candles, Lysimachia_terrestris
whorled_loosestrife, Lysimachia_quadrifolia
water_pimpernel
brookweed, Samolus_valerandii
brookweed, Samolus_parviflorus, Samolus_floribundus
coralberry, spiceberry, Ardisia_crenata
marlberry, Ardisia_escallonoides, Ardisia_paniculata
plumbago
leadwort, Plumbago_europaea
thrift
sea_lavender, marsh_rosemary, statice
barbasco, joewood, Jacquinia_keyensis
gramineous_plant, graminaceous_plant
grass
midgrass
shortgrass, short-grass
sword_grass
tallgrass, tall-grass
herbage, pasturage
goat_grass, Aegilops_triuncalis
wheatgrass, wheat-grass
crested_wheatgrass, crested_wheat_grass, fairway_crested_wheat_grass, Agropyron_cristatum
bearded_wheatgrass, Agropyron_subsecundum
western_wheatgrass, bluestem_wheatgrass, Agropyron_smithii
intermediate_wheatgrass, Agropyron_intermedium, Elymus_hispidus
slender_wheatgrass, Agropyron_trachycaulum, Agropyron_pauciflorum, Elymus_trachycaulos
velvet_bent, velvet_bent_grass, brown_bent, Rhode_Island_bent, dog_bent, Agrostis_canina
cloud_grass, Agrostis_nebulosa
meadow_foxtail, Alopecurus_pratensis
foxtail, foxtail_grass
broom_grass
broom_sedge, Andropogon_virginicus
tall_oat_grass, tall_meadow_grass, evergreen_grass, false_oat, French_rye, Arrhenatherum_elatius
toetoe, toitoi, Arundo_conspicua, Chionochloa_conspicua
oat
cereal_oat, Avena_sativa
wild_oat, wild_oat_grass, Avena_fatua
slender_wild_oat, Avena_barbata
wild_red_oat, animated_oat, Avene_sterilis
brome, bromegrass
chess, cheat, Bromus_secalinus
field_brome, Bromus_arvensis
grama, grama_grass, gramma, gramma_grass
black_grama, Bouteloua_eriopoda
buffalo_grass, Buchloe_dactyloides
reed_grass
feather_reed_grass, feathertop, Calamagrostis_acutiflora
Australian_reed_grass, Calamagrostic_quadriseta
burgrass, bur_grass
buffel_grass, Cenchrus_ciliaris, Pennisetum_cenchroides
Rhodes_grass, Chloris_gayana
pampas_grass, Cortaderia_selloana
giant_star_grass, Cynodon_plectostachyum
orchard_grass, cocksfoot, cockspur, Dactylis_glomerata
Egyptian_grass, crowfoot_grass, Dactyloctenium_aegypticum
crabgrass, crab_grass, finger_grass
smooth_crabgrass, Digitaria_ischaemum
large_crabgrass, hairy_finger_grass, Digitaria_sanguinalis
barnyard_grass, barn_grass, barn_millet, Echinochloa_crusgalli
Japanese_millet, billion-dollar_grass, Japanese_barnyard_millet, sanwa_millet, Echinochloa_frumentacea
yardgrass, yard_grass, wire_grass, goose_grass, Eleusine_indica
finger_millet, ragi, ragee, African_millet, coracan, corakan, kurakkan, Eleusine_coracana
lyme_grass
wild_rye
giant_ryegrass, Elymus_condensatus, Leymus_condensatus
sea_lyme_grass, European_dune_grass, Elymus_arenarius, Leymus_arenaria
Canada_wild_rye, Elymus_canadensis
teff, teff_grass, Eragrostis_tef, Eragrostic_abyssinica
weeping_love_grass, African_love_grass, Eragrostis_curvula
plume_grass
Ravenna_grass, wool_grass, Erianthus_ravennae
fescue, fescue_grass, meadow_fescue, Festuca_elatior
reed_meadow_grass, Glyceria_grandis
velvet_grass, Yorkshire_fog, Holcus_lanatus
creeping_soft_grass, Holcus_mollis
barleycorn
barley_grass, wall_barley, Hordeum_murinum
little_barley, Hordeum_pusillum
rye_grass, ryegrass
perennial_ryegrass, English_ryegrass, Lolium_perenne
Italian_ryegrass, Italian_rye, Lolium_multiflorum
darnel, tare, bearded_darnel, cheat, Lolium_temulentum
nimblewill, nimble_Will, Muhlenbergia_schreberi
cultivated_rice, Oryza_sativa
ricegrass, rice_grass
smilo, smilo_grass, Oryzopsis_miliacea
switch_grass, Panicum_virgatum
broomcorn_millet, hog_millet, Panicum_miliaceum
goose_grass, Texas_millet, Panicum_Texanum
dallisgrass, dallis_grass, paspalum, Paspalum_dilatatum
Bahia_grass, Paspalum_notatum
knotgrass, Paspalum_distichum
fountain_grass, Pennisetum_ruppelii, Pennisetum_setaceum
reed_canary_grass, gardener's_garters, lady's_laces, ribbon_grass, Phalaris_arundinacea
canary_grass, birdseed_grass, Phalaris_canariensis
timothy, herd's_grass, Phleum_pratense
bluegrass, blue_grass
meadowgrass, meadow_grass
wood_meadowgrass, Poa_nemoralis, Agrostis_alba
noble_cane
munj, munja, Saccharum_bengalense, Saccharum_munja
broom_beard_grass, prairie_grass, wire_grass, Andropogon_scoparius, Schizachyrium_scoparium
bluestem, blue_stem, Andropogon_furcatus, Andropogon_gerardii
rye, Secale_cereale
bristlegrass, bristle_grass
giant_foxtail
yellow_bristlegrass, yellow_bristle_grass, yellow_foxtail, glaucous_bristlegrass, Setaria_glauca
green_bristlegrass, green_foxtail, rough_bristlegrass, bottle-grass, bottle_grass, Setaria_viridis
Siberian_millet, Setaria_italica_rubrofructa
German_millet, golden_wonder_millet, Setaria_italica_stramineofructa
millet
rattan, rattan_cane
malacca
reed
sorghum
grain_sorghum
durra, doura, dourah, Egyptian_corn, Indian_millet, Guinea_corn
feterita, federita, Sorghum_vulgare_caudatum
hegari
kaoliang
milo, milo_maize
shallu, Sorghum_vulgare_rosburghii
broomcorn, Sorghum_vulgare_technicum
cordgrass, cord_grass
salt_reed_grass, Spartina_cynosuroides
prairie_cordgrass, freshwater_cordgrass, slough_grass, Spartina_pectinmata
smut_grass, blackseed, carpet_grass, Sporobolus_poiretii
sand_dropseed, Sporobolus_cryptandrus
rush_grass, rush-grass
St._Augustine_grass, Stenotaphrum_secundatum, buffalo_grass
grain
cereal, cereal_grass
wheat
wheat_berry
durum, durum_wheat, hard_wheat, Triticum_durum, Triticum_turgidum, macaroni_wheat
spelt, Triticum_spelta, Triticum_aestivum_spelta
emmer, starch_wheat, two-grain_spelt, Triticum_dicoccum
wild_wheat, wild_emmer, Triticum_dicoccum_dicoccoides
corn, maize, Indian_corn, Zea_mays
mealie
corn
dent_corn, Zea_mays_indentata
flint_corn, flint_maize, Yankee_corn, Zea_mays_indurata
popcorn, Zea_mays_everta
zoysia
Manila_grass, Japanese_carpet_grass, Zoysia_matrella
Korean_lawn_grass, Japanese_lawn_grass, Zoysia_japonica
bamboo
common_bamboo, Bambusa_vulgaris
giant_bamboo, kyo-chiku, Dendrocalamus_giganteus
umbrella_plant, umbrella_sedge, Cyperus_alternifolius
chufa, yellow_nutgrass, earth_almond, ground_almond, rush_nut, Cyperus_esculentus
galingale, galangal, Cyperus_longus
nutgrass, nut_grass, nutsedge, nut_sedge, Cyperus_rotundus
sand_sedge, sand_reed, Carex_arenaria
cypress_sedge, Carex_pseudocyperus
cotton_grass, cotton_rush
common_cotton_grass, Eriophorum_angustifolium
hardstem_bulrush, hardstemmed_bulrush, Scirpus_acutus
wool_grass, Scirpus_cyperinus
spike_rush
water_chestnut, Chinese_water_chestnut, Eleocharis_dulcis
needle_spike_rush, needle_rush, slender_spike_rush, hair_grass, Eleocharis_acicularis
creeping_spike_rush, Eleocharis_palustris
pandanus, screw_pine
textile_screw_pine, lauhala, Pandanus_tectorius
cattail
cat's-tail, bullrush, bulrush, nailrod, reed_mace, reedmace, Typha_latifolia
bur_reed
grain, caryopsis
kernel
rye
gourd, gourd_vine
gourd
pumpkin, pumpkin_vine, autumn_pumpkin, Cucurbita_pepo
squash, squash_vine
summer_squash, summer_squash_vine, Cucurbita_pepo_melopepo
yellow_squash
marrow, marrow_squash, vegetable_marrow
zucchini, courgette
cocozelle, Italian_vegetable_marrow
cymling, pattypan_squash
spaghetti_squash
winter_squash, winter_squash_plant
acorn_squash
hubbard_squash, Cucurbita_maxima
turban_squash, Cucurbita_maxima_turbaniformis
buttercup_squash
butternut_squash, Cucurbita_maxima
winter_crookneck, winter_crookneck_squash, Cucurbita_moschata
cushaw, Cucurbita_mixta, Cucurbita_argyrosperma
prairie_gourd, prairie_gourd_vine, Missouri_gourd, wild_pumpkin, buffalo_gourd, calabazilla, Cucurbita_foetidissima
prairie_gourd
bryony, briony
white_bryony, devil's_turnip, Bryonia_alba
sweet_melon, muskmelon, sweet_melon_vine, Cucumis_melo
cantaloupe, cantaloup, cantaloupe_vine, cantaloup_vine, Cucumis_melo_cantalupensis
winter_melon, Persian_melon, honeydew_melon, winter_melon_vine, Cucumis_melo_inodorus
net_melon, netted_melon, nutmeg_melon, Cucumis_melo_reticulatus
cucumber, cucumber_vine, Cucumis_sativus
squirting_cucumber, exploding_cucumber, touch-me-not, Ecballium_elaterium
bottle_gourd, calabash, Lagenaria_siceraria
luffa, dishcloth_gourd, sponge_gourd, rag_gourd, strainer_vine
loofah, vegetable_sponge, Luffa_cylindrica
angled_loofah, sing-kwa, Luffa_acutangula
loofa, loofah, luffa, loufah_sponge
balsam_apple, Momordica_balsamina
balsam_pear, Momordica_charantia
lobelia
water_lobelia, Lobelia_dortmanna
mallow
musk_mallow, mus_rose, Malva_moschata
common_mallow, Malva_neglecta
okra, gumbo, okra_plant, lady's-finger, Abelmoschus_esculentus, Hibiscus_esculentus
okra
abelmosk, musk_mallow, Abelmoschus_moschatus, Hibiscus_moschatus
flowering_maple
velvetleaf, velvet-leaf, velvetweed, Indian_mallow, butter-print, China_jute, Abutilon_theophrasti
hollyhock
rose_mallow, Alcea_rosea, Althea_rosea
althea, althaea, hollyhock
marsh_mallow, white_mallow, Althea_officinalis
poppy_mallow
fringed_poppy_mallow, Callirhoe_digitata
purple_poppy_mallow, Callirhoe_involucrata
clustered_poppy_mallow, Callirhoe_triangulata
sea_island_cotton, tree_cotton, Gossypium_barbadense
Levant_cotton, Gossypium_herbaceum
upland_cotton, Gossypium_hirsutum
Peruvian_cotton, Gossypium_peruvianum
wild_cotton, Arizona_wild_cotton, Gossypium_thurberi
kenaf, kanaf, deccan_hemp, bimli, bimli_hemp, Indian_hemp, Bombay_hemp, Hibiscus_cannabinus
sorrel_tree, Hibiscus_heterophyllus
rose_mallow, swamp_mallow, common_rose_mallow, swamp_rose_mallow, Hibiscus_moscheutos
cotton_rose, Confederate_rose, Confederate_rose_mallow, Hibiscus_mutabilis
roselle, rozelle, sorrel, red_sorrel, Jamaica_sorrel, Hibiscus_sabdariffa
mahoe, majagua, mahagua, balibago, purau, Hibiscus_tiliaceus
flower-of-an-hour, flowers-of-an-hour, bladder_ketmia, black-eyed_Susan, Hibiscus_trionum
lacebark, ribbonwood, houhere, Hoheria_populnea
wild_hollyhock, Iliamna_remota, Sphaeralcea_remota
mountain_hollyhock, Iliamna_ruvularis, Iliamna_acerifolia
seashore_mallow
salt_marsh_mallow, Kosteletzya_virginica
chaparral_mallow, Malacothamnus_fasciculatus, Sphaeralcea_fasciculata
malope, Malope_trifida
false_mallow
waxmallow, wax_mallow, sleeping_hibiscus
glade_mallow, Napaea_dioica
pavonia
ribbon_tree, ribbonwood, Plagianthus_regius, Plagianthus_betulinus
bush_hibiscus, Radyera_farragei, Hibiscus_farragei
Virginia_mallow, Sida_hermaphrodita
Queensland_hemp, jellyleaf, Sida_rhombifolia
Indian_mallow, Sida_spinosa
checkerbloom, wild_hollyhock, Sidalcea_malviflora
globe_mallow, false_mallow
prairie_mallow, red_false_mallow, Sphaeralcea_coccinea, Malvastrum_coccineum
tulipwood_tree
portia_tree, bendy_tree, seaside_mahoe, Thespesia_populnea
red_silk-cotton_tree, simal, Bombax_ceiba, Bombax_malabarica
cream-of-tartar_tree, sour_gourd, Adansonia_gregorii
baobab, monkey-bread_tree, Adansonia_digitata
kapok, ceiba_tree, silk-cotton_tree, white_silk-cotton_tree, Bombay_ceiba, God_tree, Ceiba_pentandra
durian, durion, durian_tree, Durio_zibethinus
Montezuma
shaving-brush_tree, Pseudobombax_ellipticum
quandong, quandong_tree, Brisbane_quandong, silver_quandong_tree, blue_fig, Elaeocarpus_grandis
quandong, blue_fig
makomako, New_Zealand_wine_berry, wineberry, Aristotelia_serrata, Aristotelia_racemosa
Jamaican_cherry, calabur_tree, calabura, silk_wood, silkwood, Muntingia_calabura
breakax, breakaxe, break-axe, Sloanea_jamaicensis
sterculia
Panama_tree, Sterculia_apetala
kalumpang, Java_olives, Sterculia_foetida
bottle-tree, bottle_tree
flame_tree, flame_durrajong, Brachychiton_acerifolius, Sterculia_acerifolia
flame_tree, broad-leaved_bottletree, Brachychiton_australis
kurrajong, currajong, Brachychiton_populneus
Queensland_bottletree, narrow-leaved_bottletree, Brachychiton_rupestris, Sterculia_rupestris
kola, kola_nut, kola_nut_tree, goora_nut, Cola_acuminata
kola_nut, cola_nut
Chinese_parasol_tree, Chinese_parasol, Japanese_varnish_tree, phoenix_tree, Firmiana_simplex
flannelbush, flannel_bush, California_beauty
screw_tree
nut-leaved_screw_tree, Helicteres_isora
red_beech, brown_oak, booyong, crow's_foot, stave_wood, silky_elm, Heritiera_trifoliolata, Terrietia_trifoliolata
looking_glass_tree, Heritiera_macrophylla
looking-glass_plant, Heritiera_littoralis
honey_bell, honeybells, Hermannia_verticillata, Mahernia_verticillata
mayeng, maple-leaved_bayur, Pterospermum_acerifolium
silver_tree, Tarrietia_argyrodendron
cacao, cacao_tree, chocolate_tree, Theobroma_cacao
obeche, obechi, arere, samba, Triplochiton_scleroxcylon
linden, linden_tree, basswood, lime, lime_tree
American_basswood, American_lime, Tilia_americana
small-leaved_linden, small-leaved_lime, Tilia_cordata
white_basswood, cottonwood, Tilia_heterophylla
Japanese_linden, Japanese_lime, Tilia_japonica
silver_lime, silver_linden, Tilia_tomentosa
corchorus
African_hemp, Sparmannia_africana
herb, herbaceous_plant
protea
honeypot, king_protea, Protea_cynaroides
honeyflower, honey-flower, Protea_mellifera
banksia
honeysuckle, Australian_honeysuckle, coast_banksia, Banksia_integrifolia
smoke_bush
Chilean_firebush, Chilean_flameflower, Embothrium_coccineum
Chilean_nut, Chile_nut, Chile_hazel, Chilean_hazelnut, Guevina_heterophylla, Guevina_avellana
grevillea
red-flowered_silky_oak, Grevillea_banksii
silky_oak, Grevillea_robusta
beefwood, Grevillea_striata
cushion_flower, pincushion_hakea, Hakea_laurina
rewa-rewa, New_Zealand_honeysuckle
honeyflower, honey-flower, mountain_devil, Lambertia_formosa
silver_tree, Leucadendron_argenteum
lomatia
macadamia, macadamia_tree
Macadamia_integrifolia
macadamia_nut, macadamia_nut_tree, Macadamia_ternifolia
Queensland_nut, Macadamia_tetraphylla
prickly_ash, Orites_excelsa
geebung
wheel_tree, firewheel_tree, Stenocarpus_sinuatus
scrub_beefwood, beefwood, Stenocarpus_salignus
waratah, Telopea_Oreades
waratah, Telopea_speciosissima
casuarina
she-oak
beefwood
Australian_pine, Casuarina_equisetfolia
heath
tree_heath, briar, brier, Erica_arborea
briarroot
winter_heath, spring_heath, Erica_carnea
bell_heather, heather_bell, fine-leaved_heath, Erica_cinerea
Cornish_heath, Erica_vagans
Spanish_heath, Portuguese_heath, Erica_lusitanica
Prince-of-Wales'-heath, Prince_of_Wales_heath, Erica_perspicua
bog_rosemary, moorwort, Andromeda_glaucophylla
marsh_andromeda, common_bog_rosemary, Andromeda_polifolia
madrona, madrono, manzanita, Arbutus_menziesii
strawberry_tree, Irish_strawberry, Arbutus_unedo
bearberry
alpine_bearberry, black_bearberry, Arctostaphylos_alpina
heartleaf_manzanita, Arctostaphylos_andersonii
Parry_manzanita, Arctostaphylos_manzanita
spike_heath, Bruckenthalia_spiculifolia
bryanthus
leatherleaf, Chamaedaphne_calyculata
Connemara_heath, St._Dabeoc's_heath, Daboecia_cantabrica
trailing_arbutus, mayflower, Epigaea_repens
creeping_snowberry, moxie_plum, maidenhair_berry, Gaultheria_hispidula
salal, shallon, Gaultheria_shallon
huckleberry
black_huckleberry, Gaylussacia_baccata
dangleberry, dangle-berry, Gaylussacia_frondosa
box_huckleberry, Gaylussacia_brachycera
kalmia
mountain_laurel, wood_laurel, American_laurel, calico_bush, Kalmia_latifolia
swamp_laurel, bog_laurel, bog_kalmia, Kalmia_polifolia
trapper's_tea, glandular_Labrador_tea
wild_rosemary, marsh_tea, Ledum_palustre
sand_myrtle, Leiophyllum_buxifolium
leucothoe
dog_laurel, dog_hobble, switch-ivy, Leucothoe_fontanesiana, Leucothoe_editorum
sweet_bells, Leucothoe_racemosa
alpine_azalea, mountain_azalea, Loiseleuria_procumbens
staggerbush, stagger_bush, Lyonia_mariana
maleberry, male_berry, privet_andromeda, he-huckleberry, Lyonia_ligustrina
fetterbush, fetter_bush, shiny_lyonia, Lyonia_lucida
false_azalea, fool's_huckleberry, Menziesia_ferruginea
minniebush, minnie_bush, Menziesia_pilosa
sorrel_tree, sourwood, titi, Oxydendrum_arboreum
mountain_heath, Phyllodoce_caerulea, Bryanthus_taxifolius
purple_heather, Brewer's_mountain_heather, Phyllodoce_breweri
fetterbush, mountain_fetterbush, mountain_andromeda, Pieris_floribunda
rhododendron
coast_rhododendron, Rhododendron_californicum
rosebay, Rhododendron_maxima
swamp_azalea, swamp_honeysuckle, white_honeysuckle, Rhododendron_viscosum
azalea
cranberry
American_cranberry, large_cranberry, Vaccinium_macrocarpon
European_cranberry, small_cranberry, Vaccinium_oxycoccus
blueberry, blueberry_bush
farkleberry, sparkleberry, Vaccinium_arboreum
low-bush_blueberry, low_blueberry, Vaccinium_angustifolium, Vaccinium_pennsylvanicum
rabbiteye_blueberry, rabbit-eye_blueberry, rabbiteye, Vaccinium_ashei
dwarf_bilberry, dwarf_blueberry, Vaccinium_caespitosum
evergreen_blueberry, Vaccinium_myrsinites
evergreen_huckleberry, Vaccinium_ovatum
bilberry, thin-leaved_bilberry, mountain_blue_berry, Viccinium_membranaceum
bilberry, whortleberry, whinberry, blaeberry, Viccinium_myrtillus
bog_bilberry, bog_whortleberry, moor_berry, Vaccinium_uliginosum_alpinum
dryland_blueberry, dryland_berry, Vaccinium_pallidum
grouseberry, grouse-berry, grouse_whortleberry, Vaccinium_scoparium
deerberry, squaw_huckleberry, Vaccinium_stamineum
cowberry, mountain_cranberry, lingonberry, lingenberry, lingberry, foxberry, Vaccinium_vitis-idaea
diapensia
galax, galaxy, wandflower, beetleweed, coltsfoot, Galax_urceolata
pyxie, pixie, pixy, Pyxidanthera_barbulata
shortia
oconee_bells, Shortia_galacifolia
Australian_heath
epacris
common_heath, Epacris_impressa
common_heath, blunt-leaf_heath, Epacris_obtusifolia
Port_Jackson_heath, Epacris_purpurascens
native_cranberry, groundberry, ground-berry, cranberry_heath, Astroloma_humifusum, Styphelia_humifusum
pink_fivecorner, Styphelia_triflora
wintergreen, pyrola
false_wintergreen, Pyrola_americana, Pyrola_rotundifolia_americana
lesser_wintergreen, Pyrola_minor
wild_lily_of_the_valley, shinleaf, Pyrola_elliptica
wild_lily_of_the_valley, Pyrola_rotundifolia
pipsissewa, prince's_pine
love-in-winter, western_prince's_pine, Chimaphila_umbellata, Chimaphila_corymbosa
one-flowered_wintergreen, one-flowered_pyrola, Moneses_uniflora, Pyrola_uniflora
Indian_pipe, waxflower, Monotropa_uniflora
pinesap, false_beachdrops, Monotropa_hypopithys
beech, beech_tree
common_beech, European_beech, Fagus_sylvatica
copper_beech, purple_beech, Fagus_sylvatica_atropunicea, Fagus_purpurea, Fagus_sylvatica_purpurea
American_beech, white_beech, red_beech, Fagus_grandifolia, Fagus_americana
weeping_beech, Fagus_pendula, Fagus_sylvatica_pendula
Japanese_beech
chestnut, chestnut_tree
American_chestnut, American_sweet_chestnut, Castanea_dentata
European_chestnut, sweet_chestnut, Spanish_chestnut, Castanea_sativa
Chinese_chestnut, Castanea_mollissima
Japanese_chestnut, Castanea_crenata
Allegheny_chinkapin, eastern_chinquapin, chinquapin, dwarf_chestnut, Castanea_pumila
Ozark_chinkapin, Ozark_chinquapin, chinquapin, Castanea_ozarkensis
oak_chestnut
giant_chinkapin, golden_chinkapin, Chrysolepis_chrysophylla, Castanea_chrysophylla, Castanopsis_chrysophylla
dwarf_golden_chinkapin, Chrysolepis_sempervirens
tanbark_oak, Lithocarpus_densiflorus
Japanese_oak, Lithocarpus_glabra, Lithocarpus_glaber
southern_beech, evergreen_beech
myrtle_beech, Nothofagus_cuninghamii
Coigue, Nothofagus_dombeyi
New_Zealand_beech
silver_beech, Nothofagus_menziesii
roble_beech, Nothofagus_obliqua
rauli_beech, Nothofagus_procera
black_beech, Nothofagus_solanderi
hard_beech, Nothofagus_truncata
acorn
cupule, acorn_cup
oak, oak_tree
live_oak
coast_live_oak, California_live_oak, Quercus_agrifolia
white_oak
American_white_oak, Quercus_alba
Arizona_white_oak, Quercus_arizonica
swamp_white_oak, swamp_oak, Quercus_bicolor
European_turkey_oak, turkey_oak, Quercus_cerris
canyon_oak, canyon_live_oak, maul_oak, iron_oak, Quercus_chrysolepis
scarlet_oak, Quercus_coccinea
jack_oak, northern_pin_oak, Quercus_ellipsoidalis
red_oak
southern_red_oak, swamp_red_oak, turkey_oak, Quercus_falcata
Oregon_white_oak, Oregon_oak, Garry_oak, Quercus_garryana
holm_oak, holm_tree, holly-leaved_oak, evergreen_oak, Quercus_ilex
bear_oak, Quercus_ilicifolia
shingle_oak, laurel_oak, Quercus_imbricaria
bluejack_oak, turkey_oak, Quercus_incana
California_black_oak, Quercus_kelloggii
American_turkey_oak, turkey_oak, Quercus_laevis
laurel_oak, pin_oak, Quercus_laurifolia
California_white_oak, valley_oak, valley_white_oak, roble, Quercus_lobata
overcup_oak, Quercus_lyrata
bur_oak, burr_oak, mossy-cup_oak, mossycup_oak, Quercus_macrocarpa
scrub_oak
blackjack_oak, blackjack, jack_oak, Quercus_marilandica
swamp_chestnut_oak, Quercus_michauxii
Japanese_oak, Quercus_mongolica, Quercus_grosseserrata
chestnut_oak
chinquapin_oak, chinkapin_oak, yellow_chestnut_oak, Quercus_muehlenbergii
myrtle_oak, seaside_scrub_oak, Quercus_myrtifolia
water_oak, possum_oak, Quercus_nigra
Nuttall_oak, Nuttall's_oak, Quercus_nuttalli
durmast, Quercus_petraea, Quercus_sessiliflora
basket_oak, cow_oak, Quercus_prinus, Quercus_montana
pin_oak, swamp_oak, Quercus_palustris
willow_oak, Quercus_phellos
dwarf_chinkapin_oak, dwarf_chinquapin_oak, dwarf_oak, Quercus_prinoides
common_oak, English_oak, pedunculate_oak, Quercus_robur
northern_red_oak, Quercus_rubra, Quercus_borealis
Shumard_oak, Shumard_red_oak, Quercus_shumardii
post_oak, box_white_oak, brash_oak, iron_oak, Quercus_stellata
cork_oak, Quercus_suber
Spanish_oak, Quercus_texana
huckleberry_oak, Quercus_vaccinifolia
Chinese_cork_oak, Quercus_variabilis
black_oak, yellow_oak, quercitron, quercitron_oak, Quercus_velutina
southern_live_oak, Quercus_virginiana
interior_live_oak, Quercus_wislizenii, Quercus_wizlizenii
mast
birch, birch_tree
yellow_birch, Betula_alleghaniensis, Betula_leutea
American_white_birch, paper_birch, paperbark_birch, canoe_birch, Betula_cordifolia, Betula_papyrifera
grey_birch, gray_birch, American_grey_birch, American_gray_birch, Betula_populifolia
silver_birch, common_birch, European_white_birch, Betula_pendula
downy_birch, white_birch, Betula_pubescens
black_birch, river_birch, red_birch, Betula_nigra
sweet_birch, cherry_birch, black_birch, Betula_lenta
Yukon_white_birch, Betula_neoalaskana
swamp_birch, water_birch, mountain_birch, Western_paper_birch, Western_birch, Betula_fontinalis
Newfoundland_dwarf_birch, American_dwarf_birch, Betula_glandulosa
alder, alder_tree
common_alder, European_black_alder, Alnus_glutinosa, Alnus_vulgaris
grey_alder, gray_alder, Alnus_incana
seaside_alder, Alnus_maritima
white_alder, mountain_alder, Alnus_rhombifolia
red_alder, Oregon_alder, Alnus_rubra
speckled_alder, Alnus_rugosa
smooth_alder, hazel_alder, Alnus_serrulata
green_alder, Alnus_veridis
green_alder, Alnus_veridis_crispa, Alnus_crispa
hornbeam
European_hornbeam, Carpinus_betulus
American_hornbeam, Carpinus_caroliniana
hop_hornbeam
Old_World_hop_hornbeam, Ostrya_carpinifolia
Eastern_hop_hornbeam, ironwood, ironwood_tree, Ostrya_virginiana
hazelnut, hazel, hazelnut_tree
American_hazel, Corylus_americana
cobnut, filbert, Corylus_avellana, Corylus_avellana_grandis
beaked_hazelnut, Corylus_cornuta
centaury
rosita, Centaurium_calycosum
lesser_centaury, Centaurium_minus
seaside_centaury
slender_centaury
prairie_gentian, tulip_gentian, bluebell, Eustoma_grandiflorum
Persian_violet, Exacum_affine
columbo, American_columbo, deer's-ear, deer's-ears, pyramid_plant, American_gentian
gentian
gentianella, Gentiana_acaulis
closed_gentian, blind_gentian, bottle_gentian, Gentiana_andrewsii
explorer's_gentian, Gentiana_calycosa
closed_gentian, blind_gentian, Gentiana_clausa
great_yellow_gentian, Gentiana_lutea
marsh_gentian, calathian_violet, Gentiana_pneumonanthe
soapwort_gentian, Gentiana_saponaria
striped_gentian, Gentiana_villosa
agueweed, ague_weed, five-flowered_gentian, stiff_gentian, Gentianella_quinquefolia, Gentiana_quinquefolia
felwort, gentianella_amarella
fringed_gentian
Gentianopsis_crinita, Gentiana_crinita
Gentianopsis_detonsa, Gentiana_detonsa
Gentianopsid_procera, Gentiana_procera
Gentianopsis_thermalis, Gentiana_thermalis
tufted_gentian, Gentianopsis_holopetala, Gentiana_holopetala
spurred_gentian
sabbatia
toothbrush_tree, mustard_tree, Salvadora_persica
olive_tree
olive, European_olive_tree, Olea_europaea
olive
black_maire, Olea_cunninghamii
white_maire, Olea_lanceolata
fringe_tree
fringe_bush, Chionanthus_virginicus
forestiera
forsythia
ash, ash_tree
white_ash, Fraxinus_Americana
swamp_ash, Fraxinus_caroliniana
flowering_ash, Fraxinus_cuspidata
European_ash, common_European_ash, Fraxinus_excelsior
Oregon_ash, Fraxinus_latifolia, Fraxinus_oregona
black_ash, basket_ash, brown_ash, hoop_ash, Fraxinus_nigra
manna_ash, flowering_ash, Fraxinus_ornus
red_ash, downy_ash, Fraxinus_pennsylvanica
green_ash, Fraxinus_pennsylvanica_subintegerrima
blue_ash, Fraxinus_quadrangulata
mountain_ash, Fraxinus_texensis
pumpkin_ash, Fraxinus_tomentosa
Arizona_ash, Fraxinus_velutina
jasmine
primrose_jasmine, Jasminum_mesnyi
winter_jasmine, Jasminum_nudiflorum
common_jasmine, true_jasmine, jessamine, Jasminum_officinale
privet
Amur_privet, Ligustrum_amurense
Japanese_privet, Ligustrum_japonicum
Ligustrum_obtusifolium
common_privet, Ligustrum_vulgare
devilwood, American_olive, Osmanthus_americanus
mock_privet
lilac
Himalayan_lilac, Syringa_emodi
Persian_lilac, Syringa_persica
Japanese_tree_lilac, Syringa_reticulata, Syringa_amurensis_japonica
Japanese_lilac, Syringa_villosa
common_lilac, Syringa_vulgaris
bloodwort
kangaroo_paw, kangaroo's_paw, kangaroo's-foot, kangaroo-foot_plant, Australian_sword_lily, Anigozanthus_manglesii
Virginian_witch_hazel, Hamamelis_virginiana
vernal_witch_hazel, Hamamelis_vernalis
winter_hazel, flowering_hazel
fothergilla, witch_alder
liquidambar
sweet_gum, sweet_gum_tree, bilsted, red_gum, American_sweet_gum, Liquidambar_styraciflua
iron_tree, iron-tree, ironwood, ironwood_tree
walnut, walnut_tree
California_black_walnut, Juglans_californica
butternut, butternut_tree, white_walnut, Juglans_cinerea
black_walnut, black_walnut_tree, black_hickory, Juglans_nigra
English_walnut, English_walnut_tree, Circassian_walnut, Persian_walnut, Juglans_regia
hickory, hickory_tree
water_hickory, bitter_pecan, water_bitternut, Carya_aquatica
pignut, pignut_hickory, brown_hickory, black_hickory, Carya_glabra
bitternut, bitternut_hickory, bitter_hickory, bitter_pignut, swamp_hickory, Carya_cordiformis
pecan, pecan_tree, Carya_illinoensis, Carya_illinoinsis
big_shellbark, big_shellbark_hickory, big_shagbark, king_nut, king_nut_hickory, Carya_laciniosa
nutmeg_hickory, Carya_myristicaeformis, Carya_myristiciformis
shagbark, shagbark_hickory, shellbark, shellbark_hickory, Carya_ovata
mockernut, mockernut_hickory, black_hickory, white-heart_hickory, big-bud_hickory, Carya_tomentosa
wing_nut, wing-nut
Caucasian_walnut, Pterocarya_fraxinifolia
dhawa, dhava
combretum
hiccup_nut, hiccough_nut, Combretum_bracteosum
bush_willow, Combretum_appiculatum
bush_willow, Combretum_erythrophyllum
button_tree, button_mangrove, Conocarpus_erectus
white_mangrove, Laguncularia_racemosa
oleaster
water_milfoil
anchovy_pear, anchovy_pear_tree, Grias_cauliflora
brazil_nut, brazil-nut_tree, Bertholletia_excelsa
loosestrife
purple_loosestrife, spiked_loosestrife, Lythrum_salicaria
grass_poly, hyssop_loosestrife, Lythrum_hyssopifolia
crape_myrtle, crepe_myrtle, crepe_flower, Lagerstroemia_indica
Queen's_crape_myrtle, pride-of-India, Lagerstroemia_speciosa
myrtaceous_tree
myrtle
common_myrtle, Myrtus_communis
bayberry, bay-rum_tree, Jamaica_bayberry, wild_cinnamon, Pimenta_acris
allspice, allspice_tree, pimento_tree, Pimenta_dioica
allspice_tree, Pimenta_officinalis
sour_cherry, Eugenia_corynantha
nakedwood, Eugenia_dicrana
Surinam_cherry, pitanga, Eugenia_uniflora
rose_apple, rose-apple_tree, jambosa, Eugenia_jambos
feijoa, feijoa_bush
jaboticaba, jaboticaba_tree, Myrciaria_cauliflora
guava, true_guava, guava_bush, Psidium_guajava
guava, strawberry_guava, yellow_cattley_guava, Psidium_littorale
cattley_guava, purple_strawberry_guava, Psidium_cattleianum, Psidium_littorale_longipes
Brazilian_guava, Psidium_guineense
gum_tree, gum
eucalyptus, eucalypt, eucalyptus_tree
flooded_gum
mallee
stringybark
smoothbark
red_gum, peppermint, peppermint_gum, Eucalyptus_amygdalina
red_gum, marri, Eucalyptus_calophylla
river_red_gum, river_gum, Eucalyptus_camaldulensis, Eucalyptus_rostrata
mountain_swamp_gum, Eucalyptus_camphora
snow_gum, ghost_gum, white_ash, Eucalyptus_coriacea, Eucalyptus_pauciflora
alpine_ash, mountain_oak, Eucalyptus_delegatensis
white_mallee, congoo_mallee, Eucalyptus_dumosa
white_stringybark, thin-leaved_stringybark, Eucalyptusd_eugenioides
white_mountain_ash, Eucalyptus_fraxinoides
blue_gum, fever_tree, Eucalyptus_globulus
rose_gum, Eucalypt_grandis
cider_gum, Eucalypt_gunnii
swamp_gum, Eucalypt_ovata
spotted_gum, Eucalyptus_maculata
lemon-scented_gum, Eucalyptus_citriodora, Eucalyptus_maculata_citriodora
black_mallee, black_sally, black_gum, Eucalytus_stellulata
forest_red_gum, Eucalypt_tereticornis
mountain_ash, Eucalyptus_regnans
manna_gum, Eucalyptus_viminalis
clove, clove_tree, Syzygium_aromaticum, Eugenia_aromaticum, Eugenia_caryophyllatum
clove
tupelo, tupelo_tree
water_gum, Nyssa_aquatica
sour_gum, black_gum, pepperidge, Nyssa_sylvatica
enchanter's_nightshade
Circaea_lutetiana
willowherb
fireweed, giant_willowherb, rosebay_willowherb, wickup, Epilobium_angustifolium
California_fuchsia, humming_bird's_trumpet, Epilobium_canum_canum, Zauschneria_californica
fuchsia
lady's-eardrop, ladies'-eardrop, lady's-eardrops, ladies'-eardrops, Fuchsia_coccinea
evening_primrose
common_evening_primrose, German_rampion, Oenothera_biennis
sundrops, Oenothera_fruticosa
Missouri_primrose, Ozark_sundrops, Oenothera_macrocarpa
pomegranate, pomegranate_tree, Punica_granatum
mangrove, Rhizophora_mangle
daphne
garland_flower, Daphne_cneorum
spurge_laurel, wood_laurel, Daphne_laureola
mezereon, February_daphne, Daphne_mezereum
Indian_rhododendron, Melastoma_malabathricum
Medinilla_magnifica
deer_grass, meadow_beauty
canna
achira, indian_shot, arrowroot, Canna_indica, Canna_edulis
arrowroot, American_arrowroot, obedience_plant, Maranta_arundinaceae
banana, banana_tree
dwarf_banana, Musa_acuminata
Japanese_banana, Musa_basjoo
plantain, plantain_tree, Musa_paradisiaca
edible_banana, Musa_paradisiaca_sapientum
abaca, Manila_hemp, Musa_textilis
Abyssinian_banana, Ethiopian_banana, Ensete_ventricosum, Musa_ensete
ginger
common_ginger, Canton_ginger, stem_ginger, Zingiber_officinale
turmeric, Curcuma_longa, Curcuma_domestica
galangal, Alpinia_galanga
shellflower, shall-flower, shell_ginger, Alpinia_Zerumbet, Alpinia_speciosa, Languas_speciosa
grains_of_paradise, Guinea_grains, Guinea_pepper, melagueta_pepper, Aframomum_melegueta
cardamom, cardamon, Elettaria_cardamomum
begonia
fibrous-rooted_begonia
tuberous_begonia
rhizomatous_begonia
Christmas_begonia, blooming-fool_begonia, Begonia_cheimantha
angel-wing_begonia, Begonia_cocchinea
beefsteak_begonia, kidney_begonia, Begonia_erythrophylla, Begonia_feastii
star_begonia, star-leaf_begonia, Begonia_heracleifolia
rex_begonia, king_begonia, painted-leaf_begonia, beefsteak_geranium, Begonia_rex
wax_begonia, Begonia_semperflorens
Socotra_begonia, Begonia_socotrana
hybrid_tuberous_begonia, Begonia_tuberhybrida
dillenia
guinea_gold_vine, guinea_flower
poon
calaba, Santa_Maria_tree, Calophyllum_calaba
Maria, Calophyllum_longifolium
laurelwood, lancewood_tree, Calophyllum_candidissimum
Alexandrian_laurel, Calophyllum_inophyllum
clusia
wild_fig, Clusia_flava
waxflower, Clusia_insignis
pitch_apple, strangler_fig, Clusia_rosea, Clusia_major
mangosteen, mangosteen_tree, Garcinia_mangostana
gamboge_tree, Garcinia_hanburyi, Garcinia_cambogia, Garcinia_gummi-gutta
St_John's_wort
common_St_John's_wort, tutsan, Hypericum_androsaemum
great_St_John's_wort, Hypericum_ascyron, Hypericum_pyramidatum
creeping_St_John's_wort, Hypericum_calycinum
low_St_Andrew's_cross, Hypericum_hypericoides
klammath_weed, Hypericum_perforatum
shrubby_St_John's_wort, Hypericum_prolificum, Hypericum_spathulatum
St_Peter's_wort, Hypericum_tetrapterum, Hypericum_maculatum
marsh_St-John's_wort, Hypericum_virginianum
mammee_apple, mammee, mamey, mammee_tree, Mammea_americana
rose_chestnut, ironwood, ironwood_tree, Mesua_ferrea
bower_actinidia, tara_vine, Actinidia_arguta
Chinese_gooseberry, kiwi, kiwi_vine, Actinidia_chinensis, Actinidia_deliciosa
silvervine, silver_vine, Actinidia_polygama
wild_cinnamon, white_cinnamon_tree, Canella_winterana, Canella-alba
papaya, papaia, pawpaw, papaya_tree, melon_tree, Carica_papaya
souari, souari_nut, souari_tree, Caryocar_nuciferum
rockrose, rock_rose
white-leaved_rockrose, Cistus_albidus
common_gum_cistus, Cistus_ladanifer, Cistus_ladanum
frostweed, frost-weed, frostwort, Helianthemum_canadense, Crocanthemum_canadense
dipterocarp
red_lauan, red_lauan_tree, Shorea_teysmanniana
governor's_plum, governor_plum, Madagascar_plum, ramontchi, batoko_palm, Flacourtia_indica
kei_apple, kei_apple_bush, Dovyalis_caffra
ketembilla, kitembilla, kitambilla, ketembilla_tree, Ceylon_gooseberry, Dovyalis_hebecarpa
chaulmoogra, chaulmoogra_tree, chaulmugra, Hydnocarpus_kurzii, Taraktagenos_kurzii, Taraktogenos_kurzii
wild_peach, Kiggelaria_africana
candlewood
boojum_tree, cirio, Fouquieria_columnaris, Idria_columnaris
bird's-eye_bush, Ochna_serrulata
granadilla, purple_granadillo, Passiflora_edulis
granadilla, sweet_granadilla, Passiflora_ligularis
granadilla, giant_granadilla, Passiflora_quadrangularis
maypop, Passiflora_incarnata
Jamaica_honeysuckle, yellow_granadilla, Passiflora_laurifolia
banana_passion_fruit, Passiflora_mollissima
sweet_calabash, Passiflora_maliformis
love-in-a-mist, running_pop, wild_water_lemon, Passiflora_foetida
reseda
mignonette, sweet_reseda, Reseda_odorata
dyer's_rocket, dyer's_mignonette, weld, Reseda_luteola
false_tamarisk, German_tamarisk, Myricaria_germanica
halophyte
viola
violet
field_pansy, heartsease, Viola_arvensis
American_dog_violet, Viola_conspersa
dog_violet, heath_violet, Viola_canina
horned_violet, tufted_pansy, Viola_cornuta
two-eyed_violet, heartsease, Viola_ocellata
bird's-foot_violet, pansy_violet, Johnny-jump-up, wood_violet, Viola_pedata
downy_yellow_violet, Viola_pubescens
long-spurred_violet, Viola_rostrata
pale_violet, striped_violet, cream_violet, Viola_striata
hedge_violet, wood_violet, Viola_sylvatica, Viola_reichenbachiana
nettle
stinging_nettle, Urtica_dioica
Roman_nettle, Urtica_pipulifera
ramie, ramee, Chinese_silk_plant, China_grass, Boehmeria_nivea
wood_nettle, Laportea_canadensis
Australian_nettle, Australian_nettle_tree
pellitory-of-the-wall, wall_pellitory, pellitory, Parietaria_difussa
richweed, clearweed, dead_nettle, Pilea_pumilla
artillery_plant, Pilea_microphylla
friendship_plant, panamica, panamiga, Pilea_involucrata
Queensland_grass-cloth_plant, Pipturus_argenteus
Pipturus_albidus
cannabis, hemp
Indian_hemp, Cannabis_indica
mulberry, mulberry_tree
white_mulberry, Morus_alba
black_mulberry, Morus_nigra
red_mulberry, Morus_rubra
osage_orange, bow_wood, mock_orange, Maclura_pomifera
breadfruit, breadfruit_tree, Artocarpus_communis, Artocarpus_altilis
jackfruit, jackfruit_tree, Artocarpus_heterophyllus
marang, marang_tree, Artocarpus_odoratissima
fig_tree
fig, common_fig, common_fig_tree, Ficus_carica
caprifig, Ficus_carica_sylvestris
golden_fig, Florida_strangler_fig, strangler_fig, wild_fig, Ficus_aurea
banyan, banyan_tree, banian, banian_tree, Indian_banyan, East_Indian_fig_tree, Ficus_bengalensis
pipal, pipal_tree, pipul, peepul, sacred_fig, bo_tree, Ficus_religiosa
India-rubber_tree, India-rubber_plant, India-rubber_fig, rubber_plant, Assam_rubber, Ficus_elastica
mistletoe_fig, mistletoe_rubber_plant, Ficus_diversifolia, Ficus_deltoidea
Port_Jackson_fig, rusty_rig, little-leaf_fig, Botany_Bay_fig, Ficus_rubiginosa
sycamore, sycamore_fig, mulberry_fig, Ficus_sycomorus
paper_mulberry, Broussonetia_papyrifera
trumpetwood, trumpet-wood, trumpet_tree, snake_wood, imbauba, Cecropia_peltata
elm, elm_tree
winged_elm, wing_elm, Ulmus_alata
American_elm, white_elm, water_elm, rock_elm, Ulmus_americana
smooth-leaved_elm, European_field_elm, Ulmus_carpinifolia
cedar_elm, Ulmus_crassifolia
witch_elm, wych_elm, Ulmus_glabra
Dutch_elm, Ulmus_hollandica
Huntingdon_elm, Ulmus_hollandica_vegetata
water_elm, Ulmus_laevis
Chinese_elm, Ulmus_parvifolia
English_elm, European_elm, Ulmus_procera
Siberian_elm, Chinese_elm, dwarf_elm, Ulmus_pumila
slippery_elm, red_elm, Ulmus_rubra
Jersey_elm, guernsey_elm, wheately_elm, Ulmus_sarniensis, Ulmus_campestris_sarniensis, Ulmus_campestris_wheatleyi
September_elm, red_elm, Ulmus_serotina
rock_elm, Ulmus_thomasii
hackberry, nettle_tree
European_hackberry, Mediterranean_hackberry, Celtis_australis
American_hackberry, Celtis_occidentalis
sugarberry, Celtis_laevigata
iridaceous_plant
bearded_iris
beardless_iris
orrisroot, orris
dwarf_iris, Iris_cristata
Dutch_iris, Iris_filifolia
Florentine_iris, orris, Iris_germanica_florentina, Iris_florentina
stinking_iris, gladdon, gladdon_iris, stinking_gladwyn, roast_beef_plant, Iris_foetidissima
German_iris, Iris_germanica
Japanese_iris, Iris_kaempferi
German_iris, Iris_kochii
Dalmatian_iris, Iris_pallida
Persian_iris, Iris_persica
Dutch_iris, Iris_tingitana
dwarf_iris, vernal_iris, Iris_verna
Spanish_iris, xiphium_iris, Iris_xiphium
blackberry-lily, leopard_lily, Belamcanda_chinensis
crocus
saffron, saffron_crocus, Crocus_sativus
corn_lily
blue-eyed_grass
wandflower, Sparaxis_tricolor
amaryllis
salsilla, Bomarea_edulis
salsilla, Bomarea_salsilla
blood_lily
Cape_tulip, Haemanthus_coccineus
hippeastrum, Hippeastrum_puniceum
narcissus
daffodil, Narcissus_pseudonarcissus
jonquil, Narcissus_jonquilla
jonquil
Jacobean_lily, Aztec_lily, Strekelia_formosissima
liliaceous_plant
mountain_lily, Lilium_auratum
Canada_lily, wild_yellow_lily, meadow_lily, wild_meadow_lily, Lilium_canadense
tiger_lily, leopard_lily, pine_lily, Lilium_catesbaei
Columbia_tiger_lily, Oregon_lily, Lilium_columbianum
tiger_lily, devil_lily, kentan, Lilium_lancifolium
Easter_lily, Bermuda_lily, white_trumpet_lily, Lilium_longiflorum
coast_lily, Lilium_maritinum
Turk's-cap, martagon, Lilium_martagon
Michigan_lily, Lilium_michiganense
leopard_lily, panther_lily, Lilium_pardalinum
Turk's-cap, Turk's_cap-lily, Lilium_superbum
African_lily, African_tulip, blue_African_lily, Agapanthus_africanus
colicroot, colic_root, crow_corn, star_grass, unicorn_root
ague_root, ague_grass, Aletris_farinosa
yellow_colicroot, Aletris_aurea
alliaceous_plant
Hooker's_onion, Allium_acuminatum
wild_leek, Levant_garlic, kurrat, Allium_ampeloprasum
Canada_garlic, meadow_leek, rose_leek, Allium_canadense
keeled_garlic, Allium_carinatum
onion
shallot, eschalot, multiplier_onion, Allium_cepa_aggregatum, Allium_ascalonicum
nodding_onion, nodding_wild_onion, lady's_leek, Allium_cernuum
Welsh_onion, Japanese_leek, Allium_fistulosum
red-skinned_onion, Allium_haematochiton
daffodil_garlic, flowering_onion, Naples_garlic, Allium_neopolitanum
few-flowered_leek, Allium_paradoxum
garlic, Allium_sativum
sand_leek, giant_garlic, Spanish_garlic, rocambole, Allium_scorodoprasum
chives, chive, cive, schnittlaugh, Allium_schoenoprasum
crow_garlic, false_garlic, field_garlic, stag's_garlic, wild_garlic, Allium_vineale
wild_garlic, wood_garlic, Ramsons, Allium_ursinum
garlic_chive, Chinese_chive, Oriental_garlic, Allium_tuberosum
round-headed_leek, Allium_sphaerocephalum
three-cornered_leek, triquetrous_leek, Allium_triquetrum
cape_aloe, Aloe_ferox
kniphofia, tritoma, flame_flower, flame-flower, flameflower
poker_plant, Kniphofia_uvaria
red-hot_poker, Kniphofia_praecox
fly_poison, Amianthum_muscaetoxicum, Amianthum_muscitoxicum
amber_lily, Anthericum_torreyi
asparagus, edible_asparagus, Asparagus_officinales
asparagus_fern, Asparagus_setaceous, Asparagus_plumosus
smilax, Asparagus_asparagoides
asphodel
Jacob's_rod
aspidistra, cast-iron_plant, bar-room_plant, Aspidistra_elatio
coral_drops, Bessera_elegans
Christmas_bells
climbing_onion, Bowiea_volubilis
mariposa, mariposa_tulip, mariposa_lily
globe_lily, fairy_lantern
cat's-ear
white_globe_lily, white_fairy_lantern, Calochortus_albus
yellow_globe_lily, golden_fairy_lantern, Calochortus_amabilis
rose_globe_lily, Calochortus_amoenus
star_tulip, elegant_cat's_ears, Calochortus_elegans
desert_mariposa_tulip, Calochortus_kennedyi
yellow_mariposa_tulip, Calochortus_luteus
sagebrush_mariposa_tulip, Calochortus_macrocarpus
sego_lily, Calochortus_nuttallii
camas, camass, quamash, camosh, camash
common_camas, Camassia_quamash
Leichtlin's_camas, Camassia_leichtlinii
wild_hyacinth, indigo_squill, Camassia_scilloides
dogtooth_violet, dogtooth, dog's-tooth_violet
white_dogtooth_violet, white_dog's-tooth_violet, blonde_lilian, Erythronium_albidum
yellow_adder's_tongue, trout_lily, amberbell, Erythronium_americanum
European_dogtooth, Erythronium_dens-canis
fawn_lily, Erythronium_californicum
glacier_lily, snow_lily, Erythronium_grandiflorum
avalanche_lily, Erythronium_montanum
fritillary, checkered_lily
mission_bells, rice-grain_fritillary, Fritillaria_affinis, Fritillaria_lanceolata, Fritillaria_mutica
mission_bells, black_fritillary, Fritillaria_biflora
stink_bell, Fritillaria_agrestis
crown_imperial, Fritillaria_imperialis
white_fritillary, Fritillaria_liliaceae
snake's_head_fritillary, guinea-hen_flower, checkered_daffodil, leper_lily, Fritillaria_meleagris
adobe_lily, pink_fritillary, Fritillaria_pluriflora
scarlet_fritillary, Fritillaria_recurva
tulip
dwarf_tulip, Tulipa_armena, Tulipa_suaveolens
lady_tulip, candlestick_tulip, Tulipa_clusiana
Tulipa_gesneriana
cottage_tulip
Darwin_tulip
gloriosa, glory_lily, climbing_lily, creeping_lily, Gloriosa_superba
lemon_lily, Hemerocallis_lilio-asphodelus, Hemerocallis_flava
common_hyacinth, Hyacinthus_orientalis
Roman_hyacinth, Hyacinthus_orientalis_albulus
summer_hyacinth, cape_hyacinth, Hyacinthus_candicans, Galtonia_candicans
star-of-Bethlehem
bath_asparagus, Prussian_asparagus, Ornithogalum_pyrenaicum
grape_hyacinth
common_grape_hyacinth, Muscari_neglectum
tassel_hyacinth, Muscari_comosum
scilla, squill
spring_squill, Scilla_verna, sea_onion
false_asphodel
Scotch_asphodel, Tofieldia_pusilla
sea_squill, sea_onion, squill, Urginea_maritima
squill
butcher's_broom, Ruscus_aculeatus
bog_asphodel
European_bog_asphodel, Narthecium_ossifragum
American_bog_asphodel, Narthecium_americanum
hellebore, false_hellebore
white_hellebore, American_hellebore, Indian_poke, bugbane, Veratrum_viride
squaw_grass, bear_grass, Xerophyllum_tenax
death_camas, zigadene
alkali_grass, Zigadenus_elegans
white_camas, Zigadenus_glaucus
poison_camas, Zigadenus_nuttalli
grassy_death_camas, Zigadenus_venenosus, Zigadenus_venenosus_gramineus
prairie_wake-robin, prairie_trillium, Trillium_recurvatum
dwarf-white_trillium, snow_trillium, early_wake-robin
herb_Paris, Paris_quadrifolia
sarsaparilla
bullbrier, greenbrier, catbrier, horse_brier, horse-brier, brier, briar, Smilax_rotundifolia
rough_bindweed, Smilax_aspera
clintonia, Clinton's_lily
false_lily_of_the_valley, Maianthemum_canadense
false_lily_of_the_valley, Maianthemum_bifolium
Solomon's-seal
great_Solomon's-seal, Polygonatum_biflorum, Polygonatum_commutatum
bellwort, merry_bells, wild_oats
strawflower, cornflower, Uvularia_grandiflora
pia, Indian_arrowroot, Tacca_leontopetaloides, Tacca_pinnatifida
agave, century_plant, American_aloe
American_agave, Agave_americana
sisal, Agave_sisalana
maguey, cantala, Agave_cantala
maguey, Agave_atrovirens
Agave_tequilana
cabbage_tree, grass_tree, Cordyline_australis
dracaena
tuberose, Polianthes_tuberosa
sansevieria, bowstring_hemp
African_bowstring_hemp, African_hemp, Sansevieria_guineensis
Ceylon_bowstring_hemp, Sansevieria_zeylanica
mother-in-law's_tongue, snake_plant, Sansevieria_trifasciata
Spanish_bayonet, Yucca_aloifolia
Spanish_bayonet, Yucca_baccata
Joshua_tree, Yucca_brevifolia
soapweed, soap-weed, soap_tree, Yucca_elata
Adam's_needle, Adam's_needle-and-thread, spoonleaf_yucca, needle_palm, Yucca_filamentosa
bear_grass, Yucca_glauca
Spanish_dagger, Yucca_gloriosa
Our_Lord's_candle, Yucca_whipplei
water_shamrock, buckbean, bogbean, bog_myrtle, marsh_trefoil, Menyanthes_trifoliata
butterfly_bush, buddleia
yellow_jasmine, yellow_jessamine, Carolina_jasmine, evening_trumpet_flower, Gelsemium_sempervirens
flax
calabar_bean, ordeal_bean
bonduc, bonduc_tree, Caesalpinia_bonduc, Caesalpinia_bonducella
divi-divi, Caesalpinia_coriaria
Mysore_thorn, Caesalpinia_decapetala, Caesalpinia_sepiaria
brazilian_ironwood, Caesalpinia_ferrea
bird_of_paradise, poinciana, Caesalpinia_gilliesii, Poinciana_gilliesii
shingle_tree, Acrocarpus_fraxinifolius
mountain_ebony, orchid_tree, Bauhinia_variegata
msasa, Brachystegia_speciformis
cassia
golden_shower_tree, drumstick_tree, purging_cassia, pudding_pipe_tree, canafistola, canafistula, Cassia_fistula
pink_shower, pink_shower_tree, horse_cassia, Cassia_grandis
rainbow_shower, Cassia_javonica
horse_cassia, Cassia_roxburghii, Cassia_marginata
carob, carob_tree, carob_bean_tree, algarroba, Ceratonia_siliqua
carob, carob_bean, algarroba_bean, algarroba, locust_bean, locust_pod
paloverde
royal_poinciana, flamboyant, flame_tree, peacock_flower, Delonix_regia, Poinciana_regia
locust_tree, locust
water_locust, swamp_locust, Gleditsia_aquatica
honey_locust, Gleditsia_triacanthos
Kentucky_coffee_tree, bonduc, chicot, Gymnocladus_dioica
logwood, logwood_tree, campeachy, bloodwood_tree, Haematoxylum_campechianum
Jerusalem_thorn, horsebean, Parkinsonia_aculeata
palo_verde, Parkinsonia_florida, Cercidium_floridum
Dalmatian_laburnum, Petteria_ramentacea, Cytisus_ramentaceus
senna
avaram, tanner's_cassia, Senna_auriculata, Cassia_auriculata
Alexandria_senna, Alexandrian_senna, true_senna, tinnevelly_senna, Indian_senna, Senna_alexandrina, Cassia_acutifolia, Cassia_augustifolia
wild_senna, Senna_marilandica, Cassia_marilandica
sicklepod, Senna_obtusifolia, Cassia_tora
coffee_senna, mogdad_coffee, styptic_weed, stinking_weed, Senna_occidentalis, Cassia_occidentalis
tamarind, tamarind_tree, tamarindo, Tamarindus_indica
false_indigo, bastard_indigo, Amorpha_californica
false_indigo, bastard_indigo, Amorpha_fruticosa
hog_peanut, wild_peanut, Amphicarpaea_bracteata, Amphicarpa_bracteata
angelim, andelmin
cabbage_bark, cabbage-bark_tree, cabbage_tree, Andira_inermis
kidney_vetch, Anthyllis_vulneraria
groundnut, groundnut_vine, Indian_potato, potato_bean, wild_bean, Apios_americana, Apios_tuberosa
rooibos, Aspalathus_linearis, Aspalathus_cedcarbergensis
milk_vetch, milk-vetch
alpine_milk_vetch, Astragalus_alpinus
purple_milk_vetch, Astragalus_danicus
camwood, African_sandalwood, Baphia_nitida
wild_indigo, false_indigo
blue_false_indigo, Baptisia_australis
white_false_indigo, Baptisia_lactea
indigo_broom, horsefly_weed, rattle_weed, Baptisia_tinctoria
dhak, dak, palas, Butea_frondosa, Butea_monosperma
pigeon_pea, pigeon-pea_plant, cajan_pea, catjang_pea, red_gram, dhal, dahl, Cajanus_cajan
sword_bean, Canavalia_gladiata
pea_tree, caragana
Siberian_pea_tree, Caragana_arborescens
Chinese_pea_tree, Caragana_sinica
Moreton_Bay_chestnut, Australian_chestnut
butterfly_pea, Centrosema_virginianum
Judas_tree, love_tree, Circis_siliquastrum
redbud, Cercis_canadensis
western_redbud, California_redbud, Cercis_occidentalis
tagasaste, Chamaecytisus_palmensis, Cytesis_proliferus
weeping_tree_broom
flame_pea
chickpea, chickpea_plant, Egyptian_pea, Cicer_arietinum
chickpea, garbanzo
Kentucky_yellowwood, gopherwood, Cladrastis_lutea, Cladrastis_kentukea
glory_pea, clianthus
desert_pea, Sturt_pea, Sturt's_desert_pea, Clianthus_formosus, Clianthus_speciosus
parrot's_beak, parrot's_bill, Clianthus_puniceus
butterfly_pea, Clitoria_mariana
blue_pea, butterfly_pea, Clitoria_turnatea
telegraph_plant, semaphore_plant, Codariocalyx_motorius, Desmodium_motorium, Desmodium_gyrans
bladder_senna, Colutea_arborescens
axseed, crown_vetch, Coronilla_varia
crotalaria, rattlebox
guar, cluster_bean, Cyamopsis_tetragonolobus, Cyamopsis_psoraloides
white_broom, white_Spanish_broom, Cytisus_albus, Cytisus_multiflorus
common_broom, Scotch_broom, green_broom, Cytisus_scoparius
rosewood, rosewood_tree
Indian_blackwood, East_Indian_rosewood, East_India_rosewood, Indian_rosewood, Dalbergia_latifolia
sissoo, sissu, sisham, Dalbergia_sissoo
kingwood, kingwood_tree, Dalbergia_cearensis
Brazilian_rosewood, caviuna_wood, jacaranda, Dalbergia_nigra
cocobolo, Dalbergia_retusa
blackwood, blackwood_tree
bitter_pea
derris
derris_root, tuba_root, Derris_elliptica
prairie_mimosa, prickle-weed, Desmanthus_ilinoensis
tick_trefoil, beggar_lice, beggar's_lice
beggarweed, Desmodium_tortuosum, Desmodium_purpureum
Australian_pea, Dipogon_lignosus, Dolichos_lignosus
coral_tree, erythrina
kaffir_boom, Cape_kafferboom, Erythrina_caffra
coral_bean_tree, Erythrina_corallodendrum
ceibo, crybaby_tree, cry-baby_tree, common_coral_tree, Erythrina_crista-galli
kaffir_boom, Transvaal_kafferboom, Erythrina_lysistemon
Indian_coral_tree, Erythrina_variegata, Erythrina_Indica
cork_tree, Erythrina_vespertilio
goat's_rue, goat_rue, Galega_officinalis
poison_bush, poison_pea, gastrolobium
Spanish_broom, Spanish_gorse, Genista_hispanica
woodwaxen, dyer's_greenweed, dyer's-broom, dyeweed, greenweed, whin, woadwaxen, Genista_tinctoria
chanar, chanal, Geoffroea_decorticans
gliricidia
soy, soybean, soya_bean
licorice, liquorice, Glycyrrhiza_glabra
wild_licorice, wild_liquorice, American_licorice, American_liquorice, Glycyrrhiza_lepidota
licorice_root
Western_Australia_coral_pea, Hardenbergia_comnptoniana
sweet_vetch, Hedysarum_boreale
French_honeysuckle, sulla, Hedysarum_coronarium
anil, Indigofera_suffruticosa, Indigofera_anil
scarlet_runner, running_postman, Kennedia_prostrata
hyacinth_bean, bonavist, Indian_bean, Egyptian_bean, Lablab_purpureus, Dolichos_lablab
Scotch_laburnum, Alpine_golden_chain, Laburnum_alpinum
vetchling
wild_pea
everlasting_pea
beach_pea, sea_pea, Lathyrus_maritimus, Lathyrus_japonicus
grass_vetch, grass_vetchling, Lathyrus_nissolia
marsh_pea, Lathyrus_palustris
common_vetchling, meadow_pea, yellow_vetchling, Lathyrus_pratensis
grass_pea, Indian_pea, khesari, Lathyrus_sativus
Tangier_pea, Tangier_peavine, Lalthyrus_tingitanus
heath_pea, earth-nut_pea, earthnut_pea, tuberous_vetch, Lathyrus_tuberosus
bicolor_lespediza, ezo-yama-hagi, Lespedeza_bicolor
japanese_clover, japan_clover, jap_clover, Lespedeza_striata
Korean_lespedeza, Lespedeza_stipulacea
sericea_lespedeza, Lespedeza_sericea, Lespedeza_cuneata
lentil, lentil_plant, Lens_culinaris
lentil
prairie_bird's-foot_trefoil, compass_plant, prairie_lotus, prairie_trefoil, Lotus_americanus
bird's_foot_trefoil, bird's_foot_clover, babies'_slippers, bacon_and_eggs, Lotus_corniculatus
winged_pea, asparagus_pea, Lotus_tetragonolobus
lupine, lupin
white_lupine, field_lupine, wolf_bean, Egyptian_lupine, Lupinus_albus
tree_lupine, Lupinus_arboreus
wild_lupine, sundial_lupine, Indian_beet, old-maid's_bonnet, Lupinus_perennis
bluebonnet, buffalo_clover, Texas_bluebonnet, Lupinus_subcarnosus
Texas_bluebonnet, Lupinus_texensis
medic, medick, trefoil
moon_trefoil, Medicago_arborea
sickle_alfalfa, sickle_lucerne, sickle_medick, Medicago_falcata
Calvary_clover, Medicago_intertexta, Medicago_echinus
black_medick, hop_clover, yellow_trefoil, nonesuch_clover, Medicago_lupulina
alfalfa, lucerne, Medicago_sativa
millettia
mucuna
cowage, velvet_bean, Bengal_bean, Benghal_bean, Florida_bean, Mucuna_pruriens_utilis, Mucuna_deeringiana, Mucuna_aterrima, Stizolobium_deeringiana
tolu_tree, tolu_balsam_tree, Myroxylon_balsamum, Myroxylon_toluiferum
Peruvian_balsam, Myroxylon_pereirae, Myroxylon_balsamum_pereirae
sainfoin, sanfoin, holy_clover, esparcet, Onobrychis_viciifolia, Onobrychis_viciaefolia
restharrow, rest-harrow, Ononis_repens
bead_tree, jumby_bean, jumby_tree, Ormosia_monosperma
jumby_bead, jumbie_bead, Ormosia_coarctata
locoweed, crazyweed, crazy_weed
purple_locoweed, purple_loco, Oxytropis_lambertii
tumbleweed
yam_bean, Pachyrhizus_erosus
shamrock_pea, Parochetus_communis
pole_bean
kidney_bean, frijol, frijole
haricot
wax_bean
scarlet_runner, scarlet_runner_bean, Dutch_case-knife_bean, runner_bean, Phaseolus_coccineus, Phaseolus_multiflorus
lima_bean, lima_bean_plant, Phaseolus_limensis
sieva_bean, butter_bean, butter-bean_plant, lima_bean, Phaseolus_lunatus
tepary_bean, Phaseolus_acutifolius_latifolius
chaparral_pea, stingaree-bush, Pickeringia_montana
Jamaica_dogwood, fish_fuddle, Piscidia_piscipula, Piscidia_erythrina
pea
garden_pea
edible-pod_pea, edible-podded_pea, Pisum_sativum_macrocarpon
sugar_snap_pea, snap_pea
field_pea, field-pea_plant, Austrian_winter_pea, Pisum_sativum_arvense, Pisum_arvense
field_pea
common_flat_pea, native_holly, Playlobium_obtusangulum
quira
roble, Platymiscium_trinitatis
Panama_redwood_tree, Panama_redwood, Platymiscium_pinnatum
Indian_beech, Pongamia_glabra
winged_bean, winged_pea, goa_bean, goa_bean_vine, Manila_bean, Psophocarpus_tetragonolobus
breadroot, Indian_breadroot, pomme_blanche, pomme_de_prairie, Psoralea_esculenta
bloodwood_tree, kiaat, Pterocarpus_angolensis
kino, Pterocarpus_marsupium
red_sandalwood, red_sanders, red_sanderswood, red_saunders, Pterocarpus_santalinus
kudzu, kudzu_vine, Pueraria_lobata
bristly_locust, rose_acacia, moss_locust, Robinia_hispida
black_locust, yellow_locust, Robinia_pseudoacacia
clammy_locust, Robinia_viscosa
carib_wood, Sabinea_carinalis
Colorado_River_hemp, Sesbania_exaltata
scarlet_wisteria_tree, vegetable_hummingbird, Sesbania_grandiflora
Japanese_pagoda_tree, Chinese_scholartree, Chinese_scholar_tree, Sophora_japonica, Sophora_sinensis
mescal_bean, coral_bean, frijolito, frijolillo, Sophora_secundiflora
kowhai, Sophora_tetraptera
jade_vine, emerald_creeper, Strongylodon_macrobotrys
hoary_pea
bastard_indigo, Tephrosia_purpurea
catgut, goat's_rue, wild_sweet_pea, Tephrosia_virginiana
bush_pea
false_lupine, golden_pea, yellow_pea, Thermopsis_macrophylla
Carolina_lupine, Thermopsis_villosa
tipu, tipu_tree, yellow_jacaranda, pride_of_Bolivia
bird's_foot_trefoil, Trigonella_ornithopodioides
fenugreek, Greek_clover, Trigonella_foenumgraecum
gorse, furze, whin, Irish_gorse, Ulex_europaeus
vetch
tufted_vetch, bird_vetch, Calnada_pea, Vicia_cracca
broad_bean, fava_bean, horsebean
bitter_betch, Vicia_orobus
bush_vetch, Vicia_sepium
moth_bean, Vigna_aconitifolia, Phaseolus_aconitifolius
snailflower, snail-flower, snail_flower, snail_bean, corkscrew_flower, Vigna_caracalla, Phaseolus_caracalla
mung, mung_bean, green_gram, golden_gram, Vigna_radiata, Phaseolus_aureus
cowpea, cowpea_plant, black-eyed_pea, Vigna_unguiculata, Vigna_sinensis
cowpea, black-eyed_pea
asparagus_bean, yard-long_bean, Vigna_unguiculata_sesquipedalis, Vigna_sesquipedalis
swamp_oak, Viminaria_juncea, Viminaria_denudata
keurboom, Virgilia_capensis, Virgilia_oroboides
keurboom, Virgilia_divaricata
Japanese_wistaria, Wisteria_floribunda
Chinese_wistaria, Wisteria_chinensis
American_wistaria, American_wisteria, Wisteria_frutescens
silky_wisteria, Wisteria_venusta
palm, palm_tree
sago_palm
feather_palm
fan_palm
palmetto
coyol, coyol_palm, Acrocomia_vinifera
grugru, gri-gri, grugru_palm, macamba, Acrocomia_aculeata
areca
betel_palm, Areca_catechu
sugar_palm, gomuti, gomuti_palm, Arenga_pinnata
piassava_palm, pissaba_palm, Bahia_piassava, bahia_coquilla, Attalea_funifera
coquilla_nut
palmyra, palmyra_palm, toddy_palm, wine_palm, lontar, longar_palm, Borassus_flabellifer
calamus
rattan, rattan_palm, Calamus_rotang
lawyer_cane, Calamus_australis
fishtail_palm
wine_palm, jaggery_palm, kitul, kittul, kitul_tree, toddy_palm, Caryota_urens
wax_palm, Ceroxylon_andicola, Ceroxylon_alpinum
coconut, coconut_palm, coco_palm, coco, cocoa_palm, coconut_tree, Cocos_nucifera
carnauba, carnauba_palm, wax_palm, Copernicia_prunifera, Copernicia_cerifera
caranday, caranda, caranda_palm, wax_palm, Copernicia_australis, Copernicia_alba
corozo, corozo_palm
gebang_palm, Corypha_utan, Corypha_gebanga
latanier, latanier_palm
talipot, talipot_palm, Corypha_umbraculifera
oil_palm
African_oil_palm, Elaeis_guineensis
American_oil_palm, Elaeis_oleifera
palm_nut, palm_kernel
cabbage_palm, Euterpe_oleracea
cabbage_palm, cabbage_tree, Livistona_australis
true_sago_palm, Metroxylon_sagu
nipa_palm, Nipa_fruticans
babassu, babassu_palm, coco_de_macao, Orbignya_phalerata, Orbignya_spesiosa, Orbignya_martiana
babassu_nut
cohune_palm, Orbignya_cohune, cohune
cohune_nut
date_palm, Phoenix_dactylifera
ivory_palm, ivory-nut_palm, ivory_plant, Phytelephas_macrocarpa
raffia_palm, Raffia_farinifera, Raffia_ruffia
bamboo_palm, Raffia_vinifera
lady_palm
miniature_fan_palm, bamboo_palm, fern_rhapis, Rhapis_excelsa
reed_rhapis, slender_lady_palm, Rhapis_humilis
royal_palm, Roystonea_regia
cabbage_palm, Roystonea_oleracea
cabbage_palmetto, cabbage_palm, Sabal_palmetto
saw_palmetto, scrub_palmetto, Serenoa_repens
thatch_palm, thatch_tree, silver_thatch, broom_palm, Thrinax_parviflora
key_palm, silvertop_palmetto, silver_thatch, Thrinax_microcarpa, Thrinax_morrisii, Thrinax_keyensis
English_plantain, narrow-leaved_plantain, ribgrass, ribwort, ripple-grass, buckthorn, Plantago_lanceolata
broad-leaved_plantain, common_plantain, white-man's_foot, whiteman's_foot, cart-track_plant, Plantago_major
hoary_plantain, Plantago_media
fleawort, psyllium, Spanish_psyllium, Plantago_psyllium
rugel's_plantain, broad-leaved_plantain, Plantago_rugelii
hoary_plantain, Plantago_virginica
buckwheat, Polygonum_fagopyrum, Fagopyrum_esculentum
prince's-feather, princess_feather, kiss-me-over-the-garden-gate, prince's-plume, Polygonum_orientale
eriogonum
umbrella_plant, Eriogonum_allenii
wild_buckwheat, California_buckwheat, Erigonum_fasciculatum
rhubarb, rhubarb_plant
Himalayan_rhubarb, Indian_rhubarb, red-veined_pie_plant, Rheum_australe, Rheum_emodi
pie_plant, garden_rhubarb, Rheum_cultorum, Rheum_rhabarbarum, Rheum_rhaponticum
Chinese_rhubarb, Rheum_palmatum
sour_dock, garden_sorrel, Rumex_acetosa
sheep_sorrel, sheep's_sorrel, Rumex_acetosella
bitter_dock, broad-leaved_dock, yellow_dock, Rumex_obtusifolius
French_sorrel, garden_sorrel, Rumex_scutatus
yellow-eyed_grass
commelina
spiderwort, dayflower
pineapple, pineapple_plant, Ananas_comosus
pipewort, Eriocaulon_aquaticum
water_hyacinth, water_orchid, Eichhornia_crassipes, Eichhornia_spesiosa
water_star_grass, mud_plantain, Heteranthera_dubia
naiad, water_nymph
water_plantain, Alisma_plantago-aquatica
narrow-leaved_water_plantain
hydrilla, Hydrilla_verticillata
American_frogbit, Limnodium_spongia
waterweed
Canadian_pondweed, Elodea_canadensis
tape_grass, eelgrass, wild_celery, Vallisneria_spiralis
pondweed
curled_leaf_pondweed, curly_pondweed, Potamogeton_crispus
loddon_pondweed, Potamogeton_nodosus, Potamogeton_americanus
frog's_lettuce
arrow_grass, Triglochin_maritima
horned_pondweed, Zannichellia_palustris
eelgrass, grass_wrack, sea_wrack, Zostera_marina
rose, rosebush
hip, rose_hip, rosehip
banksia_rose, Rosa_banksia
damask_rose, summer_damask_rose, Rosa_damascena
sweetbrier, sweetbriar, brier, briar, eglantine, Rosa_eglanteria
Cherokee_rose, Rosa_laevigata
musk_rose, Rosa_moschata
agrimonia, agrimony
harvest-lice, Agrimonia_eupatoria
fragrant_agrimony, Agrimonia_procera
alderleaf_Juneberry, alder-leaved_serviceberry, Amelanchier_alnifolia
flowering_quince
japonica, maule's_quince, Chaenomeles_japonica
coco_plum, coco_plum_tree, cocoa_plum, icaco, Chrysobalanus_icaco
cotoneaster
Cotoneaster_dammeri
Cotoneaster_horizontalis
parsley_haw, parsley-leaved_thorn, Crataegus_apiifolia, Crataegus_marshallii
scarlet_haw, Crataegus_biltmoreana
blackthorn, pear_haw, pear_hawthorn, Crataegus_calpodendron, Crataegus_tomentosa
cockspur_thorn, cockspur_hawthorn, Crataegus_crus-galli
mayhaw, summer_haw, Crataegus_aestivalis
red_haw, downy_haw, Crataegus_mollis, Crataegus_coccinea_mollis
red_haw, Crataegus_pedicellata, Crataegus_coccinea
quince, quince_bush, Cydonia_oblonga
mountain_avens, Dryas_octopetala
loquat, loquat_tree, Japanese_medlar, Japanese_plum, Eriobotrya_japonica
beach_strawberry, Chilean_strawberry, Fragaria_chiloensis
Virginia_strawberry, scarlet_strawberry, Fragaria_virginiana
avens
yellow_avens, Geum_alleppicum_strictum, Geum_strictum
yellow_avens, Geum_macrophyllum
prairie_smoke, purple_avens, Geum_triflorum
bennet, white_avens, Geum_virginianum
toyon, tollon, Christmasberry, Christmas_berry, Heteromeles_arbutifolia, Photinia_arbutifolia
apple_tree
apple, orchard_apple_tree, Malus_pumila
wild_apple, crab_apple, crabapple
crab_apple, crabapple, cultivated_crab_apple
Siberian_crab, Siberian_crab_apple, cherry_apple, cherry_crab, Malus_baccata
wild_crab, Malus_sylvestris
American_crab_apple, garland_crab, Malus_coronaria
Oregon_crab_apple, Malus_fusca
Southern_crab_apple, flowering_crab, Malus_angustifolia
Iowa_crab, Iowa_crab_apple, prairie_crab, western_crab_apple, Malus_ioensis
Bechtel_crab, flowering_crab
medlar, medlar_tree, Mespilus_germanica
cinquefoil, five-finger
silverweed, goose-tansy, goose_grass, Potentilla_anserina
salad_burnet, burnet_bloodwort, pimpernel, Poterium_sanguisorba
plum, plum_tree
wild_plum, wild_plum_tree
Allegheny_plum, Alleghany_plum, sloe, Prunus_alleghaniensis
American_red_plum, August_plum, goose_plum, Prunus_americana
chickasaw_plum, hog_plum, hog_plum_bush, Prunus_angustifolia
beach_plum, beach_plum_bush, Prunus_maritima
common_plum, Prunus_domestica
bullace, Prunus_insititia
damson_plum, damson_plum_tree, Prunus_domestica_insititia
big-tree_plum, Prunus_mexicana
Canada_plum, Prunus_nigra
plumcot, plumcot_tree
apricot, apricot_tree
Japanese_apricot, mei, Prunus_mume
common_apricot, Prunus_armeniaca
purple_apricot, black_apricot, Prunus_dasycarpa
cherry, cherry_tree
wild_cherry, wild_cherry_tree
wild_cherry
sweet_cherry, Prunus_avium
heart_cherry, oxheart, oxheart_cherry
gean, mazzard, mazzard_cherry
capulin, capulin_tree, Prunus_capuli
cherry_laurel, laurel_cherry, mock_orange, wild_orange, Prunus_caroliniana
cherry_plum, myrobalan, myrobalan_plum, Prunus_cerasifera
sour_cherry, sour_cherry_tree, Prunus_cerasus
amarelle, Prunus_cerasus_caproniana
morello, Prunus_cerasus_austera
marasca
almond_tree
almond, sweet_almond, Prunus_dulcis, Prunus_amygdalus, Amygdalus_communis
bitter_almond, Prunus_dulcis_amara, Amygdalus_communis_amara
jordan_almond
dwarf_flowering_almond, Prunus_glandulosa
holly-leaved_cherry, holly-leaf_cherry, evergreen_cherry, islay, Prunus_ilicifolia
fuji, fuji_cherry, Prunus_incisa
flowering_almond, oriental_bush_cherry, Prunus_japonica
cherry_laurel, laurel_cherry, Prunus_laurocerasus
Catalina_cherry, Prunus_lyonii
bird_cherry, bird_cherry_tree
hagberry_tree, European_bird_cherry, common_bird_cherry, Prunus_padus
hagberry
pin_cherry, Prunus_pensylvanica
peach, peach_tree, Prunus_persica
nectarine, nectarine_tree, Prunus_persica_nectarina
sand_cherry, Prunus_pumila, Prunus_pumilla_susquehanae, Prunus_susquehanae, Prunus_cuneata
Japanese_plum, Prunus_salicina
black_cherry, black_cherry_tree, rum_cherry, Prunus_serotina
flowering_cherry
oriental_cherry, Japanese_cherry, Japanese_flowering_cherry, Prunus_serrulata
Japanese_flowering_cherry, Prunus_sieboldii
Sierra_plum, Pacific_plum, Prunus_subcordata
rosebud_cherry, winter_flowering_cherry, Prunus_subhirtella
Russian_almond, dwarf_Russian_almond, Prunus_tenella
flowering_almond, Prunus_triloba
chokecherry, chokecherry_tree, Prunus_virginiana
chokecherry
western_chokecherry, Prunus_virginiana_demissa, Prunus_demissa
Pyracantha, pyracanth, fire_thorn, firethorn
pear, pear_tree, Pyrus_communis
fruit_tree
bramble_bush
lawyerbush, lawyer_bush, bush_lawyer, Rubus_cissoides, Rubus_australis
stone_bramble, Rubus_saxatilis
sand_blackberry, Rubus_cuneifolius
boysenberry, boysenberry_bush
loganberry, Rubus_loganobaccus, Rubus_ursinus_loganobaccus
American_dewberry, Rubus_canadensis
Northern_dewberry, American_dewberry, Rubus_flagellaris
Southern_dewberry, Rubus_trivialis
swamp_dewberry, swamp_blackberry, Rubus_hispidus
European_dewberry, Rubus_caesius
raspberry, raspberry_bush
wild_raspberry, European_raspberry, framboise, Rubus_idaeus
American_raspberry, Rubus_strigosus, Rubus_idaeus_strigosus
black_raspberry, blackcap, blackcap_raspberry, thimbleberry, Rubus_occidentalis
salmonberry, Rubus_spectabilis
salmonberry, salmon_berry, thimbleberry, Rubus_parviflorus
wineberry, Rubus_phoenicolasius
mountain_ash
rowan, rowan_tree, European_mountain_ash, Sorbus_aucuparia
rowanberry
American_mountain_ash, Sorbus_americana
Western_mountain_ash, Sorbus_sitchensis
service_tree, sorb_apple, sorb_apple_tree, Sorbus_domestica
wild_service_tree, Sorbus_torminalis
spirea, spiraea
bridal_wreath, bridal-wreath, Saint_Peter's_wreath, St._Peter's_wreath, Spiraea_prunifolia
madderwort, rubiaceous_plant
Indian_madder, munjeet, Rubia_cordifolia
madder, Rubia_tinctorum
woodruff
dagame, lemonwood_tree, Calycophyllum_candidissimum
blolly, West_Indian_snowberry, Chiococca_alba
coffee, coffee_tree
Arabian_coffee, Coffea_arabica
Liberian_coffee, Coffea_liberica
robusta_coffee, Rio_Nunez_coffee, Coffea_robusta, Coffea_canephora
cinchona, chinchona
Cartagena_bark, Cinchona_cordifolia, Cinchona_lancifolia
calisaya, Cinchona_officinalis, Cinchona_ledgeriana, Cinchona_calisaya
cinchona_tree, Cinchona_pubescens
cinchona, cinchona_bark, Peruvian_bark, Jesuit's_bark
bedstraw
sweet_woodruff, waldmeister, woodruff, fragrant_bedstraw, Galium_odoratum, Asperula_odorata
Northern_bedstraw, Northern_snow_bedstraw, Galium_boreale
yellow_bedstraw, yellow_cleavers, Our_Lady's_bedstraw, Galium_verum
wild_licorice, Galium_lanceolatum
cleavers, clivers, goose_grass, catchweed, spring_cleavers, Galium_aparine
wild_madder, white_madder, white_bedstraw, infant's-breath, false_baby's_breath, Galium_mollugo
cape_jasmine, cape_jessamine, Gardenia_jasminoides, Gardenia_augusta
genipa
genipap_fruit, jagua, marmalade_box, Genipa_Americana
hamelia
scarlet_bush, scarlet_hamelia, coloradillo, Hamelia_patens, Hamelia_erecta
lemonwood, lemon-wood, lemonwood_tree, lemon-wood_tree, Psychotria_capensis
negro_peach, Sarcocephalus_latifolius, Sarcocephalus_esculentus
wild_medlar, wild_medlar_tree, medlar, Vangueria_infausta
Spanish_tamarind, Vangueria_madagascariensis
abelia
bush_honeysuckle, Diervilla_sessilifolia
American_twinflower, Linnaea_borealis_americana
honeysuckle
American_fly_honeysuckle, fly_honeysuckle, Lonicera_canadensis
Italian_honeysuckle, Italian_woodbine, Lonicera_caprifolium
yellow_honeysuckle, Lonicera_flava
hairy_honeysuckle, Lonicera_hirsuta
Japanese_honeysuckle, Lonicera_japonica
Hall's_honeysuckle, Lonicera_japonica_halliana
Morrow's_honeysuckle, Lonicera_morrowii
woodbine, Lonicera_periclymenum
trumpet_honeysuckle, coral_honeysuckle, trumpet_flower, trumpet_vine, Lonicera_sempervirens
European_fly_honeysuckle, European_honeysuckle, Lonicera_xylosteum
swamp_fly_honeysuckle
snowberry, common_snowberry, waxberry, Symphoricarpos_alba
coralberry, Indian_currant, Symphoricarpos_orbiculatus
blue_elder, blue_elderberry, Sambucus_caerulea
dwarf_elder, danewort, Sambucus_ebulus
American_red_elder, red-berried_elder, stinking_elder, Sambucus_pubens
European_red_elder, red-berried_elder, Sambucus_racemosa
feverroot, horse_gentian, tinker's_root, wild_coffee, Triostium_perfoliatum
cranberry_bush, cranberry_tree, American_cranberry_bush, highbush_cranberry, Viburnum_trilobum
wayfaring_tree, twist_wood, twistwood, Viburnum_lantana
guelder_rose, European_cranberrybush, European_cranberry_bush, crampbark, cranberry_tree, Viburnum_opulus
arrow_wood, Viburnum_recognitum
black_haw, Viburnum_prunifolium
weigela, Weigela_florida
teasel, teazel, teasle
common_teasel, Dipsacus_fullonum
fuller's_teasel, Dipsacus_sativus
wild_teasel, Dipsacus_sylvestris
scabious, scabiosa
sweet_scabious, pincushion_flower, mournful_widow, Scabiosa_atropurpurea
field_scabious, Scabiosa_arvensis
jewelweed, lady's_earrings, orange_balsam, celandine, touch-me-not, Impatiens_capensis
geranium
cranesbill, crane's_bill
wild_geranium, spotted_cranesbill, Geranium_maculatum
meadow_cranesbill, Geranium_pratense
Richardson's_geranium, Geranium_richardsonii
herb_robert, herbs_robert, herb_roberts, Geranium_robertianum
sticky_geranium, Geranium_viscosissimum
dove's_foot_geranium, Geranium_molle
rose_geranium, sweet-scented_geranium, Pelargonium_graveolens
fish_geranium, bedding_geranium, zonal_pelargonium, Pelargonium_hortorum
ivy_geranium, ivy-leaved_geranium, hanging_geranium, Pelargonium_peltatum
apple_geranium, nutmeg_geranium, Pelargonium_odoratissimum
lemon_geranium, Pelargonium_limoneum
storksbill, heron's_bill
musk_clover, muskus_grass, white-stemmed_filaree, Erodium_moschatum
incense_tree
elephant_tree, Bursera_microphylla
gumbo-limbo, Bursera_simaruba
Boswellia_carteri
salai, Boswellia_serrata
balm_of_gilead, Commiphora_meccanensis
myrrh_tree, Commiphora_myrrha
Protium_heptaphyllum
Protium_guianense
water_starwort
barbados_cherry, acerola, Surinam_cherry, West_Indian_cherry, Malpighia_glabra
mahogany, mahogany_tree
chinaberry, chinaberry_tree, China_tree, Persian_lilac, pride-of-India, azederach, azedarach, Melia_azederach, Melia_azedarach
neem, neem_tree, nim_tree, margosa, arishth, Azadirachta_indica, Melia_Azadirachta
neem_seed
Spanish_cedar, Spanish_cedar_tree, Cedrela_odorata
satinwood, satinwood_tree, Chloroxylon_swietenia
African_scented_mahogany, cedar_mahogany, sapele_mahogany, Entandrophragma_cylindricum
silver_ash
native_beech, flindosa, flindosy, Flindersia_australis
bunji-bunji, Flindersia_schottiana
African_mahogany
lanseh_tree, langsat, langset, Lansium_domesticum
true_mahogany, Cuban_mahogany, Dominican_mahogany, Swietinia_mahogani
Honduras_mahogany, Swietinia_macrophylla
Philippine_mahogany, Philippine_cedar, kalantas, Toona_calantas, Cedrela_calantas
caracolito, Ruptiliocarpon_caracolito
common_wood_sorrel, cuckoo_bread, shamrock, Oxalis_acetosella
Bermuda_buttercup, English-weed, Oxalis_pes-caprae, Oxalis_cernua
creeping_oxalis, creeping_wood_sorrel, Oxalis_corniculata
goatsfoot, goat's_foot, Oxalis_caprina
violet_wood_sorrel, Oxalis_violacea
oca, oka, Oxalis_tuberosa, Oxalis_crenata
carambola, carambola_tree, Averrhoa_carambola
bilimbi, Averrhoa_bilimbi
milkwort
senega, Polygala_alba
orange_milkwort, yellow_milkwort, candyweed, yellow_bachelor's_button, Polygala_lutea
flowering_wintergreen, gaywings, bird-on-the-wing, fringed_polygala, Polygala_paucifolia
Seneca_snakeroot, Seneka_snakeroot, senga_root, senega_root, senega_snakeroot, Polygala_senega
common_milkwort, gand_flower, Polygala_vulgaris
rue, herb_of_grace, Ruta_graveolens
citrus, citrus_tree
orange, orange_tree
sour_orange, Seville_orange, bitter_orange, bitter_orange_tree, bigarade, marmalade_orange, Citrus_aurantium
bergamot, bergamot_orange, Citrus_bergamia
pomelo, pomelo_tree, pummelo, shaddock, Citrus_maxima, Citrus_grandis, Citrus_decumana
citron, citron_tree, Citrus_medica
grapefruit, Citrus_paradisi
mandarin, mandarin_orange, mandarin_orange_tree, Citrus_reticulata
tangerine, tangerine_tree
clementine, clementine_tree
satsuma, satsuma_tree
sweet_orange, sweet_orange_tree, Citrus_sinensis
temple_orange, temple_orange_tree, tangor, king_orange, Citrus_nobilis
tangelo, tangelo_tree, ugli_fruit, Citrus_tangelo
rangpur, rangpur_lime, lemanderin, Citrus_limonia
lemon, lemon_tree, Citrus_limon
sweet_lemon, sweet_lime, Citrus_limetta
lime, lime_tree, Citrus_aurantifolia
citrange, citrange_tree, Citroncirus_webberi
fraxinella, dittany, burning_bush, gas_plant, Dictamnus_alba
kumquat, cumquat, kumquat_tree
marumi, marumi_kumquat, round_kumquat, Fortunella_japonica
nagami, nagami_kumquat, oval_kumquat, Fortunella_margarita
cork_tree, Phellodendron_amurense
trifoliate_orange, trifoliata, wild_orange, Poncirus_trifoliata
prickly_ash
toothache_tree, sea_ash, Zanthoxylum_americanum, Zanthoxylum_fraxineum
Hercules'-club, Hercules'-clubs, Hercules-club, Zanthoxylum_clava-herculis
bitterwood_tree
marupa, Simarouba_amara
paradise_tree, bitterwood, Simarouba_glauca
ailanthus
tree_of_heaven, tree_of_the_gods, Ailanthus_altissima
wild_mango, dika, wild_mango_tree, Irvingia_gabonensis
pepper_tree, Kirkia_wilmsii
Jamaica_quassia, bitterwood, Picrasma_excelsa, Picrasma_excelsum
quassia, bitterwood, Quassia_amara
nasturtium
garden_nasturtium, Indian_cress, Tropaeolum_majus
bush_nasturtium, Tropaeolum_minus
canarybird_flower, canarybird_vine, canary_creeper, Tropaeolum_peregrinum
bean_caper, Syrian_bean_caper, Zygophyllum_fabago
palo_santo, Bulnesia_sarmienti
lignum_vitae, Guaiacum_officinale
creosote_bush, coville, hediondilla, Larrea_tridentata
caltrop, devil's_weed, Tribulus_terestris
willow, willow_tree
osier
white_willow, Huntingdon_willow, Salix_alba
silver_willow, silky_willow, Salix_alba_sericea, Salix_sericea
golden_willow, Salix_alba_vitellina, Salix_vitellina
cricket-bat_willow, Salix_alba_caerulea
arctic_willow, Salix_arctica
weeping_willow, Babylonian_weeping_willow, Salix_babylonica
Wisconsin_weeping_willow, Salix_pendulina, Salix_blanda, Salix_pendulina_blanda
pussy_willow, Salix_discolor
sallow
goat_willow, florist's_willow, pussy_willow, Salix_caprea
peachleaf_willow, peach-leaved_willow, almond-leaves_willow, Salix_amygdaloides
almond_willow, black_Hollander, Salix_triandra, Salix_amygdalina
hoary_willow, sage_willow, Salix_candida
crack_willow, brittle_willow, snap_willow, Salix_fragilis
prairie_willow, Salix_humilis
dwarf_willow, Salix_herbacea
grey_willow, gray_willow, Salix_cinerea
arroyo_willow, Salix_lasiolepis
shining_willow, Salix_lucida
swamp_willow, black_willow, Salix_nigra
bay_willow, laurel_willow, Salix_pentandra
purple_willow, red_willow, red_osier, basket_willow, purple_osier, Salix_purpurea
balsam_willow, Salix_pyrifolia
creeping_willow, Salix_repens
Sitka_willow, silky_willow, Salix_sitchensis
dwarf_grey_willow, dwarf_gray_willow, sage_willow, Salix_tristis
bearberry_willow, Salix_uva-ursi
common_osier, hemp_willow, velvet_osier, Salix_viminalis
poplar, poplar_tree
balsam_poplar, hackmatack, tacamahac, Populus_balsamifera
white_poplar, white_aspen, abele, aspen_poplar, silver-leaved_poplar, Populus_alba
grey_poplar, gray_poplar, Populus_canescens
black_poplar, Populus_nigra
Lombardy_poplar, Populus_nigra_italica
cottonwood
Eastern_cottonwood, necklace_poplar, Populus_deltoides
black_cottonwood, Western_balsam_poplar, Populus_trichocarpa
swamp_cottonwood, black_cottonwood, downy_poplar, swamp_poplar, Populus_heterophylla
aspen
quaking_aspen, European_quaking_aspen, Populus_tremula
American_quaking_aspen, American_aspen, Populus_tremuloides
Canadian_aspen, bigtooth_aspen, bigtoothed_aspen, big-toothed_aspen, large-toothed_aspen, large_tooth_aspen, Populus_grandidentata
sandalwood_tree, true_sandalwood, Santalum_album
quandong, quandang, quandong_tree, Eucarya_acuminata, Fusanus_acuminatus
rabbitwood, buffalo_nut, Pyrularia_pubera
Loranthaceae, family_Loranthaceae, mistletoe_family
mistletoe, Loranthus_europaeus
American_mistletoe, Arceuthobium_pusillum
mistletoe, Viscum_album, Old_World_mistletoe
American_mistletoe, Phoradendron_serotinum, Phoradendron_flavescens
aalii
soapberry, soapberry_tree
wild_China_tree, Sapindus_drumondii, Sapindus_marginatus
China_tree, false_dogwood, jaboncillo, chinaberry, Sapindus_saponaria
akee, akee_tree, Blighia_sapida
soapberry_vine
heartseed, Cardiospermum_grandiflorum
balloon_vine, heart_pea, Cardiospermum_halicacabum
longan, lungen, longanberry, Dimocarpus_longan, Euphorbia_litchi, Nephelium_longana
harpullia
harpulla, Harpullia_cupanioides
Moreton_Bay_tulipwood, Harpullia_pendula
litchi, lichee, litchi_tree, Litchi_chinensis, Nephelium_litchi
Spanish_lime, Spanish_lime_tree, honey_berry, mamoncillo, genip, ginep, Melicocca_bijuga, Melicocca_bijugatus
rambutan, rambotan, rambutan_tree, Nephelium_lappaceum
pulasan, pulassan, pulasan_tree, Nephelium_mutabile
pachysandra
Allegheny_spurge, Allegheny_mountain_spurge, Pachysandra_procumbens
bittersweet, American_bittersweet, climbing_bittersweet, false_bittersweet, staff_vine, waxwork, shrubby_bittersweet, Celastrus_scandens
spindle_tree, spindleberry, spindleberry_tree
winged_spindle_tree, Euonymous_alatus
wahoo, burning_bush, Euonymus_atropurpureus
strawberry_bush, wahoo, Euonymus_americanus
evergreen_bittersweet, Euonymus_fortunei_radicans, Euonymus_radicans_vegetus
cyrilla, leatherwood, white_titi, Cyrilla_racemiflora
titi, buckwheat_tree, Cliftonia_monophylla
crowberry
maple
silver_maple, Acer_saccharinum
sugar_maple, rock_maple, Acer_saccharum
red_maple, scarlet_maple, swamp_maple, Acer_rubrum
moosewood, moose-wood, striped_maple, striped_dogwood, goosefoot_maple, Acer_pennsylvanicum
Oregon_maple, big-leaf_maple, Acer_macrophyllum
dwarf_maple, Rocky-mountain_maple, Acer_glabrum
mountain_maple, mountain_alder, Acer_spicatum
vine_maple, Acer_circinatum
hedge_maple, field_maple, Acer_campestre
Norway_maple, Acer_platanoides
sycamore, great_maple, scottish_maple, Acer_pseudoplatanus
box_elder, ash-leaved_maple, Acer_negundo
California_box_elder, Acer_negundo_Californicum
pointed-leaf_maple, Acer_argutum
Japanese_maple, full_moon_maple, Acer_japonicum
Japanese_maple, Acer_palmatum
holly
Chinese_holly, Ilex_cornuta
bearberry, possum_haw, winterberry, Ilex_decidua
inkberry, gallberry, gall-berry, evergreen_winterberry, Ilex_glabra
mate, Paraguay_tea, Ilex_paraguariensis
American_holly, Christmas_holly
low_gallberry_holly
tall_gallberry_holly
yaupon_holly
deciduous_holly
juneberry_holly
largeleaf_holly
Geogia_holly
common_winterberry_holly
smooth_winterberry_holly
cashew, cashew_tree, Anacardium_occidentale
goncalo_alves, Astronium_fraxinifolium
Venetian_sumac, wig_tree, Cotinus_coggygria
laurel_sumac, Malosma_laurina, Rhus_laurina
mango, mango_tree, Mangifera_indica
pistachio, Pistacia_vera, pistachio_tree
terebinth, Pistacia_terebinthus
mastic, mastic_tree, lentisk, Pistacia_lentiscus
Australian_sumac, Rhodosphaera_rhodanthema, Rhus_rhodanthema
sumac, sumach, shumac
smooth_sumac, scarlet_sumac, vinegar_tree, Rhus_glabra
sugar-bush, sugar_sumac, Rhus_ovata
staghorn_sumac, velvet_sumac, Virginian_sumac, vinegar_tree, Rhus_typhina
squawbush, squaw-bush, skunkbush, Rhus_trilobata
aroeira_blanca, Schinus_chichita
pepper_tree, molle, Peruvian_mastic_tree, Schinus_molle
Brazilian_pepper_tree, Schinus_terebinthifolius
hog_plum, yellow_mombin, yellow_mombin_tree, Spondias_mombin
mombin, mombin_tree, jocote, Spondias_purpurea
poison_ash, poison_dogwood, poison_sumac, Toxicodendron_vernix, Rhus_vernix
poison_ivy, markweed, poison_mercury, poison_oak, Toxicodendron_radicans, Rhus_radicans
western_poison_oak, Toxicodendron_diversilobum, Rhus_diversiloba
eastern_poison_oak, Toxicodendron_quercifolium, Rhus_quercifolia, Rhus_toxicodenedron
varnish_tree, lacquer_tree, Chinese_lacquer_tree, Japanese_lacquer_tree, Japanese_varnish_tree, Japanese_sumac, Toxicodendron_vernicifluum, Rhus_verniciflua
horse_chestnut, buckeye, Aesculus_hippocastanum
buckeye, horse_chestnut, conker
sweet_buckeye
Ohio_buckeye
dwarf_buckeye, bottlebrush_buckeye
red_buckeye
particolored_buckeye
ebony, ebony_tree, Diospyros_ebenum
marblewood, marble-wood, Andaman_marble, Diospyros_kurzii
marblewood, marble-wood
persimmon, persimmon_tree
Japanese_persimmon, kaki, Diospyros_kaki
American_persimmon, possumwood, Diospyros_virginiana
date_plum, Diospyros_lotus
buckthorn
southern_buckthorn, shittimwood, shittim, mock_orange, Bumelia_lycioides
false_buckthorn, chittamwood, chittimwood, shittimwood, black_haw, Bumelia_lanuginosa
star_apple, caimito, Chrysophyllum_cainito
satinleaf, satin_leaf, caimitillo, damson_plum, Chrysophyllum_oliviforme
balata, balata_tree, beefwood, bully_tree, Manilkara_bidentata
sapodilla, sapodilla_tree, Manilkara_zapota, Achras_zapota
gutta-percha_tree, Palaquium_gutta
gutta-percha_tree
canistel, canistel_tree, Pouteria_campechiana_nervosa
marmalade_tree, mammee, sapote, Pouteria_zapota, Calocarpum_zapota
sweetleaf, Symplocus_tinctoria
Asiatic_sweetleaf, sapphire_berry, Symplocus_paniculata
styrax
snowbell, Styrax_obassia
Japanese_snowbell, Styrax_japonicum
Texas_snowbell, Texas_snowbells, Styrax_texana
silver-bell_tree, silverbell_tree, snowdrop_tree, opossum_wood, Halesia_carolina, Halesia_tetraptera
carnivorous_plant
pitcher_plant
common_pitcher_plant, huntsman's_cup, huntsman's_cups, Sarracenia_purpurea
hooded_pitcher_plant, Sarracenia_minor
huntsman's_horn, huntsman's_horns, yellow_trumpet, yellow_pitcher_plant, trumpets, Sarracenia_flava
tropical_pitcher_plant
sundew, sundew_plant, daily_dew
Venus's_flytrap, Venus's_flytraps, Dionaea_muscipula
waterwheel_plant, Aldrovanda_vesiculosa
Drosophyllum_lusitanicum
roridula
Australian_pitcher_plant, Cephalotus_follicularis
sedum
stonecrop
rose-root, midsummer-men, Sedum_rosea
orpine, orpin, livelong, live-forever, Sedum_telephium
pinwheel, Aeonium_haworthii
Christmas_bush, Christmas_tree, Ceratopetalum_gummiferum
hortensia, Hydrangea_macrophylla_hortensis
fall-blooming_hydrangea, Hydrangea_paniculata
carpenteria, Carpenteria_californica
decumary, Decumaria_barbata, Decumaria_barbara
deutzia
philadelphus
mock_orange, syringa, Philadelphus_coronarius
saxifrage, breakstone, rockfoil
yellow_mountain_saxifrage, Saxifraga_aizoides
meadow_saxifrage, fair-maids-of-France, Saxifraga_granulata
mossy_saxifrage, Saxifraga_hypnoides
western_saxifrage, Saxifraga_occidentalis
purple_saxifrage, Saxifraga_oppositifolia
star_saxifrage, starry_saxifrage, Saxifraga_stellaris
strawberry_geranium, strawberry_saxifrage, mother-of-thousands, Saxifraga_stolonifera, Saxifraga_sarmentosam
astilbe
false_goatsbeard, Astilbe_biternata
dwarf_astilbe, Astilbe_chinensis_pumila
spirea, spiraea, Astilbe_japonica
bergenia
coast_boykinia, Boykinia_elata, Boykinia_occidentalis
golden_saxifrage, golden_spleen
umbrella_plant, Indian_rhubarb, Darmera_peltata, Peltiphyllum_peltatum
bridal_wreath, bridal-wreath, Francoa_ramosa
alumroot, alumbloom
coralbells, Heuchera_sanguinea
leatherleaf_saxifrage, Leptarrhena_pyrolifolia
woodland_star, Lithophragma_affine, Lithophragma_affinis, Tellima_affinis
prairie_star, Lithophragma_parviflorum
miterwort, mitrewort, bishop's_cap
five-point_bishop's_cap, Mitella_pentandra
parnassia, grass-of-Parnassus
bog_star, Parnassia_palustris
fringed_grass_of_Parnassus, Parnassia_fimbriata
false_alumroot, fringe_cups, Tellima_grandiflora
foamflower, coolwart, false_miterwort, false_mitrewort, Tiarella_cordifolia
false_miterwort, false_mitrewort, Tiarella_unifoliata
pickaback_plant, piggyback_plant, youth-on-age, Tolmiea_menziesii
currant, currant_bush
black_currant, European_black_currant, Ribes_nigrum
white_currant, Ribes_sativum
gooseberry, gooseberry_bush, Ribes_uva-crispa, Ribes_grossularia
plane_tree, sycamore, platan
London_plane, Platanus_acerifolia
American_sycamore, American_plane, buttonwood, Platanus_occidentalis
oriental_plane, Platanus_orientalis
California_sycamore, Platanus_racemosa
Arizona_sycamore, Platanus_wrightii
Greek_valerian, Polemonium_reptans
northern_Jacob's_ladder, Polemonium_boreale
skunkweed, skunk-weed, Polemonium_viscosum
phlox
moss_pink, mountain_phlox, moss_phlox, dwarf_phlox, Phlox_subulata
evening-snow, Linanthus_dichotomus
acanthus
bear's_breech, bear's_breeches, sea_holly, Acanthus_mollis
caricature_plant, Graptophyllum_pictum
black-eyed_Susan, black-eyed_Susan_vine, Thunbergia_alata
catalpa, Indian_bean
Catalpa_bignioides
Catalpa_speciosa
desert_willow, Chilopsis_linearis
calabash, calabash_tree, Crescentia_cujete
calabash
borage, tailwort, Borago_officinalis
common_amsinckia, Amsinckia_intermedia
anchusa
bugloss, alkanet, Anchusa_officinalis
cape_forget-me-not, Anchusa_capensis
cape_forget-me-not, Anchusa_riparia
Spanish_elm, Equador_laurel, salmwood, cypre, princewood, Cordia_alliodora
princewood, Spanish_elm, Cordia_gerascanthus
Chinese_forget-me-not, Cynoglossum_amabile
hound's-tongue, Cynoglossum_officinale
hound's-tongue, Cynoglossum_virginaticum
blueweed, blue_devil, blue_thistle, viper's_bugloss, Echium_vulgare
beggar's_lice, beggar_lice
gromwell, Lithospermum_officinale
puccoon, Lithospermum_caroliniense
Virginia_bluebell, Virginia_cowslip, Mertensia_virginica
garden_forget-me-not, Myosotis_sylvatica
forget-me-not, mouse_ear, Myosotis_scorpiodes
false_gromwell
comfrey, cumfrey
common_comfrey, boneset, Symphytum_officinale
convolvulus
bindweed
field_bindweed, wild_morning-glory, Convolvulus_arvensis
scammony, Convolvulus_scammonia
silverweed
dodder
dichondra, Dichondra_micrantha
cypress_vine, star-glory, Indian_pink, Ipomoea_quamoclit, Quamoclit_pennata
moonflower, belle_de_nuit, Ipomoea_alba
wild_potato_vine, wild_sweet_potato_vine, man-of-the-earth, manroot, scammonyroot, Ipomoea_panurata, Ipomoea_fastigiata
red_morning-glory, star_ipomoea, Ipomoea_coccinea
man-of-the-earth, Ipomoea_leptophylla
scammony, Ipomoea_orizabensis
Japanese_morning_glory, Ipomoea_nil
imperial_Japanese_morning_glory, Ipomoea_imperialis
gesneriad
gesneria
achimenes, hot_water_plant
aeschynanthus
lace-flower_vine, Alsobia_dianthiflora, Episcia_dianthiflora
columnea
episcia
gloxinia
Canterbury_bell, Gloxinia_perennis
kohleria
African_violet, Saintpaulia_ionantha
streptocarpus
Cape_primrose
waterleaf
Virginia_waterleaf, Shawnee_salad, shawny, Indian_salad, John's_cabbage, Hydrophyllum_virginianum
yellow_bells, California_yellow_bells, whispering_bells, Emmanthe_penduliflora
yerba_santa, Eriodictyon_californicum
nemophila
baby_blue-eyes, Nemophila_menziesii
five-spot, Nemophila_maculata
scorpionweed, scorpion_weed, phacelia
California_bluebell, Phacelia_campanularia
California_bluebell, whitlavia, Phacelia_minor, Phacelia_whitlavia
fiddleneck, Phacelia_tanacetifolia
fiesta_flower, Pholistoma_auritum, Nemophila_aurita
basil_thyme, basil_balm, mother_of_thyme, Acinos_arvensis, Satureja_acinos
giant_hyssop
yellow_giant_hyssop, Agastache_nepetoides
anise_hyssop, Agastache_foeniculum
Mexican_hyssop, Agastache_mexicana
bugle, bugleweed
creeping_bugle, Ajuga_reptans
erect_bugle, blue_bugle, Ajuga_genevensis
pyramid_bugle, Ajuga_pyramidalis
wood_mint
hairy_wood_mint, Blephilia_hirsuta
downy_wood_mint, Blephilia_celiata
calamint
common_calamint, Calamintha_sylvatica, Satureja_calamintha_officinalis
large-flowered_calamint, Calamintha_grandiflora, Clinopodium_grandiflorum, Satureja_grandiflora
lesser_calamint, field_balm, Calamintha_nepeta, Calamintha_nepeta_glantulosa, Satureja_nepeta, Satureja_calamintha_glandulosa
wild_basil, cushion_calamint, Clinopodium_vulgare, Satureja_vulgaris
horse_balm, horseweed, stoneroot, stone-root, richweed, stone_root, Collinsonia_canadensis
coleus, flame_nettle
country_borage, Coleus_aromaticus, Coleus_amboinicus, Plectranthus_amboinicus
painted_nettle, Joseph's_coat, Coleus_blumei, Solenostemon_blumei, Solenostemon_scutellarioides
Apalachicola_rosemary, Conradina_glabra
dragonhead, dragon's_head, Dracocephalum_parviflorum
elsholtzia
hemp_nettle, dead_nettle, Galeopsis_tetrahit
ground_ivy, alehoof, field_balm, gill-over-the-ground, runaway_robin, Glechoma_hederaceae, Nepeta_hederaceae
pennyroyal, American_pennyroyal, Hedeoma_pulegioides
hyssop, Hyssopus_officinalis
dead_nettle
white_dead_nettle, Lamium_album
henbit, Lamium_amplexicaule
English_lavender, Lavandula_angustifolia, Lavandula_officinalis
French_lavender, Lavandula_stoechas
spike_lavender, French_lavender, Lavandula_latifolia
dagga, Cape_dagga, red_dagga, wilde_dagga, Leonotis_leonurus
lion's-ear, Leonotis_nepetaefolia, Leonotis_nepetifolia
motherwort, Leonurus_cardiaca
pitcher_sage, Lepechinia_calycina, Sphacele_calycina
bugleweed, Lycopus_virginicus
water_horehound, Lycopus_americanus
gipsywort, gypsywort, Lycopus_europaeus
origanum
oregano, marjoram, pot_marjoram, wild_marjoram, winter_sweet, Origanum_vulgare
sweet_marjoram, knotted_marjoram, Origanum_majorana, Majorana_hortensis
horehound
common_horehound, white_horehound, Marrubium_vulgare
lemon_balm, garden_balm, sweet_balm, bee_balm, beebalm, Melissa_officinalis
corn_mint, field_mint, Mentha_arvensis
water-mint, water_mint, Mentha_aquatica
bergamot_mint, lemon_mint, eau_de_cologne_mint, Mentha_citrata
horsemint, Mentha_longifolia
peppermint, Mentha_piperita
spearmint, Mentha_spicata
apple_mint, applemint, Mentha_rotundifolia, Mentha_suaveolens
pennyroyal, Mentha_pulegium
yerba_buena, Micromeria_chamissonis, Micromeria_douglasii, Satureja_douglasii
molucca_balm, bells_of_Ireland, Molucella_laevis
monarda, wild_bergamot
bee_balm, beebalm, bergamot_mint, oswego_tea, Monarda_didyma
horsemint, Monarda_punctata
bee_balm, beebalm, Monarda_fistulosa
lemon_mint, horsemint, Monarda_citriodora
plains_lemon_monarda, Monarda_pectinata
basil_balm, Monarda_clinopodia
mustang_mint, Monardella_lanceolata
catmint, catnip, Nepeta_cataria
basil
beefsteak_plant, Perilla_frutescens_crispa
phlomis
Jerusalem_sage, Phlomis_fruticosa
physostegia
plectranthus
patchouli, patchouly, pachouli, Pogostemon_cablin
self-heal, heal_all, Prunella_vulgaris
mountain_mint
rosemary, Rosmarinus_officinalis
clary_sage, Salvia_clarea
purple_sage, chaparral_sage, Salvia_leucophylla
cancerweed, cancer_weed, Salvia_lyrata
common_sage, ramona, Salvia_officinalis
meadow_clary, Salvia_pratensis
clary, Salvia_sclarea
pitcher_sage, Salvia_spathacea
Mexican_mint, Salvia_divinorum
wild_sage, wild_clary, vervain_sage, Salvia_verbenaca
savory
summer_savory, Satureja_hortensis, Satureia_hortensis
winter_savory, Satureja_montana, Satureia_montana
skullcap, helmetflower
blue_pimpernel, blue_skullcap, mad-dog_skullcap, mad-dog_weed, Scutellaria_lateriflora
hedge_nettle, dead_nettle, Stachys_sylvatica
hedge_nettle, Stachys_palustris
germander
American_germander, wood_sage, Teucrium_canadense
cat_thyme, marum, Teucrium_marum
wood_sage, Teucrium_scorodonia
thyme
common_thyme, Thymus_vulgaris
wild_thyme, creeping_thyme, Thymus_serpyllum
blue_curls
turpentine_camphor_weed, camphorweed, vinegarweed, Trichostema_lanceolatum
bastard_pennyroyal, Trichostema_dichotomum
bladderwort
butterwort
genlisea
martynia, Martynia_annua
common_unicorn_plant, devil's_claw, common_devil's_claw, elephant-tusk, proboscis_flower, ram's_horn, Proboscidea_louisianica
sand_devil's_claw, Proboscidea_arenaria, Martynia_arenaria
sweet_unicorn_plant, Proboscidea_fragrans, Martynia_fragrans
figwort
snapdragon
white_snapdragon, Antirrhinum_coulterianum
yellow_twining_snapdragon, Antirrhinum_filipes
Mediterranean_snapdragon, Antirrhinum_majus
kitten-tails
Alpine_besseya, Besseya_alpina
false_foxglove, Aureolaria_pedicularia, Gerardia_pedicularia
false_foxglove, Aureolaria_virginica, Gerardia_virginica
calceolaria, slipperwort
Indian_paintbrush, painted_cup
desert_paintbrush, Castilleja_chromosa
giant_red_paintbrush, Castilleja_miniata
great_plains_paintbrush, Castilleja_sessiliflora
sulfur_paintbrush, Castilleja_sulphurea
shellflower, shell-flower, turtlehead, snakehead, snake-head, Chelone_glabra
maiden_blue-eyed_Mary, Collinsia_parviflora
blue-eyed_Mary, Collinsia_verna
foxglove, digitalis
common_foxglove, fairy_bell, fingerflower, finger-flower, fingerroot, finger-root, Digitalis_purpurea
yellow_foxglove, straw_foxglove, Digitalis_lutea
gerardia
blue_toadflax, old-field_toadflax, Linaria_canadensis
toadflax, butter-and-eggs, wild_snapdragon, devil's_flax, Linaria_vulgaris
golden-beard_penstemon, Penstemon_barbatus
scarlet_bugler, Penstemon_centranthifolius
red_shrubby_penstemon, redwood_penstemon
Platte_River_penstemon, Penstemon_cyananthus
hot-rock_penstemon, Penstemon_deustus
Jones'_penstemon, Penstemon_dolius
shrubby_penstemon, lowbush_penstemon, Penstemon_fruticosus
narrow-leaf_penstemon, Penstemon_linarioides
balloon_flower, scented_penstemon, Penstemon_palmeri
Parry's_penstemon, Penstemon_parryi
rock_penstemon, cliff_penstemon, Penstemon_rupicola
Rydberg's_penstemon, Penstemon_rydbergii
cascade_penstemon, Penstemon_serrulatus
Whipple's_penstemon, Penstemon_whippleanus
moth_mullein, Verbascum_blattaria
white_mullein, Verbascum_lychnitis
purple_mullein, Verbascum_phoeniceum
common_mullein, great_mullein, Aaron's_rod, flannel_mullein, woolly_mullein, torch, Verbascum_thapsus
veronica, speedwell
field_speedwell, Veronica_agrestis
brooklime, American_brooklime, Veronica_americana
corn_speedwell, Veronica_arvensis
brooklime, European_brooklime, Veronica_beccabunga
germander_speedwell, bird's_eye, Veronica_chamaedrys
water_speedwell, Veronica_michauxii, Veronica_anagallis-aquatica
common_speedwell, gypsyweed, Veronica_officinalis
purslane_speedwell, Veronica_peregrina
thyme-leaved_speedwell, Veronica_serpyllifolia
nightshade
horse_nettle, ball_nettle, bull_nettle, ball_nightshade, Solanum_carolinense
African_holly, Solanum_giganteum
potato_vine, Solanum_jasmoides
garden_huckleberry, wonderberry, sunberry, Solanum_nigrum_guineese, Solanum_melanocerasum, Solanum_burbankii
naranjilla, Solanum_quitoense
potato_vine, giant_potato_creeper, Solanum_wendlandii
potato_tree, Brazilian_potato_tree, Solanum_wrightii, Solanum_macranthum
belladonna, belladonna_plant, deadly_nightshade, Atropa_belladonna
bush_violet, browallia
lady-of-the-night, Brunfelsia_americana
angel's_trumpet, maikoa, Brugmansia_arborea, Datura_arborea
angel's_trumpet, Brugmansia_suaveolens, Datura_suaveolens
red_angel's_trumpet, Brugmansia_sanguinea, Datura_sanguinea
cone_pepper, Capsicum_annuum_conoides
bird_pepper, Capsicum_frutescens_baccatum, Capsicum_baccatum
day_jessamine, Cestrum_diurnum
night_jasmine, night_jessamine, Cestrum_nocturnum
tree_tomato, tamarillo
thorn_apple
jimsonweed, jimson_weed, Jamestown_weed, common_thorn_apple, apple_of_Peru, Datura_stramonium
pichi, Fabiana_imbricata
henbane, black_henbane, stinking_nightshade, Hyoscyamus_niger
Egyptian_henbane, Hyoscyamus_muticus
matrimony_vine, boxthorn
common_matrimony_vine, Duke_of_Argyll's_tea_tree, Lycium_barbarum, Lycium_halimifolium
Christmasberry, Christmas_berry, Lycium_carolinianum
plum_tomato
mandrake, devil's_apples, Mandragora_officinarum
mandrake_root, mandrake
apple_of_Peru, shoo_fly, Nicandra_physaloides
flowering_tobacco, Jasmine_tobacco, Nicotiana_alata
common_tobacco, Nicotiana_tabacum
wild_tobacco, Indian_tobacco, Nicotiana_rustica
cupflower, nierembergia
whitecup, Nierembergia_repens, Nierembergia_rivularis
petunia
large_white_petunia, Petunia_axillaris
violet-flowered_petunia, Petunia_integrifolia
hybrid_petunia, Petunia_hybrida
cape_gooseberry, purple_ground_cherry, Physalis_peruviana
strawberry_tomato, dwarf_cape_gooseberry, Physalis_pruinosa
tomatillo, jamberry, Mexican_husk_tomato, Physalis_ixocarpa
tomatillo, miltomate, purple_ground_cherry, jamberry, Physalis_philadelphica
yellow_henbane, Physalis_viscosa
cock's_eggs, Salpichroa_organifolia, Salpichroa_rhomboidea
salpiglossis
painted_tongue, Salpiglossis_sinuata
butterfly_flower, poor_man's_orchid, schizanthus
Scopolia_carniolica
chalice_vine, trumpet_flower, cupflower, Solandra_guttata
verbena, vervain
lantana
black_mangrove, Avicennia_marina
white_mangrove, Avicennia_officinalis
black_mangrove, Aegiceras_majus
teak, Tectona_grandis
spurge
sun_spurge, wartweed, wartwort, devil's_milk, Euphorbia_helioscopia
petty_spurge, devil's_milk, Euphorbia_peplus
medusa's_head, Euphorbia_medusae, Euphorbia_caput-medusae
wild_spurge, flowering_spurge, tramp's_spurge, Euphorbia_corollata
snow-on-the-mountain, snow-in-summer, ghost_weed, Euphorbia_marginata
cypress_spurge, Euphorbia_cyparissias
leafy_spurge, wolf's_milk, Euphorbia_esula
hairy_spurge, Euphorbia_hirsuta
poinsettia, Christmas_star, Christmas_flower, lobster_plant, Mexican_flameleaf, painted_leaf, Euphorbia_pulcherrima
Japanese_poinsettia, mole_plant, paint_leaf, Euphorbia_heterophylla
fire-on-the-mountain, painted_leaf, Mexican_fire_plant, Euphorbia_cyathophora
wood_spurge, Euphorbia_amygdaloides
dwarf_spurge, Euphorbia_exigua
scarlet_plume, Euphorbia_fulgens
naboom, cactus_euphorbia, Euphorbia_ingens
crown_of_thorns, Christ_thorn, Christ_plant, Euphorbia_milii
toothed_spurge, Euphorbia_dentata
three-seeded_mercury, Acalypha_virginica
croton, Croton_tiglium
cascarilla, Croton_eluteria
cascarilla_bark, eleuthera_bark, sweetwood_bark
castor-oil_plant, castor_bean_plant, palma_christi, palma_christ, Ricinus_communis
spurge_nettle, tread-softly, devil_nettle, pica-pica, Cnidoscolus_urens, Jatropha_urens, Jatropha_stimulosus
physic_nut, Jatropha_curcus
Para_rubber_tree, caoutchouc_tree, Hevea_brasiliensis
cassava, casava
bitter_cassava, manioc, mandioc, mandioca, tapioca_plant, gari, Manihot_esculenta, Manihot_utilissima
cassava, manioc
sweet_cassava, Manihot_dulcis
candlenut, varnish_tree, Aleurites_moluccana
tung_tree, tung, tung-oil_tree, Aleurites_fordii
slipper_spurge, slipper_plant
candelilla, Pedilanthus_bracteatus, Pedilanthus_pavonis
Jewbush, Jew-bush, Jew_bush, redbird_cactus, redbird_flower, Pedilanthus_tithymaloides
jumping_bean, jumping_seed, Mexican_jumping_bean
camellia, camelia
japonica, Camellia_japonica
umbellifer, umbelliferous_plant
wild_parsley
fool's_parsley, lesser_hemlock, Aethusa_cynapium
dill, Anethum_graveolens
angelica, angelique
garden_angelica, archangel, Angelica_Archangelica
wild_angelica, Angelica_sylvestris
chervil, beaked_parsley, Anthriscus_cereifolium
cow_parsley, wild_chervil, Anthriscus_sylvestris
wild_celery, Apium_graveolens
astrantia, masterwort
greater_masterwort, Astrantia_major
caraway, Carum_carvi
whorled_caraway
water_hemlock, Cicuta_verosa
spotted_cowbane, spotted_hemlock, spotted_water_hemlock
hemlock, poison_hemlock, poison_parsley, California_fern, Nebraska_fern, winter_fern, Conium_maculatum
earthnut, Conopodium_denudatum
cumin, Cuminum_cyminum
wild_carrot, Queen_Anne's_lace, Daucus_carota
eryngo, eringo
sea_holly, sea_holm, sea_eryngium, Eryngium_maritimum
button_snakeroot, Eryngium_aquaticum
rattlesnake_master, rattlesnake's_master, button_snakeroot, Eryngium_yuccifolium
fennel
common_fennel, Foeniculum_vulgare
Florence_fennel, Foeniculum_dulce, Foeniculum_vulgare_dulce
cow_parsnip, hogweed, Heracleum_sphondylium
lovage, Levisticum_officinale
sweet_cicely, Myrrhis_odorata
water_fennel, Oenanthe_aquatica
parsnip, Pastinaca_sativa
cultivated_parsnip
wild_parsnip, madnep
parsley, Petroselinum_crispum
Italian_parsley, flat-leaf_parsley, Petroselinum_crispum_neapolitanum
Hamburg_parsley, turnip-rooted_parsley, Petroselinum_crispum_tuberosum
anise, anise_plant, Pimpinella_anisum
sanicle, snakeroot
purple_sanicle, Sanicula_bipinnatifida
European_sanicle, Sanicula_Europaea
water_parsnip, Sium_suave
greater_water_parsnip, Sium_latifolium
skirret, Sium_sisarum
dogwood, dogwood_tree, cornel
common_white_dogwood, eastern_flowering_dogwood, Cornus_florida
red_osier, red_osier_dogwood, red_dogwood, American_dogwood, redbrush, Cornus_stolonifera
silky_dogwood, Cornus_obliqua
silky_cornel, silky_dogwood, Cornus_amomum
common_European_dogwood, red_dogwood, blood-twig, pedwood, Cornus_sanguinea
bunchberry, dwarf_cornel, crackerberry, pudding_berry, Cornus_canadensis
cornelian_cherry, Cornus_mas
puka, Griselinia_lucida
kapuka, Griselinia_littoralis
valerian
common_valerian, garden_heliotrope, Valeriana_officinalis
common_corn_salad, lamb's_lettuce, Valerianella_olitoria, Valerianella_locusta
red_valerian, French_honeysuckle, Centranthus_ruber
filmy_fern, film_fern
bristle_fern, filmy_fern
hare's-foot_bristle_fern, Trichomanes_boschianum
Killarney_fern, Trichomanes_speciosum
kidney_fern, Trichomanes_reniforme
flowering_fern, osmund
royal_fern, royal_osmund, king_fern, ditch_fern, French_bracken, Osmunda_regalis
interrupted_fern, Osmunda_clatonia
crape_fern, Prince-of-Wales_fern, Prince-of-Wales_feather, Prince-of-Wales_plume, Leptopteris_superba, Todea_superba
crepe_fern, king_fern, Todea_barbara
curly_grass, curly_grass_fern, Schizaea_pusilla
pine_fern, Anemia_adiantifolia
climbing_fern
creeping_fern, Hartford_fern, Lygodium_palmatum
climbing_maidenhair, climbing_maidenhair_fern, snake_fern, Lygodium_microphyllum
scented_fern, Mohria_caffrorum
clover_fern, pepperwort
nardoo, nardo, common_nardoo, Marsilea_drummondii
water_clover, Marsilea_quadrifolia
pillwort, Pilularia_globulifera
regnellidium, Regnellidium_diphyllum
floating-moss, Salvinia_rotundifolia, Salvinia_auriculata
mosquito_fern, floating_fern, Carolina_pond_fern, Azolla_caroliniana
adder's_tongue, adder's_tongue_fern
ribbon_fern, Ophioglossum_pendulum
grape_fern
daisyleaf_grape_fern, daisy-leaved_grape_fern, Botrychium_matricariifolium
leathery_grape_fern, Botrychium_multifidum
rattlesnake_fern, Botrychium_virginianum
flowering_fern, Helminthostachys_zeylanica
powdery_mildew
Dutch_elm_fungus, Ceratostomella_ulmi
ergot, Claviceps_purpurea
rye_ergot
black_root_rot_fungus, Xylaria_mali
dead-man's-fingers, dead-men's-fingers, Xylaria_polymorpha
sclerotinia
brown_cup
earthball, false_truffle, puffball, hard-skinned_puffball
Scleroderma_citrinum, Scleroderma_aurantium
Scleroderma_flavidium, star_earthball
Scleroderma_bovista, smooth_earthball
Podaxaceae
stalked_puffball
stalked_puffball
false_truffle
Rhizopogon_idahoensis
Truncocolumella_citrina
mucor
rhizopus
bread_mold, Rhizopus_nigricans
slime_mold, slime_mould
true_slime_mold, acellular_slime_mold, plasmodial_slime_mold, myxomycete
cellular_slime_mold
dictostylium
pond-scum_parasite
potato_wart_fungus, Synchytrium_endobioticum
white_fungus, Saprolegnia_ferax
water_mold
downy_mildew, false_mildew
blue_mold_fungus, Peronospora_tabacina
onion_mildew, Peronospora_destructor
tobacco_mildew, Peronospora_hyoscyami
white_rust
pythium
damping_off_fungus, Pythium_debaryanum
Phytophthora_citrophthora
Phytophthora_infestans
clubroot_fungus, Plasmodiophora_brassicae
Geglossaceae
Sarcosomataceae
Rufous_rubber_cup
devil's_cigar
devil's_urn
truffle, earthnut, earth-ball
club_fungus
coral_fungus
tooth_fungus
lichen
ascolichen
basidiolichen
lecanora
manna_lichen
archil, orchil
roccella, Roccella_tinctoria
beard_lichen, beard_moss, Usnea_barbata
horsehair_lichen, horsetail_lichen
reindeer_moss, reindeer_lichen, arctic_moss, Cladonia_rangiferina
crottle, crottal, crotal
Iceland_moss, Iceland_lichen, Cetraria_islandica
fungus
promycelium
true_fungus
basidiomycete, basidiomycetous_fungi
mushroom
agaric
mushroom
mushroom
toadstool
horse_mushroom, Agaricus_arvensis
meadow_mushroom, field_mushroom, Agaricus_campestris
shiitake, shiitake_mushroom, Chinese_black_mushroom, golden_oak_mushroom, Oriental_black_mushroom, Lentinus_edodes
scaly_lentinus, Lentinus_lepideus
royal_agaric, Caesar's_agaric, Amanita_caesarea
false_deathcap, Amanita_mappa
fly_agaric, Amanita_muscaria
death_cap, death_cup, death_angel, destroying_angel, Amanita_phalloides
blushing_mushroom, blusher, Amanita_rubescens
destroying_angel, Amanita_verna
chanterelle, chantarelle, Cantharellus_cibarius
floccose_chanterelle, Cantharellus_floccosus
pig's_ears, Cantharellus_clavatus
cinnabar_chanterelle, Cantharellus_cinnabarinus
jack-o-lantern_fungus, jack-o-lantern, jack-a-lantern, Omphalotus_illudens
inky_cap, inky-cap_mushroom, Coprinus_atramentarius
shaggymane, shaggy_cap, shaggymane_mushroom, Coprinus_comatus
milkcap, Lactarius_delicioso
fairy-ring_mushroom, Marasmius_oreades
fairy_ring, fairy_circle
oyster_mushroom, oyster_fungus, oyster_agaric, Pleurotus_ostreatus
olive-tree_agaric, Pleurotus_phosphoreus
Pholiota_astragalina
Pholiota_aurea, golden_pholiota
Pholiota_destruens
Pholiota_flammans
Pholiota_flavida
nameko, viscid_mushroom, Pholiota_nameko
Pholiota_squarrosa-adiposa
Pholiota_squarrosa, scaly_pholiota
Pholiota_squarrosoides
Stropharia_ambigua
Stropharia_hornemannii
Stropharia_rugoso-annulata
gill_fungus
Entoloma_lividum, Entoloma_sinuatum
Entoloma_aprile
Chlorophyllum_molybdites
lepiota
parasol_mushroom, Lepiota_procera
poisonous_parasol, Lepiota_morgani
Lepiota_naucina
Lepiota_rhacodes
American_parasol, Lepiota_americana
Lepiota_rubrotincta
Lepiota_clypeolaria
onion_stem, Lepiota_cepaestipes
pink_disease_fungus, Corticium_salmonicolor
bottom_rot_fungus, Corticium_solani
potato_fungus, Pellicularia_filamentosa, Rhizoctinia_solani
coffee_fungus, Pellicularia_koleroga
blewits, Clitocybe_nuda
sandy_mushroom, Tricholoma_populinum
Tricholoma_pessundatum
Tricholoma_sejunctum
man-on-a-horse, Tricholoma_flavovirens
Tricholoma_venenata
Tricholoma_pardinum
Tricholoma_vaccinum
Tricholoma_aurantium
Volvaria_bombycina
Pluteus_aurantiorugosus
Pluteus_magnus, sawdust_mushroom
deer_mushroom, Pluteus_cervinus
straw_mushroom, Chinese_mushroom, Volvariella_volvacea
Volvariella_bombycina
Clitocybe_clavipes
Clitocybe_dealbata
Clitocybe_inornata
Clitocybe_robusta, Clytocybe_alba
Clitocybe_irina, Tricholoma_irinum, Lepista_irina
Clitocybe_subconnexa
winter_mushroom, Flammulina_velutipes
mycelium
sclerotium
sac_fungus
ascomycete, ascomycetous_fungus
Clavicipitaceae, grainy_club_mushrooms
grainy_club
yeast
baker's_yeast, brewer's_yeast, Saccharomyces_cerevisiae
wine-maker's_yeast, Saccharomyces_ellipsoides
Aspergillus_fumigatus
brown_root_rot_fungus, Thielavia_basicola
discomycete, cup_fungus
Leotia_lubrica
Mitrula_elegans
Sarcoscypha_coccinea, scarlet_cup
Caloscypha_fulgens
Aleuria_aurantia, orange_peel_fungus
elf_cup
Peziza_domicilina
blood_cup, fairy_cup, Peziza_coccinea
Urnula_craterium, urn_fungus
Galiella_rufa
Jafnea_semitosta
morel
common_morel, Morchella_esculenta, sponge_mushroom, sponge_morel
Disciotis_venosa, cup_morel
Verpa, bell_morel
Verpa_bohemica, early_morel
Verpa_conica, conic_Verpa
black_morel, Morchella_conica, conic_morel, Morchella_angusticeps, narrowhead_morel
Morchella_crassipes, thick-footed_morel
Morchella_semilibera, half-free_morel, cow's_head
Wynnea_americana
Wynnea_sparassoides
false_morel
lorchel
helvella
Helvella_crispa, miter_mushroom
Helvella_acetabulum
Helvella_sulcata
discina
gyromitra
Gyromitra_californica, California_false_morel
Gyromitra_sphaerospora, round-spored_gyromitra
Gyromitra_esculenta, brain_mushroom, beefsteak_morel
Gyromitra_infula, saddled-shaped_false_morel
Gyromitra_fastigiata, Gyromitra_brunnea
Gyromitra_gigas
gasteromycete, gastromycete
stinkhorn, carrion_fungus
common_stinkhorn, Phallus_impudicus
Phallus_ravenelii
dog_stinkhorn, Mutinus_caninus
Calostoma_lutescens
Calostoma_cinnabarina
Calostoma_ravenelii
stinky_squid, Pseudocolus_fusiformis
puffball, true_puffball
giant_puffball, Calvatia_gigantea
earthstar
Geastrum_coronatum
Radiigera_fuscogleba
Astreus_pteridis
Astreus_hygrometricus
bird's-nest_fungus
Gastrocybe_lateritia
Macowanites_americanus
polypore, pore_fungus, pore_mushroom
bracket_fungus, shelf_fungus
Albatrellus_dispansus
Albatrellus_ovinus, sheep_polypore
Neolentinus_ponderosus
Oligoporus_leucospongia
Polyporus_tenuiculus
hen-of-the-woods, hen_of_the_woods, Polyporus_frondosus, Grifola_frondosa
Polyporus_squamosus, scaly_polypore
beefsteak_fungus, Fistulina_hepatica
agaric, Fomes_igniarius
bolete
Boletus_chrysenteron
Boletus_edulis
Frost's_bolete, Boletus_frostii
Boletus_luridus
Boletus_mirabilis
Boletus_pallidus
Boletus_pulcherrimus
Boletus_pulverulentus
Boletus_roxanae
Boletus_subvelutipes
Boletus_variipes
Boletus_zelleri
Fuscoboletinus_paluster
Fuscoboletinus_serotinus
Leccinum_fibrillosum
Suillus_albivelatus
old-man-of-the-woods, Strobilomyces_floccopus
Boletellus_russellii
jelly_fungus
snow_mushroom, Tremella_fuciformis
witches'_butter, Tremella_lutescens
Tremella_foliacea
Tremella_reticulata
Jew's-ear, Jew's-ears, ear_fungus, Auricularia_auricula
rust, rust_fungus
aecium
flax_rust, flax_rust_fungus, Melampsora_lini
blister_rust, Cronartium_ribicola
wheat_rust, Puccinia_graminis
apple_rust, cedar-apple_rust, Gymnosporangium_juniperi-virginianae
smut, smut_fungus
covered_smut
loose_smut
cornsmut, corn_smut
boil_smut, Ustilago_maydis
Sphacelotheca, genus_Sphacelotheca
head_smut, Sphacelotheca_reiliana
bunt, Tilletia_caries
bunt, stinking_smut, Tilletia_foetida
onion_smut, Urocystis_cepulae
flag_smut_fungus
wheat_flag_smut, Urocystis_tritici
felt_fungus, Septobasidium_pseudopedicellatum
waxycap
Hygrocybe_acutoconica, conic_waxycap
Hygrophorus_borealis
Hygrophorus_caeruleus
Hygrophorus_inocybiformis
Hygrophorus_kauffmanii
Hygrophorus_marzuolus
Hygrophorus_purpurascens
Hygrophorus_russula
Hygrophorus_sordidus
Hygrophorus_tennesseensis
Hygrophorus_turundus
Neohygrophorus_angelesianus
Cortinarius_armillatus
Cortinarius_atkinsonianus
Cortinarius_corrugatus
Cortinarius_gentilis
Cortinarius_mutabilis, purple-staining_Cortinarius
Cortinarius_semisanguineus
Cortinarius_subfoetidus
Cortinarius_violaceus
Gymnopilus_spectabilis
Gymnopilus_validipes
Gymnopilus_ventricosus
mold, mould
mildew
verticillium
monilia
candida
Candida_albicans, Monilia_albicans
blastomycete
yellow_spot_fungus, Cercospora_kopkei
green_smut_fungus, Ustilaginoidea_virens
dry_rot
rhizoctinia
houseplant
bedder, bedding_plant
succulent
cultivar
weed
wort
brier
aril
sporophyll, sporophyl
sporangium, spore_case, spore_sac
sporangiophore
ascus
ascospore
arthrospore
eusporangium
tetrasporangium
gametangium
sorus
sorus
partial_veil
lignum
vascular_ray, medullary_ray
phloem, bast
evergreen, evergreen_plant
deciduous_plant
poisonous_plant
vine
creeper
tendril
root_climber
lignosae
arborescent_plant
snag
tree
timber_tree
treelet
arbor
bean_tree
pollard
sapling
shade_tree
gymnospermous_tree
conifer, coniferous_tree
angiospermous_tree, flowering_tree
nut_tree
spice_tree
fever_tree
stump, tree_stump
bonsai
ming_tree
ming_tree
undershrub
subshrub, suffrutex
bramble
liana
geophyte
desert_plant, xerophyte, xerophytic_plant, xerophile, xerophilous_plant
mesophyte, mesophytic_plant
marsh_plant, bog_plant, swamp_plant
hemiepiphyte, semiepiphyte
strangler, strangler_tree
lithophyte, lithophytic_plant
saprobe
autophyte, autophytic_plant, autotroph, autotrophic_organism
root
taproot
prop_root
prophyll
rootstock
quickset
stolon, runner, offset
tuberous_plant
rhizome, rootstock, rootstalk
rachis
caudex
cladode, cladophyll, phylloclad, phylloclade
receptacle
scape, flower_stalk
umbel
petiole, leafstalk
peduncle
pedicel, pedicle
flower_cluster
raceme
panicle
thyrse, thyrsus
cyme
cymule
glomerule
scorpioid_cyme
ear, spike, capitulum
spadix
bulbous_plant
bulbil, bulblet
cormous_plant
fruit
fruitlet
seed
bean
nut
nutlet
kernel, meat
syconium
berry
aggregate_fruit, multiple_fruit, syncarp
simple_fruit, bacca
acinus
drupe, stone_fruit
drupelet
pome, false_fruit
pod, seedpod
loment
pyxidium, pyxis
husk
cornhusk
pod, cod, seedcase
accessory_fruit, pseudocarp
buckthorn
buckthorn_berry, yellow_berry
cascara_buckthorn, bearberry, bearwood, chittamwood, chittimwood, Rhamnus_purshianus
cascara, cascara_sagrada, chittam_bark, chittem_bark
Carolina_buckthorn, indian_cherry, Rhamnus_carolinianus
coffeeberry, California_buckthorn, California_coffee, Rhamnus_californicus
redberry, red-berry, Rhamnus_croceus
nakedwood
jujube, jujube_bush, Christ's-thorn, Jerusalem_thorn, Ziziphus_jujuba
Christ's-thorn, Jerusalem_thorn, Paliurus_spina-christi
hazel, hazel_tree, Pomaderris_apetala
fox_grape, Vitis_labrusca
muscadine, Vitis_rotundifolia
vinifera, vinifera_grape, common_grape_vine, Vitis_vinifera
Pinot_blanc
Sauvignon_grape
Sauvignon_blanc
Muscadet
Riesling
Zinfandel
Chenin_blanc
malvasia
Verdicchio
Boston_ivy, Japanese_ivy, Parthenocissus_tricuspidata
Virginia_creeper, American_ivy, woodbine, Parthenocissus_quinquefolia
true_pepper, pepper_vine
betel, betel_pepper, Piper_betel
cubeb
schizocarp
peperomia
watermelon_begonia, Peperomia_argyreia, Peperomia_sandersii
yerba_mansa, Anemopsis_californica
pinna, pinnule
frond
bract
bracteole, bractlet
involucre
glume
palmate_leaf
pinnate_leaf
bijugate_leaf, bijugous_leaf, twice-pinnate
decompound_leaf
acuminate_leaf
deltoid_leaf
ensiform_leaf
linear_leaf, elongate_leaf
lyrate_leaf
obtuse_leaf
oblanceolate_leaf
pandurate_leaf, panduriform_leaf
reniform_leaf
spatulate_leaf
even-pinnate_leaf, abruptly-pinnate_leaf
odd-pinnate_leaf
pedate_leaf
crenate_leaf
dentate_leaf
denticulate_leaf
erose_leaf
runcinate_leaf
prickly-edged_leaf
deadwood
haulm, halm
branchlet, twig, sprig
osier
giant_scrambling_fern, Diplopterygium_longissimum
umbrella_fern, fan_fern, Sticherus_flabellatus, Gleichenia_flabellata
floating_fern, water_sprite, Ceratopteris_pteridioides
polypody
licorice_fern, Polypodium_glycyrrhiza
grey_polypody, gray_polypody, resurrection_fern, Polypodium_polypodioides
leatherleaf, leathery_polypody, coast_polypody, Polypodium_scouleri
rock_polypody, rock_brake, American_wall_fern, Polypodium_virgianum
common_polypody, adder's_fern, wall_fern, golden_maidenhair, golden_polypody, sweet_fern, Polypodium_vulgare
bear's-paw_fern, Aglaomorpha_meyeniana
strap_fern
Florida_strap_fern, cow-tongue_fern, hart's-tongue_fern
basket_fern, Drynaria_rigidula
snake_polypody, Microgramma-piloselloides
climbing_bird's_nest_fern, Microsorium_punctatum
golden_polypody, serpent_fern, rabbit's-foot_fern, Phlebodium_aureum, Polypodium_aureum
staghorn_fern
South_American_staghorn, Platycerium_andinum
common_staghorn_fern, elkhorn_fern, Platycerium_bifurcatum, Platycerium_alcicorne
felt_fern, tongue_fern, Pyrrosia_lingua, Cyclophorus_lingua
potato_fern, Solanopteris_bifrons
myrmecophyte
grass_fern, ribbon_fern, Vittaria_lineata
spleenwort
black_spleenwort, Asplenium_adiantum-nigrum
bird's_nest_fern, Asplenium_nidus
ebony_spleenwort, Scott's_Spleenwort, Asplenium_platyneuron
black-stem_spleenwort, black-stemmed_spleenwort, little_ebony_spleenwort
walking_fern, walking_leaf, Asplenium_rhizophyllum, Camptosorus_rhizophyllus
green_spleenwort, Asplenium_viride
mountain_spleenwort, Asplenium_montanum
lobed_spleenwort, Asplenium_pinnatifidum
lanceolate_spleenwort, Asplenium_billotii
hart's-tongue, hart's-tongue_fern, Asplenium_scolopendrium, Phyllitis_scolopendrium
scale_fern, scaly_fern, Asplenium_ceterach, Ceterach_officinarum
scolopendrium
deer_fern, Blechnum_spicant
doodia, rasp_fern
chain_fern
Virginia_chain_fern, Woodwardia_virginica
silver_tree_fern, sago_fern, black_tree_fern, Cyathea_medullaris
davallia
hare's-foot_fern
Canary_Island_hare's_foot_fern, Davallia_canariensis
squirrel's-foot_fern, ball_fern, Davalia_bullata, Davalia_bullata_mariesii, Davallia_Mariesii
bracken, Pteridium_esculentum
soft_tree_fern, Dicksonia_antarctica
Scythian_lamb, Cibotium_barometz
false_bracken, Culcita_dubia
thyrsopteris, Thyrsopteris_elegans
shield_fern, buckler_fern
broad_buckler-fern, Dryopteris_dilatata
fragrant_cliff_fern, fragrant_shield_fern, fragrant_wood_fern, Dryopteris_fragrans
Goldie's_fern, Goldie's_shield_fern, goldie's_wood_fern, Dryopteris_goldiana
wood_fern, wood-fern, woodfern
male_fern, Dryopteris_filix-mas
marginal_wood_fern, evergreen_wood_fern, leatherleaf_wood_fern, Dryopteris_marginalis
mountain_male_fern, Dryopteris_oreades
lady_fern, Athyrium_filix-femina
Alpine_lady_fern, Athyrium_distentifolium
silvery_spleenwort, glade_fern, narrow-leaved_spleenwort, Athyrium_pycnocarpon, Diplazium_pycnocarpon
holly_fern, Cyrtomium_aculeatum, Polystichum_aculeatum
bladder_fern
brittle_bladder_fern, brittle_fern, fragile_fern, Cystopteris_fragilis
mountain_bladder_fern, Cystopteris_montana
bulblet_fern, bulblet_bladder_fern, berry_fern, Cystopteris_bulbifera
silvery_spleenwort, Deparia_acrostichoides, Athyrium_thelypteroides
oak_fern, Gymnocarpium_dryopteris, Thelypteris_dryopteris
limestone_fern, northern_oak_fern, Gymnocarpium_robertianum
ostrich_fern, shuttlecock_fern, fiddlehead, Matteuccia_struthiopteris, Pteretis_struthiopteris, Onoclea_struthiopteris
hart's-tongue, hart's-tongue_fern, Olfersia_cervina, Polybotrya_cervina, Polybotria_cervina
sensitive_fern, bead_fern, Onoclea_sensibilis
Christmas_fern, canker_brake, dagger_fern, evergreen_wood_fern, Polystichum_acrostichoides
holly_fern
Braun's_holly_fern, prickly_shield_fern, Polystichum_braunii
western_holly_fern, Polystichum_scopulinum
soft_shield_fern, Polystichum_setiferum
leather_fern, leatherleaf_fern, ten-day_fern, Rumohra_adiantiformis, Polystichum_adiantiformis
button_fern, Tectaria_cicutaria
Indian_button_fern, Tectaria_macrodonta
woodsia
rusty_woodsia, fragrant_woodsia, oblong_woodsia, Woodsia_ilvensis
Alpine_woodsia, northern_woodsia, flower-cup_fern, Woodsia_alpina
smooth_woodsia, Woodsia_glabella
Boston_fern, Nephrolepis_exaltata, Nephrolepis_exaltata_bostoniensis
basket_fern, toothed_sword_fern, Nephrolepis_pectinata
golden_fern, leather_fern, Acrostichum_aureum
maidenhair, maidenhair_fern
common_maidenhair, Venushair, Venus'-hair_fern, southern_maidenhair, Venus_maidenhair, Adiantum_capillus-veneris
American_maidenhair_fern, five-fingered_maidenhair_fern, Adiantum_pedatum
Bermuda_maidenhair, Bermuda_maidenhair_fern, Adiantum_bellum
brittle_maidenhair, brittle_maidenhair_fern, Adiantum_tenerum
Farley_maidenhair, Farley_maidenhair_fern, Barbados_maidenhair, glory_fern, Adiantum_tenerum_farleyense
annual_fern, Jersey_fern, Anogramma_leptophylla
lip_fern, lipfern
smooth_lip_fern, Alabama_lip_fern, Cheilanthes_alabamensis
lace_fern, Cheilanthes_gracillima
wooly_lip_fern, hairy_lip_fern, Cheilanthes_lanosa
southwestern_lip_fern, Cheilanthes_eatonii
bamboo_fern, Coniogramme_japonica
American_rock_brake, American_parsley_fern, Cryptogramma_acrostichoides
European_parsley_fern, mountain_parsley_fern, Cryptogramma_crispa
hand_fern, Doryopteris_pedata
cliff_brake, cliff-brake, rock_brake
coffee_fern, Pellaea_andromedifolia
purple_rock_brake, Pellaea_atropurpurea
bird's-foot_fern, Pellaea_mucronata, Pellaea_ornithopus
button_fern, Pellaea_rotundifolia
silver_fern, Pityrogramma_argentea
golden_fern, Pityrogramma_calomelanos_aureoflava
gold_fern, Pityrogramma_chrysophylla
Pteris_cretica
spider_brake, spider_fern, Pteris_multifida
ribbon_fern, spider_fern, Pteris_serrulata
potato_fern, Marattia_salicina
angiopteris, giant_fern, Angiopteris_evecta
skeleton_fork_fern, Psilotum_nudum
horsetail
common_horsetail, field_horsetail, Equisetum_arvense
swamp_horsetail, water_horsetail, Equisetum_fluviatile
scouring_rush, rough_horsetail, Equisetum_hyemale, Equisetum_hyemale_robustum, Equisetum_robustum
marsh_horsetail, Equisetum_palustre
wood_horsetail, Equisetum_Sylvaticum
variegated_horsetail, variegated_scouring_rush, Equisetum_variegatum
club_moss, club-moss, lycopod
shining_clubmoss, Lycopodium_lucidulum
alpine_clubmoss, Lycopodium_alpinum
fir_clubmoss, mountain_clubmoss, little_clubmoss, Lycopodium_selago
ground_cedar, staghorn_moss, Lycopodium_complanatum
ground_fir, princess_pine, tree_clubmoss, Lycopodium_obscurum
foxtail_grass, Lycopodium_alopecuroides
spikemoss, spike_moss, little_club_moss
meadow_spikemoss, basket_spikemoss, Selaginella_apoda
desert_selaginella, Selaginella_eremophila
resurrection_plant, rose_of_Jericho, Selaginella_lepidophylla
florida_selaginella, Selaginella_eatonii
quillwort
earthtongue, earth-tongue
snuffbox_fern, meadow_fern, Thelypteris_palustris_pubescens, Dryopteris_thelypteris_pubescens
christella
mountain_fern, Oreopteris_limbosperma, Dryopteris_oreopteris
New_York_fern, Parathelypteris_novae-boracensis, Dryopteris_noveboracensis
Massachusetts_fern, Parathelypteris_simulata, Thelypteris_simulata
beech_fern
broad_beech_fern, southern_beech_fern, Phegopteris_hexagonoptera, Dryopteris_hexagonoptera, Thelypteris_hexagonoptera
long_beech_fern, narrow_beech_fern, northern_beech_fern, Phegopteris_connectilis, Dryopteris_phegopteris, Thelypteris_phegopteris
shoestring_fungus
Armillaria_caligata, booted_armillaria
Armillaria_ponderosa, white_matsutake
Armillaria_zelleri
honey_mushroom, honey_fungus, Armillariella_mellea
milkweed, silkweed
white_milkweed, Asclepias_albicans
poke_milkweed, Asclepias_exaltata
swamp_milkweed, Asclepias_incarnata
Mead's_milkweed, Asclepias_meadii, Asclepia_meadii
purple_silkweed, Asclepias_purpurascens
showy_milkweed, Asclepias_speciosa
poison_milkweed, horsetail_milkweed, Asclepias_subverticillata
butterfly_weed, orange_milkweed, chigger_flower, chiggerflower, pleurisy_root, tuber_root, Indian_paintbrush, Asclepias_tuberosa
whorled_milkweed, Asclepias_verticillata
cruel_plant, Araujia_sericofera
wax_plant, Hoya_carnosa
silk_vine, Periploca_graeca
stapelia, carrion_flower, starfish_flower
Stapelias_asterias
stephanotis
Madagascar_jasmine, waxflower, Stephanotis_floribunda
negro_vine, Vincetoxicum_hirsutum, Vincetoxicum_negrum
zygospore
tree_of_knowledge
orangery
pocketbook
shit, dump
cordage
yard, pace
extremum, peak
leaf_shape, leaf_form
equilateral
figure
pencil
plane_figure, two-dimensional_figure
solid_figure, three-dimensional_figure
line
bulb
convex_shape, convexity
concave_shape, concavity, incurvation, incurvature
cylinder
round_shape
heart
polygon, polygonal_shape
convex_polygon
concave_polygon
reentrant_polygon, reentering_polygon
amorphous_shape
closed_curve
simple_closed_curve, Jordan_curve
S-shape
wave, undulation
extrados
hook, crotchet
envelope
bight
diameter
cone, conoid, cone_shape
funnel, funnel_shape
oblong
circle
circle
equator
scallop, crenation, crenature, crenel, crenelle
ring, halo, annulus, doughnut, anchor_ring
loop
bight
helix, spiral
element_of_a_cone
element_of_a_cylinder
ellipse, oval
quadrate
triangle, trigon, trilateral
acute_triangle, acute-angled_triangle
isosceles_triangle
obtuse_triangle, obtuse-angled_triangle
right_triangle, right-angled_triangle
scalene_triangle
parallel
trapezoid
star
pentagon
hexagon
heptagon
octagon
nonagon
decagon
rhombus, rhomb, diamond
spherical_polygon
spherical_triangle
convex_polyhedron
concave_polyhedron
cuboid
quadrangular_prism
bell, bell_shape, campana
angular_distance
true_anomaly
spherical_angle
angle_of_refraction
acute_angle
groove, channel
rut
bulge, bump, hump, swelling, gibbosity, gibbousness, jut, prominence, protuberance, protrusion, extrusion, excrescence
belly
bow, arc
crescent
ellipsoid
hypotenuse
balance, equilibrium, equipoise, counterbalance
conformation
symmetry, proportion
spheroid, ellipsoid_of_revolution
spherule
toroid
column, tower, pillar
barrel, drum
pipe, tube
pellet
bolus
dewdrop
ridge
rim
taper
boundary, edge, bound
incisure, incisura
notch
wrinkle, furrow, crease, crinkle, seam, line
dermatoglyphic
frown_line
line_of_life, life_line, lifeline
line_of_heart, heart_line, love_line, mensal_line
crevice, cranny, crack, fissure, chap
cleft
roulette, line_roulette
node
tree, tree_diagram
stemma
brachium
fork, crotch
block, cube
ovoid
tetrahedron
pentahedron
hexahedron
regular_polyhedron, regular_convex_solid, regular_convex_polyhedron, Platonic_body, Platonic_solid, ideal_solid
polyhedral_angle
cube, regular_hexahedron
truncated_pyramid
truncated_cone
tail, tail_end
tongue, knife
trapezohedron
wedge, wedge_shape, cuneus
keel
place, shoes
herpes
chlamydia
wall
micronutrient
chyme
ragweed_pollen
pina_cloth
chlorobenzylidenemalononitrile, CS_gas
carbon, C, atomic_number_6
charcoal, wood_coal
rock, stone
gravel, crushed_rock
aflatoxin
alpha-tocopheral
leopard
bricks_and_mortar
lagging
hydraulic_cement, Portland_cement
choline
concrete
glass_wool
soil, dirt
high_explosive
litter
fish_meal
Greek_fire
culture_medium, medium
agar, nutrient_agar
blood_agar
hip_tile, hipped_tile
hyacinth, jacinth
hydroxide_ion, hydroxyl_ion
ice, water_ice
inositol
linoleum, lino
lithia_water
lodestone, loadstone
pantothenic_acid, pantothen
paper
papyrus
pantile
blacktop, blacktopping
tarmacadam, tarmac
paving, pavement, paving_material
plaster
poison_gas
ridge_tile
roughcast
sand
spackle, spackling_compound
render
wattle_and_daub
stucco
tear_gas, teargas, lacrimator, lachrymator
toilet_tissue, toilet_paper, bathroom_tissue
linseed, flaxseed
vitamin
fat-soluble_vitamin
water-soluble_vitamin
vitamin_A, antiophthalmic_factor, axerophthol, A
vitamin_A1, retinol
vitamin_A2, dehydroretinol
B-complex_vitamin, B_complex, vitamin_B_complex, vitamin_B, B_vitamin, B
vitamin_B1, thiamine, thiamin, aneurin, antiberiberi_factor
vitamin_B12, cobalamin, cyanocobalamin, antipernicious_anemia_factor
vitamin_B2, vitamin_G, riboflavin, lactoflavin, ovoflavin, hepatoflavin
vitamin_B6, pyridoxine, pyridoxal, pyridoxamine, adermin
vitamin_Bc, vitamin_M, folate, folic_acid, folacin, pteroylglutamic_acid, pteroylmonoglutamic_acid
niacin, nicotinic_acid
vitamin_D, calciferol, viosterol, ergocalciferol, cholecalciferol, D
vitamin_E, tocopherol, E
biotin, vitamin_H
vitamin_K, naphthoquinone, antihemorrhagic_factor
vitamin_K1, phylloquinone, phytonadione
vitamin_K3, menadione
vitamin_P, bioflavinoid, citrin
vitamin_C, C, ascorbic_acid
planking
chipboard, hardboard
knothole


================================================
FILE: pytorch_classification/grad_cam/main_cnn.py
================================================
import os
import numpy as np
import torch
from PIL import Image
import matplotlib.pyplot as plt
from torchvision import models
from torchvision import transforms
from utils import GradCAM, show_cam_on_image, center_crop_img


def main():
    """Run Grad-CAM on a pretrained torchvision CNN and display the overlay.

    The script reads ``both.png`` from the working directory, converts it to a
    normalized tensor, asks ``GradCAM`` for the map of ``target_category`` on
    ``target_layers`` and shows the result blended onto the image with matplotlib.

    Raises:
        FileNotFoundError: if ``both.png`` does not exist.
    """
    model = models.mobilenet_v3_large(pretrained=True)
    target_layers = [model.features[-1]]

    # Other backbones that can be swapped in (uncomment one model/target_layers pair):
    # model = models.vgg16(pretrained=True)
    # target_layers = [model.features]

    # model = models.resnet34(pretrained=True)
    # target_layers = [model.layer4]

    # model = models.regnet_y_800mf(pretrained=True)
    # target_layers = [model.trunk_output]

    # model = models.efficientnet_b0(pretrained=True)
    # target_layers = [model.features]

    data_transform = transforms.Compose([transforms.ToTensor(),
                                         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
    # load image
    img_path = "both.png"
    # Raise explicitly instead of using ``assert``: asserts are removed when
    # Python runs with -O, which would silently skip this check.
    if not os.path.exists(img_path):
        raise FileNotFoundError("file: '{}' does not exist.".format(img_path))
    img = Image.open(img_path).convert('RGB')
    img = np.array(img, dtype=np.uint8)
    # img = center_crop_img(img, 224)

    # [C, H, W]
    img_tensor = data_transform(img)
    # expand batch dimension
    # [C, H, W] -> [N, C, H, W]
    input_tensor = torch.unsqueeze(img_tensor, dim=0)

    cam = GradCAM(model=model, target_layers=target_layers, use_cuda=False)
    target_category = 281  # tabby, tabby cat
    # target_category = 254  # pug, pug-dog

    grayscale_cam = cam(input_tensor=input_tensor, target_category=target_category)

    # keep the map of the first (and only) image in the batch
    grayscale_cam = grayscale_cam[0, :]
    # the overlay helper is fed the image rescaled from uint8 to float32 in [0, 1]
    visualization = show_cam_on_image(img.astype(dtype=np.float32) / 255.,
                                      grayscale_cam,
                                      use_rgb=True)
    plt.imshow(visualization)
    plt.show()


if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/grad_cam/main_swin.py
================================================
import os
import math
import numpy as np
import torch
from PIL import Image
import matplotlib.pyplot as plt
from torchvision import transforms
from utils import GradCAM, show_cam_on_image, center_crop_img
from swin_model import swin_base_patch4_window7_224


class ResizeTransform:
    """Reshape a token sequence ``[B, H*W, C]`` into a CNN-style map ``[B, C, H, W]``.

    ``H`` and ``W`` are derived once from the input image size by applying the
    network's down-sampling factors (see :meth:`feature_size`).
    """

    def __init__(self, im_h: int, im_w: int):
        self.height = self.feature_size(im_h)
        self.width = self.feature_size(im_w)

    @staticmethod
    def feature_size(s):
        """Return the side length left after PatchEmbed (/4) and three PatchMerging steps (/2 each).

        Every step rounds up.
        """
        for stride in (4, 2, 2, 2):  # PatchEmbed, PatchMerging1..3
            s = math.ceil(s / stride)
        return s

    def __call__(self, x):
        batch, channels = x.size(0), x.size(2)
        # [batch, H*W, C] -> [batch, H, W, C]
        grid = x.reshape(batch, self.height, self.width, channels)
        # channels first, like in CNNs: [batch, H, W, C] -> [batch, C, H, W]
        return grid.permute(0, 3, 1, 2)


def main():
    """Run Grad-CAM on a Swin Transformer (base, patch4, window7, 224) and display the overlay.

    The script loads the weights from ``./swin_base_patch4_window7_224.pth``,
    reads ``both.png``, center-crops it to ``img_size``, asks ``GradCAM`` for the
    map of ``target_category`` on ``model.norm`` and shows the result blended
    onto the image with matplotlib.

    Raises:
        ValueError: if ``img_size`` is not a multiple of 32.
        FileNotFoundError: if ``both.png`` does not exist.
    """
    # NOTE: the input image size must be an integer multiple of 32;
    # otherwise the padding applied inside the network makes the attention map drift.
    img_size = 224
    # Explicit exception rather than ``assert`` so the check survives ``python -O``.
    if img_size % 32 != 0:
        raise ValueError("img_size must be a multiple of 32, got {}.".format(img_size))

    model = swin_base_patch4_window7_224()
    # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224.pth
    weights_path = "./swin_base_patch4_window7_224.pth"
    # The checkpoint keeps the state dict under the "model" key; strict=False
    # tolerates key mismatches between the checkpoint and this implementation.
    # NOTE(review): torch.load unpickles the file - only load checkpoints from a trusted source.
    model.load_state_dict(torch.load(weights_path, map_location="cpu")["model"], strict=False)

    target_layers = [model.norm]

    data_transform = transforms.Compose([transforms.ToTensor(),
                                         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
    # load image
    img_path = "both.png"
    if not os.path.exists(img_path):
        raise FileNotFoundError("file: '{}' does not exist.".format(img_path))
    img = Image.open(img_path).convert('RGB')
    img = np.array(img, dtype=np.uint8)
    img = center_crop_img(img, img_size)

    # [C, H, W]
    img_tensor = data_transform(img)
    # expand batch dimension
    # [C, H, W] -> [N, C, H, W]
    input_tensor = torch.unsqueeze(img_tensor, dim=0)

    # The reshape transform turns the token sequence of the target layer back
    # into a [B, C, H, W] map.
    cam = GradCAM(model=model, target_layers=target_layers, use_cuda=False,
                  reshape_transform=ResizeTransform(im_h=img_size, im_w=img_size))
    target_category = 281  # tabby, tabby cat
    # target_category = 254  # pug, pug-dog

    grayscale_cam = cam(input_tensor=input_tensor, target_category=target_category)

    # keep the map of the first (and only) image in the batch
    grayscale_cam = grayscale_cam[0, :]
    visualization = show_cam_on_image(img / 255., grayscale_cam, use_rgb=True)
    plt.imshow(visualization)
    plt.show()


if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/grad_cam/main_vit.py
================================================
import os
import numpy as np
import torch
from PIL import Image
import matplotlib.pyplot as plt
from torchvision import transforms
from utils import GradCAM, show_cam_on_image, center_crop_img
from vit_model import vit_base_patch16_224


class ReshapeTransform:
    """Reshape ViT tokens ``[B, 1 + H*W, C]`` into a CNN-style map ``[B, C, H, W]``.

    The patch-grid size ``(h, w)`` is read from ``model.patch_embed`` as
    ``img_size // patch_size`` along each axis.
    """

    def __init__(self, model):
        embed = model.patch_embed
        self.h = embed.img_size[0] // embed.patch_size[0]
        self.w = embed.img_size[1] // embed.patch_size[1]

    def __call__(self, x):
        # x: [batch_size, num_tokens, token_dim]; token 0 is the cls token and is dropped
        patch_tokens = x[:, 1:, :]
        grid = patch_tokens.reshape(x.size(0), self.h, self.w, x.size(2))

        # channels first, like in CNNs: [batch_size, H, W, C] -> [batch, C, H, W]
        return grid.permute(0, 3, 1, 2)


def main():
    """Run Grad-CAM on a ViT-Base/16 (224) model and display the overlay.

    The script loads the weights from ``./vit_base_patch16_224.pth``, reads
    ``both.png``, center-crops it to 224, asks ``GradCAM`` for the map of
    ``target_category`` on the first norm layer of the last block and shows the
    result blended onto the image with matplotlib.

    Raises:
        FileNotFoundError: if ``both.png`` does not exist.
    """
    model = vit_base_patch16_224()
    # Weights link: https://pan.baidu.com/s/1zqb08naP0RPqqfSXfkB2EA  password: eu9f
    weights_path = "./vit_base_patch16_224.pth"
    # NOTE(review): torch.load unpickles the file - only load checkpoints from a trusted source.
    model.load_state_dict(torch.load(weights_path, map_location="cpu"))
    # Since the final classification is done on the class token computed in the last attention block,
    # the output will not be affected by the 14x14 channels in the last layer.
    # The gradient of the output with respect to them will be 0!
    # We should choose any layer before the final attention block.
    target_layers = [model.blocks[-1].norm1]

    data_transform = transforms.Compose([transforms.ToTensor(),
                                         transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
    # load image
    img_path = "both.png"
    # Raise explicitly instead of using ``assert``: asserts are removed when
    # Python runs with -O, which would silently skip this check.
    if not os.path.exists(img_path):
        raise FileNotFoundError("file: '{}' does not exist.".format(img_path))
    img = Image.open(img_path).convert('RGB')
    img = np.array(img, dtype=np.uint8)
    img = center_crop_img(img, 224)
    # [C, H, W]
    img_tensor = data_transform(img)
    # expand batch dimension
    # [C, H, W] -> [N, C, H, W]
    input_tensor = torch.unsqueeze(img_tensor, dim=0)

    # The reshape transform drops the cls token and turns the remaining tokens
    # back into a [B, C, H, W] map.
    cam = GradCAM(model=model,
                  target_layers=target_layers,
                  use_cuda=False,
                  reshape_transform=ReshapeTransform(model))
    target_category = 281  # tabby, tabby cat
    # target_category = 254  # pug, pug-dog

    grayscale_cam = cam(input_tensor=input_tensor, target_category=target_category)

    # keep the map of the first (and only) image in the batch
    grayscale_cam = grayscale_cam[0, :]
    visualization = show_cam_on_image(img / 255., grayscale_cam, use_rgb=True)
    plt.imshow(visualization)
    plt.show()


if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/grad_cam/swin_model.py
================================================
""" Swin Transformer
A PyTorch impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows`
    - https://arxiv.org/pdf/2103.14030

Code/weights from https://github.com/microsoft/Swin-Transformer

"""

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.checkpoint as checkpoint
import numpy as np
from typing import Optional


def drop_path_f(x, drop_prob: float = 0., training: bool = False):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Each sample of the batch (dim 0) is either zeroed entirely, with probability
    ``drop_prob``, or kept and rescaled by ``1 / (1 - drop_prob)``.

    This is the same as the DropConnect impl created for EfficientNet, etc networks, however,
    the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956

    Args:
        x (Tensor): input of any rank; dim 0 is treated as the batch dimension.
        drop_prob (float | None): probability of dropping a sample. ``None`` and ``0``
            both disable dropping.
        training (bool): dropping is only applied when True.

    Returns:
        Tensor: ``x`` itself when dropping is disabled, otherwise a new tensor of the same shape.
    """
    # ``None`` is the default of DropPath(drop_prob=None); treat it like 0 instead of
    # failing on ``1 - None`` below.
    if not training or drop_prob is None or drop_prob == 0.:
        return x
    keep_prob = 1 - drop_prob
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
    random_tensor.floor_()  # binarize: 1 keeps the sample, 0 drops it
    if keep_prob != 0.:
        # rescale kept samples so the expected value is unchanged
        x = x.div(keep_prob)
    # With keep_prob == 0 every sample is dropped; skipping the division above
    # avoids the inf * 0 = NaN that would otherwise be produced.
    return x * random_tensor


class DropPath(nn.Module):
    """Stochastic Depth as a module: applies ``drop_path_f`` with the stored drop probability.

    Dropping is governed by the module's ``training`` flag, which is passed
    straight through to ``drop_path_f``.
    """
    def __init__(self, drop_prob=None):
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        return drop_path_f(x, drop_prob=self.drop_prob, training=self.training)


def window_partition(x, window_size: int):
    """
    Split a feature map into non-overlapping windows of side ``window_size``.
    Args:
        x: (B, H, W, C)
        window_size (int): window size(M)

    Returns:
        windows: (num_windows*B, window_size, window_size, C)
    """
    B, H, W, C = x.shape
    rows, cols = H // window_size, W // window_size
    # view: [B, H, W, C] -> [B, H//M, M, W//M, M, C]
    tiled = x.view(B, rows, window_size, cols, window_size, C)
    # permute: bring the two window-grid axes together -> [B, H//M, W//M, M, M, C]
    tiled = tiled.permute(0, 1, 3, 2, 4, 5).contiguous()
    # view: fold batch and window grid into one axis -> [B*num_windows, M, M, C]
    return tiled.view(-1, window_size, window_size, C)


def window_reverse(windows, window_size: int, H: int, W: int):
    """
    Stitch a batch of windows back together into one feature map per image.
    Args:
        windows: (num_windows*B, window_size, window_size, C)
        window_size (int): Window size(M)
        H (int): Height of image
        W (int): Width of image

    Returns:
        x: (B, H, W, C)
    """
    # the batch size is recovered from the number of windows each image contributes
    windows_per_image = H * W / window_size / window_size
    B = int(windows.shape[0] / windows_per_image)
    # view: [B*num_windows, M, M, C] -> [B, H//M, W//M, M, M, C]
    grid = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1)
    # permute: [B, H//M, W//M, M, M, C] -> [B, H//M, M, W//M, M, C]
    grid = grid.permute(0, 1, 3, 2, 4, 5).contiguous()
    # view: [B, H//M, M, W//M, M, C] -> [B, H, W, C]
    return grid.view(B, H, W, -1)


class PatchEmbed(nn.Module):
    """
    2D Image to Patch Embedding

    Args:
        patch_size (int): side length of the square patches. Default: 4
        in_c (int): number of input image channels. Default: 3
        embed_dim (int): number of output channels per patch. Default: 96
        norm_layer (callable, optional): called as ``norm_layer(embed_dim)`` to build the
            normalization applied to the tokens; identity when None. Default: None
    """
    def __init__(self, patch_size=4, in_c=3, embed_dim=96, norm_layer=None):
        super().__init__()
        patch_size = (patch_size, patch_size)
        self.patch_size = patch_size
        self.in_chans = in_c
        self.embed_dim = embed_dim
        # a strided convolution with kernel == stride embeds each patch independently
        self.proj = nn.Conv2d(in_c, embed_dim, kernel_size=patch_size, stride=patch_size)
        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()

    def forward(self, x):
        """Embed an image batch.

        Args:
            x: (B, C, H, W) image tensor.

        Returns:
            tuple: tokens of shape (B, H'*W', embed_dim) and the patch-grid size H', W'.
        """
        _, _, H, W = x.shape

        # padding
        # If H or W is not a multiple of patch_size, pad bottom/right with zeros.
        # The trailing ``% patch_size`` makes the amount 0 for a dimension that is
        # already divisible, so it is never padded by a whole extra patch when
        # only the other dimension needs padding.
        pad_h = (self.patch_size[0] - H % self.patch_size[0]) % self.patch_size[0]
        pad_w = (self.patch_size[1] - W % self.patch_size[1]) % self.patch_size[1]
        if pad_h > 0 or pad_w > 0:
            # to pad the last 3 dimensions,
            # (W_left, W_right, H_top, H_bottom, C_front, C_back)
            x = F.pad(x, (0, pad_w,
                          0, pad_h,
                          0, 0))

        # down-sample by a factor of patch_size
        x = self.proj(x)
        _, _, H, W = x.shape
        # flatten: [B, C, H, W] -> [B, C, HW]
        # transpose: [B, C, HW] -> [B, HW, C]
        x = x.flatten(2).transpose(1, 2)
        x = self.norm(x)
        return x, H, W


class PatchMerging(nn.Module):
    r""" Patch Merging Layer.

    Concatenates every 2x2 neighbourhood of tokens along the channel axis
    (C -> 4*C), normalizes it and projects it linearly to 2*C channels.

    Args:
        dim (int): Number of input channels.
        norm_layer (nn.Module, optional): Normalization layer.  Default: nn.LayerNorm
    """

    def __init__(self, dim, norm_layer=nn.LayerNorm):
        super().__init__()
        self.dim = dim
        self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False)
        self.norm = norm_layer(4 * dim)

    def forward(self, x, H, W):
        """
        x: B, H*W, C
        """
        B, L, C = x.shape
        assert L == H * W, "input feature has wrong size"

        grid = x.view(B, H, W, C)

        # padding
        # An odd H or W gets one extra row/column of zeros at the bottom/right.
        if H % 2 == 1 or W % 2 == 1:
            # F.pad walks the dimensions from last to first:
            # (C_front, C_back, W_left, W_right, H_top, H_bottom)
            # The tensor layout here is [B, H, W, C], so the order differs from
            # the [B, C, H, W] examples in the official docs.
            grid = F.pad(grid, (0, 0, 0, W % 2, 0, H % 2))

        # The four members of every 2x2 neighbourhood, in the order
        # top-left, bottom-left, top-right, bottom-right; each is [B, H/2, W/2, C].
        quadrants = [grid[:, r::2, c::2, :] for c in (0, 1) for r in (0, 1)]
        merged = torch.cat(quadrants, -1)  # [B, H/2, W/2, 4*C]
        merged = merged.view(B, -1, 4 * C)  # [B, H/2*W/2, 4*C]

        return self.reduction(self.norm(merged))  # [B, H/2*W/2, 2*C]


class Mlp(nn.Module):
    """ Two-layer feed-forward network: Linear -> activation -> Dropout -> Linear -> Dropout.

    ``hidden_features`` and ``out_features`` fall back to ``in_features`` when
    they are not given.
    """
    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        hidden_features = hidden_features or in_features
        out_features = out_features or in_features

        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.drop1 = nn.Dropout(drop)
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop2 = nn.Dropout(drop)

    def forward(self, x):
        # run the sub-layers strictly in their definition order
        for layer in (self.fc1, self.act, self.drop1, self.fc2, self.drop2):
            x = layer(x)
        return x


class WindowAttention(nn.Module):
    r""" Window based multi-head self attention (W-MSA) module with relative position bias.
    The same module serves shifted and non-shifted windows; the shifted case
    simply passes an additive ``mask`` to :meth:`forward`.

    Args:
        dim (int): Number of input channels.
        window_size (tuple[int]): The height and width of the window.
        num_heads (int): Number of attention heads.
        qkv_bias (bool, optional):  If True, add a learnable bias to query, key, value. Default: True
        attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0
        proj_drop (float, optional): Dropout ratio of output. Default: 0.0
    """

    def __init__(self, dim, window_size, num_heads, qkv_bias=True, attn_drop=0., proj_drop=0.):

        super().__init__()
        self.dim = dim
        self.window_size = window_size  # [Mh, Mw]
        self.num_heads = num_heads
        self.scale = (dim // num_heads) ** -0.5

        win_h, win_w = self.window_size[0], self.window_size[1]

        # One learnable bias per head for each possible relative offset
        # between two tokens of a window.
        self.relative_position_bias_table = nn.Parameter(
            torch.zeros((2 * win_h - 1) * (2 * win_w - 1), num_heads))  # [2*Mh-1 * 2*Mw-1, nH]

        # For every ordered token pair, precompute the row of the table that holds its bias.
        grid = torch.stack(
            torch.meshgrid([torch.arange(win_h), torch.arange(win_w)], indexing="ij"))  # [2, Mh, Mw]
        flat = torch.flatten(grid, 1)  # [2, Mh*Mw]
        # [2, Mh*Mw, 1] - [2, 1, Mh*Mw] -> [2, Mh*Mw, Mh*Mw] -> [Mh*Mw, Mh*Mw, 2]
        offsets = (flat[:, :, None] - flat[:, None, :]).permute(1, 2, 0).contiguous()
        offsets[:, :, 0] += win_h - 1  # shift both offsets so they start from 0
        offsets[:, :, 1] += win_w - 1
        offsets[:, :, 0] *= 2 * win_w - 1  # row offset * row length + column offset = flat index
        self.register_buffer("relative_position_index", offsets.sum(-1))  # [Mh*Mw, Mh*Mw]

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

        nn.init.trunc_normal_(self.relative_position_bias_table, std=.02)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x, mask: Optional[torch.Tensor] = None):
        """
        Args:
            x: input features with shape of (num_windows*B, Mh*Mw, C)
            mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None
        """
        # [batch_size*num_windows, Mh*Mw, total_embed_dim]
        B_, N, C = x.shape
        heads = self.num_heads
        # qkv(): -> [batch_size*num_windows, Mh*Mw, 3 * total_embed_dim]
        # reshape: -> [batch_size*num_windows, Mh*Mw, 3, num_heads, embed_dim_per_head]
        # permute: -> [3, batch_size*num_windows, num_heads, Mh*Mw, embed_dim_per_head]
        qkv = self.qkv(x).reshape(B_, N, 3, heads, C // heads).permute(2, 0, 3, 1, 4)
        # each: [batch_size*num_windows, num_heads, Mh*Mw, embed_dim_per_head]
        q, k, v = qkv.unbind(0)  # unbind rather than tuple-unpack a tensor (torchscript friendly)

        # scaled dot product -> [batch_size*num_windows, num_heads, Mh*Mw, Mh*Mw]
        attn = (q * self.scale) @ k.transpose(-2, -1)

        # look the bias of every token pair up in the table:
        # [Mh*Mw*Mh*Mw, nH] -> [Mh*Mw, Mh*Mw, nH] -> [nH, Mh*Mw, Mh*Mw]
        tokens = self.window_size[0] * self.window_size[1]
        bias = self.relative_position_bias_table[self.relative_position_index.view(-1)]
        bias = bias.view(tokens, tokens, -1).permute(2, 0, 1).contiguous()
        attn = attn + bias.unsqueeze(0)

        if mask is not None:
            # mask: [nW, Mh*Mw, Mh*Mw]
            nW = mask.shape[0]  # num_windows
            # attn.view: [batch_size, num_windows, num_heads, Mh*Mw, Mh*Mw]
            # mask.unsqueeze: [1, nW, 1, Mh*Mw, Mh*Mw]
            attn = attn.view(B_ // nW, nW, heads, N, N) + mask.unsqueeze(1).unsqueeze(0)
            attn = attn.view(-1, heads, N, N)

        attn = self.attn_drop(self.softmax(attn))

        # @: multiply -> [batch_size*num_windows, num_heads, Mh*Mw, embed_dim_per_head]
        # transpose: -> [batch_size*num_windows, Mh*Mw, num_heads, embed_dim_per_head]
        # reshape: -> [batch_size*num_windows, Mh*Mw, total_embed_dim]
        out = (attn @ v).transpose(1, 2).reshape(B_, N, C)
        return self.proj_drop(self.proj(out))


class SwinTransformerBlock(nn.Module):
    r""" Swin Transformer Block.

    Window attention (optionally on a cyclically shifted feature map) followed
    by an MLP, each wrapped in a residual connection with drop-path.

    The spatial size of the incoming tokens is read from ``self.H`` / ``self.W``,
    which the caller must set before every forward pass (``BasicLayer.forward``
    does this).

    Args:
        dim (int): Number of input channels.
        num_heads (int): Number of attention heads.
        window_size (int): Window size.
        shift_size (int): Shift size for SW-MSA.
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
        drop (float, optional): Dropout rate. Default: 0.0
        attn_drop (float, optional): Attention dropout rate. Default: 0.0
        drop_path (float, optional): Stochastic depth rate. Default: 0.0
        act_layer (nn.Module, optional): Activation layer. Default: nn.GELU
        norm_layer (nn.Module, optional): Normalization layer.  Default: nn.LayerNorm
    """

    def __init__(self, dim, num_heads, window_size=7, shift_size=0,
                 mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0., drop_path=0.,
                 act_layer=nn.GELU, norm_layer=nn.LayerNorm):
        super().__init__()
        self.dim = dim
        self.num_heads = num_heads
        self.window_size = window_size
        self.shift_size = shift_size
        self.mlp_ratio = mlp_ratio
        assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size"

        self.norm1 = norm_layer(dim)
        self.attn = WindowAttention(
            dim,
            window_size=(self.window_size, self.window_size),
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            attn_drop=attn_drop,
            proj_drop=drop)

        # stochastic depth is only instantiated when it can actually drop something
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.norm2 = norm_layer(dim)
        self.mlp = Mlp(in_features=dim, hidden_features=int(dim * mlp_ratio),
                       act_layer=act_layer, drop=drop)

    def forward(self, x, attn_mask):
        """
        Args:
            x: tokens of shape (B, H*W, C)
            attn_mask: mask handed to the attention for shifted windows;
                ignored (replaced by None) when ``shift_size`` is 0

        Returns:
            tokens of shape (B, H*W, C)
        """
        H, W = self.H, self.W
        B, L, C = x.shape
        assert L == H * W, "input feature has wrong size"

        win = self.window_size
        shifted = self.shift_size > 0

        residual = x
        x = self.norm1(x).view(B, H, W, C)

        # pad the feature map (right / bottom only) up to a multiple of the window size
        pad_r = (win - W % win) % win
        pad_b = (win - H % win) % win
        x = F.pad(x, (0, 0, 0, pad_r, 0, pad_b))
        _, Hp, Wp, _ = x.shape

        # cyclic shift
        if shifted:
            x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2))
        else:
            attn_mask = None

        # partition windows: [nW*B, Mh, Mw, C] -> [nW*B, Mh*Mw, C]
        windows = window_partition(x, win).view(-1, win * win, C)

        # W-MSA/SW-MSA, then back to [nW*B, Mh, Mw, C]
        attended = self.attn(windows, mask=attn_mask).view(-1, win, win, C)

        # merge windows: -> [B, H', W', C]
        x = window_reverse(attended, win, Hp, Wp)

        # reverse cyclic shift
        if shifted:
            x = torch.roll(x, shifts=(self.shift_size, self.shift_size), dims=(1, 2))

        if pad_r > 0 or pad_b > 0:
            # strip the rows/columns that were added by the padding above
            x = x[:, :H, :W, :].contiguous()

        x = x.view(B, H * W, C)

        # residual around attention, then residual around the FFN
        x = residual + self.drop_path(x)
        return x + self.drop_path(self.mlp(self.norm2(x)))


class BasicLayer(nn.Module):
    """
    One stage of the Swin Transformer: ``depth`` blocks followed by an optional down-sampling layer.

    Args:
        dim (int): Number of input channels.
        depth (int): Number of blocks.
        num_heads (int): Number of attention heads.
        window_size (int): Local window size.
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
        drop (float, optional): Dropout rate. Default: 0.0
        attn_drop (float, optional): Attention dropout rate. Default: 0.0
        drop_path (float | list[float], optional): Stochastic depth rate; a list is indexed
            per block. Default: 0.0
        norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
        downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
    """

    def __init__(self, dim, depth, num_heads, window_size,
                 mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0.,
                 drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False):
        super().__init__()
        self.dim = dim
        self.depth = depth
        self.window_size = window_size
        self.use_checkpoint = use_checkpoint
        self.shift_size = window_size // 2

        # build blocks: even-indexed blocks use no shift, odd-indexed ones shift by window_size // 2
        blocks = []
        for idx in range(depth):
            blocks.append(SwinTransformerBlock(
                dim=dim,
                num_heads=num_heads,
                window_size=window_size,
                shift_size=self.shift_size if idx % 2 else 0,
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                drop=drop,
                attn_drop=attn_drop,
                drop_path=drop_path[idx] if isinstance(drop_path, list) else drop_path,
                norm_layer=norm_layer))
        self.blocks = nn.ModuleList(blocks)

        # patch merging layer
        self.downsample = downsample(dim=dim, norm_layer=norm_layer) if downsample is not None else None

    def create_mask(self, x, H, W):
        """Build the additive attention mask used by the shifted-window blocks.

        Returns a tensor of shape [nW, Mh*Mw, Mh*Mw] holding 0 where two tokens
        of a window carry the same region id and -100 where they differ.
        ``x`` is only used for its device.
        """
        win, shift = self.window_size, self.shift_size
        # round H and W up to multiples of the window size
        Hp = int(np.ceil(H / win)) * win
        Wp = int(np.ceil(W / win)) * win
        # same axis order as the feature map so window_partition can be reused
        img_mask = torch.zeros((1, Hp, Wp, 1), device=x.device)  # [1, Hp, Wp, 1]
        # three bands per axis give a 3x3 grid of regions, numbered 0..8 row by row
        bands = (slice(0, -win),
                 slice(-win, -shift),
                 slice(-shift, None))
        for row, h in enumerate(bands):
            for col, w in enumerate(bands):
                img_mask[:, h, w, :] = row * len(bands) + col

        # [nW, Mh, Mw, 1] -> [nW, Mh*Mw]
        region_ids = window_partition(img_mask, win).view(-1, win * win)
        # pairwise difference of region ids: [nW, 1, Mh*Mw] - [nW, Mh*Mw, 1] -> [nW, Mh*Mw, Mh*Mw]
        diff = region_ids.unsqueeze(1) - region_ids.unsqueeze(2)
        return diff.masked_fill(diff != 0, -100.0).masked_fill(diff == 0, 0.0)

    def forward(self, x, H, W):
        """Run all blocks (and the down-sampling layer, if any).

        Returns the tokens together with the spatial size they correspond to;
        after down-sampling that size is ``(H + 1) // 2, (W + 1) // 2``.
        """
        attn_mask = self.create_mask(x, H, W)  # [nW, Mh*Mw, Mh*Mw]
        for blk in self.blocks:
            # the blocks read the current spatial size from these attributes
            blk.H, blk.W = H, W
            if self.use_checkpoint and not torch.jit.is_scripting():
                x = checkpoint.checkpoint(blk, x, attn_mask)
            else:
                x = blk(x, attn_mask)

        if self.downsample is not None:
            x = self.downsample(x, H, W)
            H, W = (H + 1) // 2, (W + 1) // 2

        return x, H, W


class SwinTransformer(nn.Module):
    r"""Swin Transformer classifier.

    PyTorch implementation of `Swin Transformer: Hierarchical Vision
    Transformer using Shifted Windows` - https://arxiv.org/pdf/2103.14030

    Args:
        patch_size (int | tuple(int)): Patch size. Default: 4
        in_chans (int): Number of input image channels. Default: 3
        num_classes (int): Number of classes for classification head. Default: 1000
        embed_dim (int): Patch embedding dimension. Default: 96
        depths (tuple(int)): Depth of each Swin Transformer layer.
        num_heads (tuple(int)): Number of attention heads in different layers.
        window_size (int): Window size. Default: 7
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4
        qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
        drop_rate (float): Dropout rate. Default: 0
        attn_drop_rate (float): Attention dropout rate. Default: 0
        drop_path_rate (float): Stochastic depth rate. Default: 0.1
        norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm.
        patch_norm (bool): If True, add normalization after patch embedding. Default: True
        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False
        **kwargs: accepted for call compatibility; not used by this class.
    """

    def __init__(self, patch_size=4, in_chans=3, num_classes=1000,
                 embed_dim=96, depths=(2, 2, 6, 2), num_heads=(3, 6, 12, 24),
                 window_size=7, mlp_ratio=4., qkv_bias=True,
                 drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1,
                 norm_layer=nn.LayerNorm, patch_norm=True,
                 use_checkpoint=False, **kwargs):
        super().__init__()

        stage_count = len(depths)
        self.num_classes = num_classes
        self.num_layers = stage_count
        self.embed_dim = embed_dim
        self.patch_norm = patch_norm
        # Channel count of the feature map produced by the last stage.
        self.num_features = int(embed_dim * 2 ** (stage_count - 1))
        self.mlp_ratio = mlp_ratio

        # Split the image into non-overlapping patches.
        embed_norm = norm_layer if self.patch_norm else None
        self.patch_embed = PatchEmbed(patch_size=patch_size,
                                      in_c=in_chans,
                                      embed_dim=embed_dim,
                                      norm_layer=embed_norm)
        self.pos_drop = nn.Dropout(p=drop_rate)

        # Stochastic depth decay rule: one rate per block, rising linearly.
        block_rates = [rate.item()
                       for rate in torch.linspace(0, drop_path_rate, sum(depths))]

        # Note: the stages built here differ from the figure in the paper.
        # A stage does not contain its own patch-merging layer; it contains
        # the one belonging to the next stage.
        self.layers = nn.ModuleList()
        first_block = 0
        for stage_idx, stage_depth in enumerate(depths):
            is_last_stage = stage_idx >= stage_count - 1
            stage = BasicLayer(
                dim=int(embed_dim * 2 ** stage_idx),
                depth=stage_depth,
                num_heads=num_heads[stage_idx],
                window_size=window_size,
                mlp_ratio=self.mlp_ratio,
                qkv_bias=qkv_bias,
                drop=drop_rate,
                attn_drop=attn_drop_rate,
                drop_path=block_rates[first_block:first_block + stage_depth],
                norm_layer=norm_layer,
                downsample=None if is_last_stage else PatchMerging,
                use_checkpoint=use_checkpoint)
            self.layers.append(stage)
            first_block += stage_depth

        self.norm = norm_layer(self.num_features)
        self.avgpool = nn.AdaptiveAvgPool1d(1)
        if num_classes > 0:
            self.head = nn.Linear(self.num_features, num_classes)
        else:
            self.head = nn.Identity()

        self.apply(self._init_weights)

    def _init_weights(self, m):
        """Initialise ``nn.Linear`` and ``nn.LayerNorm`` sub-modules; ignore the rest."""
        if isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Linear):
            nn.init.trunc_normal_(m.weight, std=.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Classify a batch.

        ``x`` is handed directly to ``self.patch_embed`` (presumably an image
        batch [B, C, H, W]; PatchEmbed is defined outside this section).
        Returns the output of ``self.head``.
        """
        tokens, H, W = self.patch_embed(x)
        tokens = self.pos_drop(tokens)

        for stage in self.layers:
            tokens, H, W = stage(tokens, H, W)

        tokens = self.norm(tokens)  # [B, L, C]
        pooled = self.avgpool(tokens.transpose(1, 2))  # [B, C, 1]
        return self.head(torch.flatten(pooled, 1))


def swin_tiny_patch4_window7_224(num_classes: int = 1000, **kwargs):
    """Build the "tiny" Swin Transformer (patch 4, window 7).

    Weights trained on ImageNet-1K:
    https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth

    Args:
        num_classes: size of the classification head.
        **kwargs: forwarded unchanged to ``SwinTransformer``.
    """
    return SwinTransformer(num_classes=num_classes,
                           embed_dim=96,
                           depths=(2, 2, 6, 2),
                           num_heads=(3, 6, 12, 24),
                           window_size=7,
                           patch_size=4,
                           in_chans=3,
                           **kwargs)


def swin_small_patch4_window7_224(num_classes: int = 1000, **kwargs):
    """Build the "small" Swin Transformer (patch 4, window 7).

    Weights trained on ImageNet-1K:
    https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth

    Args:
        num_classes: size of the classification head.
        **kwargs: forwarded unchanged to ``SwinTransformer``.
    """
    return SwinTransformer(num_classes=num_classes,
                           embed_dim=96,
                           depths=(2, 2, 18, 2),
                           num_heads=(3, 6, 12, 24),
                           window_size=7,
                           patch_size=4,
                           in_chans=3,
                           **kwargs)


def swin_base_patch4_window7_224(num_classes: int = 1000, **kwargs):
    """Build the "base" Swin Transformer (patch 4, window 7).

    Weights trained on ImageNet-1K:
    https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224.pth

    Args:
        num_classes: size of the classification head.
        **kwargs: forwarded unchanged to ``SwinTransformer``.
    """
    return SwinTransformer(num_classes=num_classes,
                           embed_dim=128,
                           depths=(2, 2, 18, 2),
                           num_heads=(4, 8, 16, 32),
                           window_size=7,
                           patch_size=4,
                           in_chans=3,
                           **kwargs)


def swin_base_patch4_window12_384(num_classes: int = 1000, **kwargs):
    """Build the "base" Swin Transformer (patch 4, window 12).

    Weights trained on ImageNet-1K:
    https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384.pth

    Args:
        num_classes: size of the classification head.
        **kwargs: forwarded unchanged to ``SwinTransformer``.
    """
    return SwinTransformer(num_classes=num_classes,
                           embed_dim=128,
                           depths=(2, 2, 18, 2),
                           num_heads=(4, 8, 16, 32),
                           window_size=12,
                           patch_size=4,
                           in_chans=3,
                           **kwargs)


def swin_base_patch4_window7_224_in22k(num_classes: int = 21841, **kwargs):
    """Build the "base" Swin Transformer (patch 4, window 7), ImageNet-22K head size by default.

    Weights trained on ImageNet-22K:
    https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224_22k.pth

    Args:
        num_classes: size of the classification head.
        **kwargs: forwarded unchanged to ``SwinTransformer``.
    """
    return SwinTransformer(num_classes=num_classes,
                           embed_dim=128,
                           depths=(2, 2, 18, 2),
                           num_heads=(4, 8, 16, 32),
                           window_size=7,
                           patch_size=4,
                           in_chans=3,
                           **kwargs)


def swin_base_patch4_window12_384_in22k(num_classes: int = 21841, **kwargs):
    """Build the "base" Swin Transformer (patch 4, window 12), ImageNet-22K head size by default.

    Weights trained on ImageNet-22K:
    https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384_22k.pth

    Args:
        num_classes: size of the classification head.
        **kwargs: forwarded unchanged to ``SwinTransformer``.
    """
    return SwinTransformer(num_classes=num_classes,
                           embed_dim=128,
                           depths=(2, 2, 18, 2),
                           num_heads=(4, 8, 16, 32),
                           window_size=12,
                           patch_size=4,
                           in_chans=3,
                           **kwargs)


def swin_large_patch4_window7_224_in22k(num_classes: int = 21841, **kwargs):
    """Build the "large" Swin Transformer (patch 4, window 7), ImageNet-22K head size by default.

    Weights trained on ImageNet-22K:
    https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window7_224_22k.pth

    Args:
        num_classes: size of the classification head.
        **kwargs: forwarded unchanged to ``SwinTransformer``.
    """
    return SwinTransformer(num_classes=num_classes,
                           embed_dim=192,
                           depths=(2, 2, 18, 2),
                           num_heads=(6, 12, 24, 48),
                           window_size=7,
                           patch_size=4,
                           in_chans=3,
                           **kwargs)


def swin_large_patch4_window12_384_in22k(num_classes: int = 21841, **kwargs):
    """Build the "large" Swin Transformer (patch 4, window 12), ImageNet-22K head size by default.

    Weights trained on ImageNet-22K:
    https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth

    Args:
        num_classes: size of the classification head.
        **kwargs: forwarded unchanged to ``SwinTransformer``.
    """
    return SwinTransformer(num_classes=num_classes,
                           embed_dim=192,
                           depths=(2, 2, 18, 2),
                           num_heads=(6, 12, 24, 48),
                           window_size=12,
                           patch_size=4,
                           in_chans=3,
                           **kwargs)


================================================
FILE: pytorch_classification/grad_cam/utils.py
================================================
import cv2
import numpy as np


class ActivationsAndGradients:
    """Capture forward outputs and output-gradients of chosen layers via hooks.

    One forward hook and one backward hook are registered on every layer in
    ``target_layers``. Calling the instance runs ``model`` and refills
    ``activations``; a later backward pass refills ``gradients``.
    """

    def __init__(self, model, target_layers, reshape_transform):
        self.model = model
        self.gradients = []
        self.activations = []
        self.reshape_transform = reshape_transform
        self.handles = []
        for layer in target_layers:
            forward_handle = layer.register_forward_hook(self.save_activation)
            self.handles.append(forward_handle)
            # Older PyTorch versions do not provide the "full" backward hook.
            if hasattr(layer, 'register_full_backward_hook'):
                register_backward = layer.register_full_backward_hook
            else:
                register_backward = layer.register_backward_hook
            self.handles.append(register_backward(self.save_gradient))

    def save_activation(self, module, input, output):
        """Forward hook: store the (optionally reshaped) output, detached, on CPU."""
        if self.reshape_transform is None:
            captured = output
        else:
            captured = self.reshape_transform(output)
        self.activations.append(captured.cpu().detach())

    def save_gradient(self, module, grad_input, grad_output):
        """Backward hook: store the gradient w.r.t. the layer output.

        Hooks fire in reverse layer order during backward, so each new
        gradient is placed at the front to line up with ``activations``.
        """
        if self.reshape_transform is None:
            captured = grad_output[0]
        else:
            captured = self.reshape_transform(grad_output[0])
        self.gradients = [captured.cpu().detach(), *self.gradients]

    def __call__(self, x):
        """Clear both record lists and return ``self.model(x)``."""
        self.gradients = []
        self.activations = []
        return self.model(x)

    def release(self):
        """Remove every hook registered by this object."""
        for hook_handle in self.handles:
            hook_handle.remove()


class GradCAM:
    """Grad-CAM saliency maps for one or more target layers of a model.

    The model is put in eval mode (and moved to CUDA when ``use_cuda`` is
    true). Hooks on ``target_layers`` record activations and gradients; calling
    the instance turns them into one normalised map per input sample.

    Can be used as a context manager; leaving the ``with`` block removes the
    hooks and suppresses ``IndexError`` (after printing it).
    """

    def __init__(self,
                 model,
                 target_layers,
                 reshape_transform=None,
                 use_cuda=False):
        self.model = model.eval()
        self.target_layers = target_layers
        self.reshape_transform = reshape_transform
        self.cuda = use_cuda
        if self.cuda:
            self.model = model.cuda()
        self.activations_and_grads = ActivationsAndGradients(
            self.model, target_layers, reshape_transform)

    @staticmethod
    def get_cam_weights(grads):
        """Get a vector of weights for every channel in the target layer.

        Methods that return weights channels will typically need to only
        implement this function. Here the weight is the mean of ``grads`` over
        axes 2 and 3 (kept as size-1 axes).
        """
        return np.mean(grads, axis=(2, 3), keepdims=True)

    @staticmethod
    def get_loss(output, target_category):
        """Sum ``output[i, target_category[i]]`` over all samples ``i``."""
        loss = 0
        for i, category in enumerate(target_category):
            loss = loss + output[i, category]
        return loss

    def get_cam_image(self, activations, grads):
        """Weight the activations channel-wise and sum over the channel axis (axis 1)."""
        weights = self.get_cam_weights(grads)
        weighted_activations = weights * activations
        return weighted_activations.sum(axis=1)

    @staticmethod
    def get_target_width_height(input_tensor):
        """Return ``(width, height)`` taken from the last two axes of ``input_tensor``."""
        width, height = input_tensor.size(-1), input_tensor.size(-2)
        return width, height

    def compute_cam_per_layer(self, input_tensor):
        """Build one resized, normalised CAM per recorded target layer.

        Returns a list with one array per layer; each array has an extra
        axis 1 of length 1 so the layers can be concatenated along it.
        """
        activations_list = [a.cpu().data.numpy()
                            for a in self.activations_and_grads.activations]
        grads_list = [g.cpu().data.numpy()
                      for g in self.activations_and_grads.gradients]
        target_size = self.get_target_width_height(input_tensor)

        cam_per_target_layer = []
        # Loop over the saliency image from every layer
        for layer_activations, layer_grads in zip(activations_list, grads_list):
            cam = self.get_cam_image(layer_activations, layer_grads)
            # Clamp negatives first, so the min-max scaling below starts at 0.
            cam[cam < 0] = 0
            scaled = self.scale_cam_image(cam, target_size)
            cam_per_target_layer.append(scaled[:, None, :])

        return cam_per_target_layer

    def aggregate_multi_layers(self, cam_per_target_layer):
        """Average the per-layer CAMs (negatives clamped to 0) and rescale the result."""
        cam_per_target_layer = np.concatenate(cam_per_target_layer, axis=1)
        cam_per_target_layer = np.maximum(cam_per_target_layer, 0)
        result = np.mean(cam_per_target_layer, axis=1)
        return self.scale_cam_image(result)

    @staticmethod
    def scale_cam_image(cam, target_size=None):
        """Min-max scale every image in ``cam`` and optionally resize it.

        Args:
            cam: iterable of 2-D arrays.
            target_size: ``(width, height)`` passed to ``cv2.resize``; no
                resizing when ``None``.

        Returns:
            A float32 array stacking the processed images.
        """
        result = []
        for img in cam:
            img = img - np.min(img)
            # The small epsilon keeps an all-constant image from dividing by 0.
            img = img / (1e-7 + np.max(img))
            if target_size is not None:
                img = cv2.resize(img, target_size)
            result.append(img)
        return np.float32(result)

    def __call__(self, input_tensor, target_category=None):
        """Compute the CAM for every sample in ``input_tensor``.

        Args:
            input_tensor: batch fed to the model.
            target_category: ``None`` (use the arg-max class of each sample and
                print it), a single integer (Python or NumPy) applied to every
                sample, or a sequence with one category per sample.

        Returns:
            The aggregated, normalised CAMs from ``aggregate_multi_layers``.
        """
        if self.cuda:
            input_tensor = input_tensor.cuda()

        # Forward pass: the network output is the raw logits (before softmax).
        output = self.activations_and_grads(input_tensor)
        # NumPy integer scalars (e.g. an element of an argmax result) are
        # accepted as well as plain ints.
        if isinstance(target_category, (int, np.integer)):
            target_category = [target_category] * input_tensor.size(0)

        if target_category is None:
            target_category = np.argmax(output.cpu().data.numpy(), axis=-1)
            print(f"category id: {target_category}")
        else:
            assert (len(target_category) == input_tensor.size(0))

        self.model.zero_grad()
        loss = self.get_loss(output, target_category)
        loss.backward(retain_graph=True)

        # In most of the saliency attribution papers, the saliency is
        # computed with a single target layer.
        # Commonly it is the last convolutional layer.
        # Here we support passing a list with multiple target layers.
        # It will compute the saliency image for every image,
        # and then aggregate them (with a default mean aggregation).
        # This gives you more flexibility in case you just want to
        # use all conv layers for example, all Batchnorm layers,
        # or something else.
        cam_per_layer = self.compute_cam_per_layer(input_tensor)
        return self.aggregate_multi_layers(cam_per_layer)

    def __del__(self):
        # __init__ may have failed before the hook recorder was created
        # (e.g. model.cuda() raised); in that case there is nothing to release.
        recorder = getattr(self, "activations_and_grads", None)
        if recorder is not None:
            recorder.release()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        self.activations_and_grads.release()
        if isinstance(exc_value, IndexError):
            # Handle IndexError here...
            print(
                f"An exception occurred in CAM with block: {exc_type}. Message: {exc_value}")
            return True


def show_cam_on_image(img: np.ndarray,
                      mask: np.ndarray,
                      use_rgb: bool = False,
                      colormap: int = cv2.COLORMAP_JET) -> np.ndarray:
    """Overlay a CAM mask on an image as a heat map.

    The heat map is BGR unless ``use_rgb`` is set.

    :param img: The base image in RGB or BGR format; its maximum must not exceed 1.
    :param mask: The cam mask.
    :param use_rgb: Whether to use an RGB or BGR heatmap, this should be set to True if 'img' is in RGB format.
    :param colormap: The OpenCV colormap to be used.
    :returns: The image with the cam overlay, as uint8.
    :raises Exception: if ``np.max(img)`` is greater than 1.
    """
    mask_u8 = np.uint8(255 * mask)
    colored = cv2.applyColorMap(mask_u8, colormap)
    if use_rgb:
        colored = cv2.cvtColor(colored, cv2.COLOR_BGR2RGB)
    colored = np.float32(colored) / 255

    if np.max(img) > 1:
        raise Exception(
            "The input image should np.float32 in the range [0, 1]")

    # Add the two layers and renormalise so the brightest value maps to 255.
    overlay = colored + img
    overlay = overlay / np.max(overlay)
    return np.uint8(255 * overlay)


def center_crop_img(img: np.ndarray, size: int):
    """Resize so the shorter side equals ``size``, then crop the central square.

    Args:
        img: image array whose first two axes are height and width; both
            ``(H, W, C)`` and plain ``(H, W)`` arrays are accepted.
        size: side length of the square output.

    Returns:
        ``img`` itself when it is already ``size`` x ``size``; otherwise the
        centre ``size`` x ``size`` crop of the resized image.
    """
    h, w = img.shape[:2]

    if w == h == size:
        return img

    # Scale the shorter side to exactly `size`. The longer side is truncated
    # to an int; float rounding can push that product just below `size`
    # (e.g. int(49 * (1 / 49)) == 0), so it is clamped to at least `size`.
    if w < h:
        ratio = size / w
        new_w = size
        new_h = max(size, int(h * ratio))
    else:
        ratio = size / h
        new_h = size
        new_w = max(size, int(w * ratio))

    img = cv2.resize(img, dsize=(new_w, new_h))

    # One of the two offsets is always 0 because one side already equals `size`.
    top = (new_h - size) // 2
    left = (new_w - size) // 2
    return img[top: top + size, left: left + size]


================================================
FILE: pytorch_classification/grad_cam/vit_model.py
================================================
"""
original code from rwightman:
https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
"""
from functools import partial
from collections import OrderedDict

import torch
import torch.nn as nn


def drop_path(x, drop_prob: float = 0., training: bool = False):
    """
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Each sample (index along dim 0) is either kept and scaled by
    ``1 / (1 - drop_prob)`` or zeroed as a whole.

    This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,
    the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for
    changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use
    'survival rate' as the argument.

    Args:
        x: input tensor; dim 0 is treated as the sample axis.
        drop_prob: probability of dropping a sample's path.
        training: when False the input is returned unchanged.

    Returns:
        ``x`` itself when ``drop_prob == 0`` or not training, otherwise a new tensor.
    """
    if drop_prob == 0. or not training:
        return x
    keep_prob = 1 - drop_prob
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
    random_tensor.floor_()  # binarize: 1 keeps the sample, 0 drops it
    if keep_prob > 0.:
        return x.div(keep_prob) * random_tensor
    # drop_prob == 1: every sample is dropped. Rescaling would divide by zero
    # and turn the result into NaN (inf * 0), so only the mask is applied.
    return x * random_tensor


class DropPath(nn.Module):
    """
    Module wrapper around ``drop_path``: stochastic depth per sample
    (when applied in main path of residual blocks).
    """
    def __init__(self, drop_prob=None):
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        # The module's own training flag decides whether paths are dropped.
        is_training = self.training
        return drop_path(x, self.drop_prob, is_training)


class PatchEmbed(nn.Module):
    """
    2D Image to Patch Embedding

    A strided convolution turns every ``patch_size`` block of the image into
    one ``embed_dim``-wide token.

    Args:
        img_size: expected input size; an int (square) or an ``(H, W)`` pair.
        patch_size: patch size; an int (square) or an ``(h, w)`` pair.
        in_c: number of input channels.
        embed_dim: width of each output token.
        norm_layer: optional callable ``norm_layer(embed_dim)`` applied to the
            tokens; ``nn.Identity`` is used when it is falsy.
    """
    def __init__(self, img_size=224, patch_size=16, in_c=3, embed_dim=768, norm_layer=None):
        super().__init__()
        img_size = self._to_pair(img_size)
        patch_size = self._to_pair(patch_size)
        self.img_size = img_size
        self.patch_size = patch_size
        self.grid_size = (img_size[0] // patch_size[0], img_size[1] // patch_size[1])
        self.num_patches = self.grid_size[0] * self.grid_size[1]

        self.proj = nn.Conv2d(in_c, embed_dim, kernel_size=patch_size, stride=patch_size)
        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()

    @staticmethod
    def _to_pair(value):
        """Return ``value`` as a 2-tuple; a scalar is used for both entries."""
        if isinstance(value, (tuple, list)):
            if len(value) != 2:
                raise ValueError(f"expected an int or a pair, got {value!r}")
            return tuple(value)
        return (value, value)

    def forward(self, x):
        """Map images [B, C, H, W] to tokens [B, num_patches, embed_dim]."""
        B, C, H, W = x.shape
        assert H == self.img_size[0] and W == self.img_size[1], \
            f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."

        # flatten: [B, C, H, W] -> [B, C, HW]
        # transpose: [B, C, HW] -> [B, HW, C]
        x = self.proj(x).flatten(2).transpose(1, 2)
        x = self.norm(x)
        return x


class Attention(nn.Module):
    """Multi-head self-attention over a token sequence.

    Args:
        dim: dimension of the input tokens.
        num_heads: number of attention heads; each head gets ``dim // num_heads`` channels.
        qkv_bias: whether the fused q/k/v projection has a bias.
        qk_scale: scale applied to ``q @ k^T``; ``head_dim ** -0.5`` when falsy.
        attn_drop_ratio: dropout probability on the attention weights.
        proj_drop_ratio: dropout probability after the output projection.
    """
    def __init__(self,
                 dim,
                 num_heads=8,
                 qkv_bias=False,
                 qk_scale=None,
                 attn_drop_ratio=0.,
                 proj_drop_ratio=0.):
        super().__init__()
        self.num_heads = num_heads
        per_head = dim // num_heads
        self.scale = qk_scale if qk_scale else per_head ** -0.5
        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop_ratio)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop_ratio)

    def forward(self, x):
        """Apply self-attention to ``x`` of shape [batch, tokens, dim]; the shape is preserved."""
        batch, tokens, channels = x.shape
        heads = self.num_heads

        # Fused projection: [batch, tokens, 3 * dim]
        fused = self.qkv(x)
        # -> [batch, tokens, 3, heads, dim_per_head]
        fused = fused.reshape(batch, tokens, 3, heads, channels // heads)
        # -> [3, batch, heads, tokens, dim_per_head]
        fused = fused.permute(2, 0, 3, 1, 4)
        # Index explicitly rather than tuple-unpacking the tensor (TorchScript friendly).
        query = fused[0]
        key = fused[1]
        value = fused[2]

        # [batch, heads, tokens, tokens]
        scores = (query @ key.transpose(-2, -1)) * self.scale
        weights = self.attn_drop(scores.softmax(dim=-1))

        # [batch, heads, tokens, dim_per_head] -> [batch, tokens, heads, dim_per_head]
        # -> [batch, tokens, dim]
        mixed = (weights @ value).transpose(1, 2).reshape(batch, tokens, channels)
        return self.proj_drop(self.proj(mixed))


class Mlp(nn.Module):
    """
    Two-layer MLP as used in Vision Transformer, MLP-Mixer and related networks.

    Args:
        in_features: input width.
        hidden_features: width of the hidden layer; ``in_features`` when falsy.
        out_features: output width; ``in_features`` when falsy.
        act_layer: activation class, instantiated without arguments.
        drop: dropout probability, applied after the activation and after the second layer.
    """
    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        if not out_features:
            out_features = in_features
        if not hidden_features:
            hidden_features = in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        hidden = self.drop(self.act(self.fc1(x)))
        return self.drop(self.fc2(hidden))


class Block(nn.Module):
    """Pre-norm transformer encoder block: attention and MLP, each with a residual connection.

    Args:
        dim: token dimension.
        num_heads: number of attention heads.
        mlp_ratio: hidden width of the MLP as a multiple of ``dim``.
        qkv_bias: forwarded to ``Attention``.
        qk_scale: forwarded to ``Attention``.
        drop_ratio: dropout probability for the attention projection and the MLP.
        attn_drop_ratio: dropout probability on the attention weights.
        drop_path_ratio: stochastic-depth probability; ``nn.Identity`` is used when not above 0.
        act_layer: activation class for the MLP.
        norm_layer: callable building the two normalisation layers from ``dim``.
    """
    def __init__(self,
                 dim,
                 num_heads,
                 mlp_ratio=4.,
                 qkv_bias=False,
                 qk_scale=None,
                 drop_ratio=0.,
                 attn_drop_ratio=0.,
                 drop_path_ratio=0.,
                 act_layer=nn.GELU,
                 norm_layer=nn.LayerNorm):
        super().__init__()
        self.norm1 = norm_layer(dim)
        self.attn = Attention(dim,
                              num_heads=num_heads,
                              qkv_bias=qkv_bias,
                              qk_scale=qk_scale,
                              attn_drop_ratio=attn_drop_ratio,
                              proj_drop_ratio=drop_ratio)
        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
        if drop_path_ratio > 0.:
            self.drop_path = DropPath(drop_path_ratio)
        else:
            self.drop_path = nn.Identity()
        self.norm2 = norm_layer(dim)
        self.mlp = Mlp(in_features=dim,
                       hidden_features=int(dim * mlp_ratio),
                       act_layer=act_layer,
                       drop=drop_ratio)

    def forward(self, x):
        attn_branch = self.attn(self.norm1(x))
        x = x + self.drop_path(attn_branch)
        mlp_branch = self.mlp(self.norm2(x))
        return x + self.drop_path(mlp_branch)


class VisionTransformer(nn.Module):
    """Vision Transformer classifier, optionally with a distillation token and head."""

    def __init__(self, img_size=224, patch_size=16, in_c=3, num_classes=1000,
                 embed_dim=768, depth=12, num_heads=12, mlp_ratio=4.0, qkv_bias=True,
                 qk_scale=None, representation_size=None, distilled=False, drop_ratio=0.,
                 attn_drop_ratio=0., drop_path_ratio=0., embed_layer=PatchEmbed, norm_layer=None,
                 act_layer=None):
        """
        Args:
            img_size (int, tuple): input image size
            patch_size (int, tuple): patch size
            in_c (int): number of input channels
            num_classes (int): number of classes for classification head
            embed_dim (int): embedding dimension
            depth (int): depth of transformer
            num_heads (int): number of attention heads
            mlp_ratio (int): ratio of mlp hidden dim to embedding dim
            qkv_bias (bool): enable bias for qkv if True
            qk_scale (float): override default qk scale of head_dim ** -0.5 if set
            representation_size (Optional[int]): enable and set representation layer (pre-logits) to this value if set
            distilled (bool): model includes a distillation token and head as in DeiT models
            drop_ratio (float): dropout rate
            attn_drop_ratio (float): attention dropout rate
            drop_path_ratio (float): stochastic depth rate
            embed_layer (nn.Module): patch embedding layer
            norm_layer: (nn.Module): normalization layer; LayerNorm with eps=1e-6 when not given
            act_layer: (nn.Module): activation layer; nn.GELU when not given
        """
        super().__init__()
        self.num_classes = num_classes
        self.num_features = embed_dim  # num_features for consistency with other models
        self.embed_dim = embed_dim
        self.num_tokens = 2 if distilled else 1
        if not norm_layer:
            norm_layer = partial(nn.LayerNorm, eps=1e-6)
        if not act_layer:
            act_layer = nn.GELU

        self.patch_embed = embed_layer(img_size=img_size, patch_size=patch_size, in_c=in_c, embed_dim=embed_dim)
        patch_count = self.patch_embed.num_patches

        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.dist_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) if distilled else None
        self.pos_embed = nn.Parameter(torch.zeros(1, patch_count + self.num_tokens, embed_dim))
        self.pos_drop = nn.Dropout(p=drop_ratio)

        # Stochastic depth decay rule: one drop-path rate per block, rising linearly.
        block_rates = [rate.item() for rate in torch.linspace(0, drop_path_ratio, depth)]
        encoder_blocks = []
        for block_idx in range(depth):
            encoder_blocks.append(
                Block(dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio,
                      qkv_bias=qkv_bias, qk_scale=qk_scale,
                      drop_ratio=drop_ratio, attn_drop_ratio=attn_drop_ratio,
                      drop_path_ratio=block_rates[block_idx],
                      norm_layer=norm_layer, act_layer=act_layer))
        self.blocks = nn.Sequential(*encoder_blocks)
        self.norm = norm_layer(embed_dim)

        # Representation layer (pre-logits); only used without distillation.
        self.has_logits = bool(representation_size and not distilled)
        if self.has_logits:
            self.num_features = representation_size
            self.pre_logits = nn.Sequential(OrderedDict([
                ("fc", nn.Linear(embed_dim, representation_size)),
                ("act", nn.Tanh())
            ]))
        else:
            self.pre_logits = nn.Identity()

        # Classifier head(s)
        if num_classes > 0:
            self.head = nn.Linear(self.num_features, num_classes)
        else:
            self.head = nn.Identity()
        self.head_dist = None
        if distilled:
            if num_classes > 0:
                self.head_dist = nn.Linear(self.embed_dim, self.num_classes)
            else:
                self.head_dist = nn.Identity()

        # Weight init
        nn.init.trunc_normal_(self.pos_embed, std=0.02)
        if self.dist_token is not None:
            nn.init.trunc_normal_(self.dist_token, std=0.02)

        nn.init.trunc_normal_(self.cls_token, std=0.02)
        self.apply(_init_vit_weights)

    def forward_features(self, x):
        """Encode images and return the class-token features.

        Returns ``pre_logits(cls)`` without a distillation token, otherwise
        the pair ``(cls, dist)`` of normalised token features.
        """
        # [B, C, H, W] -> [B, num_patches, embed_dim]
        x = self.patch_embed(x)
        batch = x.shape[0]
        # Learned tokens are stored with batch size 1 and expanded to the batch.
        prefix = [self.cls_token.expand(batch, -1, -1)]
        if self.dist_token is not None:
            prefix.append(self.dist_token.expand(batch, -1, -1))
        x = torch.cat((*prefix, x), dim=1)

        x = self.pos_drop(x + self.pos_embed)
        x = self.norm(self.blocks(x))
        if self.dist_token is not None:
            return x[:, 0], x[:, 1]
        return self.pre_logits(x[:, 0])

    def forward(self, x):
        """Return class logits.

        With a distillation head: both head outputs as a tuple while training
        (and not scripting), otherwise their average.
        """
        feats = self.forward_features(x)
        if self.head_dist is None:
            return self.head(feats)

        cls_logits = self.head(feats[0])
        dist_logits = self.head_dist(feats[1])
        if self.training and not torch.jit.is_scripting():
            return cls_logits, dist_logits
        # during inference, return the average of both classifier predictions
        return (cls_logits + dist_logits) / 2


def _init_vit_weights(m):
    """
    ViT weight initialization, meant to be used with ``nn.Module.apply``.

    Linear: truncated normal (std 0.01) weights, zero bias.
    Conv2d: Kaiming normal (fan_out) weights, zero bias.
    LayerNorm: unit weight, zero bias.
    Any other module is left untouched.

    :param m: module
    """
    if isinstance(m, nn.LayerNorm):
        nn.init.zeros_(m.bias)
        nn.init.ones_(m.weight)
        return

    if isinstance(m, nn.Linear):
        nn.init.trunc_normal_(m.weight, std=.01)
    elif isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode="fan_out")
    else:
        return

    # Linear and Conv2d may be built without a bias.
    if m.bias is not None:
        nn.init.zeros_(m.bias)


def vit_base_patch16_224(num_classes: int = 1000):
    """
    Build ViT-Base (ViT-B/16) from the original paper (https://arxiv.org/abs/2010.11929).

    ImageNet-1k weights @ 224x224, source https://github.com/google-research/vision_transformer.
    Weights ported from the official Google JAX impl:
    Link: https://pan.baidu.com/s/1zqb08naP0RPqqfSXfkB2EA  Password: eu9f
    """
    return VisionTransformer(num_classes=num_classes,
                             representation_size=None,
                             num_heads=12,
                             depth=12,
                             embed_dim=768,
                             patch_size=16,
                             img_size=224)


def vit_base_patch16_224_in21k(num_classes: int = 21843, has_logits: bool = True):
    """
    Build ViT-Base (ViT-B/16) from the original paper (https://arxiv.org/abs/2010.11929).

    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
    Weights ported from the official Google JAX impl:
    https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_patch16_224_in21k-e5005f0a.pth

    ``has_logits`` switches the 768-wide representation (pre-logits) layer on or off.
    """
    pre_logits_dim = 768 if has_logits else None
    return VisionTransformer(num_classes=num_classes,
                             representation_size=pre_logits_dim,
                             num_heads=12,
                             depth=12,
                             embed_dim=768,
                             patch_size=16,
                             img_size=224)


def vit_base_patch32_224(num_classes: int = 1000):
    """
    Build ViT-Base (ViT-B/32) from the original paper (https://arxiv.org/abs/2010.11929).

    ImageNet-1k weights @ 224x224, source https://github.com/google-research/vision_transformer.
    Weights ported from the official Google JAX impl:
    Link: https://pan.baidu.com/s/1hCv0U8pQomwAtHBYc4hmZg  Password: s5hl
    """
    return VisionTransformer(num_classes=num_classes,
                             representation_size=None,
                             num_heads=12,
                             depth=12,
                             embed_dim=768,
                             patch_size=32,
                             img_size=224)


def vit_base_patch32_224_in21k(num_classes: int = 21843, has_logits: bool = True):
    """
    Build the ViT-Base/32 (ViT-B/32) model from the original paper
    (https://arxiv.org/abs/2010.11929).

    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
    Weights ported from the official Google JAX implementation:
    https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_patch32_224_in21k-8db57226.pth

    Args:
        num_classes: forwarded unchanged as ``num_classes`` to ``VisionTransformer``.
        has_logits: when truthy, ``representation_size`` is set to 768;
            otherwise ``None`` is passed.

    Returns:
        The newly constructed ``VisionTransformer`` instance.
    """
    # representation_size is only populated when has_logits is requested.
    rep_size = 768 if has_logits else None
    config = dict(
        img_size=224,
        patch_size=32,
        embed_dim=768,
        depth=12,
        num_heads=12,
        representation_size=rep_size,
        num_classes=num_classes,
    )
    return VisionTransformer(**config)


def vit_large_patch16_224(num_classes: int = 1000):
    """
    Build the ViT-Large/16 (ViT-L/16) model from the original paper
    (https://arxiv.org/abs/2010.11929).

    ImageNet-1k weights @ 224x224, source https://github.com/google-research/vision_transformer.
    Weights ported from the official Google JAX implementation:
    link: https://pan.baidu.com/s/1cxBgZJJ6qUWPSBNcE4TdRQ  password: qqt8

    Args:
        num_classes: forwarded unchanged as ``num_classes`` to ``VisionTransformer``.

    Returns:
        The newly constructed ``VisionTransformer`` instance.
    """
    config = dict(
        img_size=224,
        patch_size=16,
        embed_dim=1024,
        depth=24,
        num_heads=16,
        representation_size=None,  # this variant always passes None here
        num_classes=num_classes,
    )
    return VisionTransformer(**config)


def vit_large_patch16_224_in21k(num_classes: int = 21843, has_logits: bool = True):
    """
    Build the ViT-Large/16 (ViT-L/16) model from the original paper
    (https://arxiv.org/abs/2010.11929).

    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
    Weights ported from the official Google JAX implementation:
    https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_patch16_224_in21k-606da67d.pth

    Args:
        num_classes: forwarded unchanged as ``num_classes`` to ``VisionTransformer``.
        has_logits: when truthy, ``representation_size`` is set to 1024;
            otherwise ``None`` is passed.

    Returns:
        The newly constructed ``VisionTransformer`` instance.
    """
    # representation_size is only populated when has_logits is requested.
    rep_size = 1024 if has_logits else None
    config = dict(
        img_size=224,
        patch_size=16,
        embed_dim=1024,
        depth=24,
        num_heads=16,
        representation_size=rep_size,
        num_classes=num_classes,
    )
    return VisionTransformer(**config)


def vit_large_patch32_224_in21k(num_classes: int = 21843, has_logits: bool = True):
    """
    Build the ViT-Large/32 (ViT-L/32) model from the original paper
    (https://arxiv.org/abs/2010.11929).

    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
    Weights ported from the official Google JAX implementation:
    https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_patch32_224_in21k-9046d2e7.pth

    Args:
        num_classes: forwarded unchanged as ``num_classes`` to ``VisionTransformer``.
        has_logits: when truthy, ``representation_size`` is set to 1024;
            otherwise ``None`` is passed.

    Returns:
        The newly constructed ``VisionTransformer`` instance.
    """
    # representation_size is only populated when has_logits is requested.
    rep_size = 1024 if has_logits else None
    config = dict(
        img_size=224,
        patch_size=32,
        embed_dim=1024,
        depth=24,
        num_heads=16,
        representation_size=rep_size,
        num_classes=num_classes,
    )
    return VisionTransformer(**config)


def vit_huge_patch14_224_in21k(num_classes: int = 21843, has_logits: bool = True):
    """
    Build the ViT-Huge/14 (ViT-H/14) model from the original paper
    (https://arxiv.org/abs/2010.11929).

    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
    NOTE: converted weights not currently available, too large for github release hosting.

    Args:
        num_classes: forwarded unchanged as ``num_classes`` to ``VisionTransformer``.
        has_logits: when truthy, ``representation_size`` is set to 1280;
            otherwise ``None`` is passed.

    Returns:
        The newly constructed ``VisionTransformer`` instance.
    """
    # representation_size is only populated when has_logits is requested.
    rep_size = 1280 if has_logits else None
    config = dict(
        img_size=224,
        patch_size=14,
        embed_dim=1280,
        depth=32,
        num_heads=16,
        representation_size=rep_size,
        num_classes=num_classes,
    )
    return VisionTransformer(**config)


================================================
FILE: pytorch_classification/mini_imagenet/README.md
================================================
## download mini-imagenet
Baidu Netdisk link: [https://pan.baidu.com/s/1Uro6RuEbRGGCQ8iXvF2SAQ](https://pan.baidu.com/s/1Uro6RuEbRGGCQ8iXvF2SAQ) (password: hl31)

## dataset path structure
```
├── mini-imagenet: total 100 classes, 60000 images
     ├── images: 60000 images
     ├── train.csv: 64 classes, 38400 images
     ├── val.csv: 16 classes, 9600 images
     └── test.csv: 20 classes, 12000 images
```

================================================
FILE: pytorch_classification/mini_imagenet/imagenet_class_index.json
================================================
{"0": ["n01440764", "tench"], "1": ["n01443537", "goldfish"], "2": ["n01484850", "great_white_shark"], "3": ["n01491361", "tiger_shark"], "4": ["n01494475", "hammerhead"], "5": ["n01496331", "electric_ray"], "6": ["n01498041", "stingray"], "7": ["n01514668", "cock"], "8": ["n01514859", "hen"], "9": ["n01518878", "ostrich"], "10": ["n01530575", "brambling"], "11": ["n01531178", "goldfinch"], "12": ["n01532829", "house_finch"], "13": ["n01534433", "junco"], "14": ["n01537544", "indigo_bunting"], "15": ["n01558993", "robin"], "16": ["n01560419", "bulbul"], "17": ["n01580077", "jay"], "18": ["n01582220", "magpie"], "19": ["n01592084", "chickadee"], "20": ["n01601694", "water_ouzel"], "21": ["n01608432", "kite"], "22": ["n01614925", "bald_eagle"], "23": ["n01616318", "vulture"], "24": ["n01622779", "great_grey_owl"], "25": ["n01629819", "European_fire_salamander"], "26": ["n01630670", "common_newt"], "27": ["n01631663", "eft"], "28": ["n01632458", "spotted_salamander"], "29": ["n01632777", "axolotl"], "30": ["n01641577", "bullfrog"], "31": ["n01644373", "tree_frog"], "32": ["n01644900", "tailed_frog"], "33": ["n01664065", "loggerhead"], "34": ["n01665541", "leatherback_turtle"], "35": ["n01667114", "mud_turtle"], "36": ["n01667778", "terrapin"], "37": ["n01669191", "box_turtle"], "38": ["n01675722", "banded_gecko"], "39": ["n01677366", "common_iguana"], "40": ["n01682714", "American_chameleon"], "41": ["n01685808", "whiptail"], "42": ["n01687978", "agama"], "43": ["n01688243", "frilled_lizard"], "44": ["n01689811", "alligator_lizard"], "45": ["n01692333", "Gila_monster"], "46": ["n01693334", "green_lizard"], "47": ["n01694178", "African_chameleon"], "48": ["n01695060", "Komodo_dragon"], "49": ["n01697457", "African_crocodile"], "50": ["n01698640", "American_alligator"], "51": ["n01704323", "triceratops"], "52": ["n01728572", "thunder_snake"], "53": ["n01728920", "ringneck_snake"], "54": ["n01729322", "hognose_snake"], "55": ["n01729977", "green_snake"], "56": 
["n01734418", "king_snake"], "57": ["n01735189", "garter_snake"], "58": ["n01737021", "water_snake"], "59": ["n01739381", "vine_snake"], "60": ["n01740131", "night_snake"], "61": ["n01742172", "boa_constrictor"], "62": ["n01744401", "rock_python"], "63": ["n01748264", "Indian_cobra"], "64": ["n01749939", "green_mamba"], "65": ["n01751748", "sea_snake"], "66": ["n01753488", "horned_viper"], "67": ["n01755581", "diamondback"], "68": ["n01756291", "sidewinder"], "69": ["n01768244", "trilobite"], "70": ["n01770081", "harvestman"], "71": ["n01770393", "scorpion"], "72": ["n01773157", "black_and_gold_garden_spider"], "73": ["n01773549", "barn_spider"], "74": ["n01773797", "garden_spider"], "75": ["n01774384", "black_widow"], "76": ["n01774750", "tarantula"], "77": ["n01775062", "wolf_spider"], "78": ["n01776313", "tick"], "79": ["n01784675", "centipede"], "80": ["n01795545", "black_grouse"], "81": ["n01796340", "ptarmigan"], "82": ["n01797886", "ruffed_grouse"], "83": ["n01798484", "prairie_chicken"], "84": ["n01806143", "peacock"], "85": ["n01806567", "quail"], "86": ["n01807496", "partridge"], "87": ["n01817953", "African_grey"], "88": ["n01818515", "macaw"], "89": ["n01819313", "sulphur-crested_cockatoo"], "90": ["n01820546", "lorikeet"], "91": ["n01824575", "coucal"], "92": ["n01828970", "bee_eater"], "93": ["n01829413", "hornbill"], "94": ["n01833805", "hummingbird"], "95": ["n01843065", "jacamar"], "96": ["n01843383", "toucan"], "97": ["n01847000", "drake"], "98": ["n01855032", "red-breasted_merganser"], "99": ["n01855672", "goose"], "100": ["n01860187", "black_swan"], "101": ["n01871265", "tusker"], "102": ["n01872401", "echidna"], "103": ["n01873310", "platypus"], "104": ["n01877812", "wallaby"], "105": ["n01882714", "koala"], "106": ["n01883070", "wombat"], "107": ["n01910747", "jellyfish"], "108": ["n01914609", "sea_anemone"], "109": ["n01917289", "brain_coral"], "110": ["n01924916", "flatworm"], "111": ["n01930112", "nematode"], "112": ["n01943899", "conch"], 
"113": ["n01944390", "snail"], "114": ["n01945685", "slug"], "115": ["n01950731", "sea_slug"], "116": ["n01955084", "chiton"], "117": ["n01968897", "chambered_nautilus"], "118": ["n01978287", "Dungeness_crab"], "119": ["n01978455", "rock_crab"], "120": ["n01980166", "fiddler_crab"], "121": ["n01981276", "king_crab"], "122": ["n01983481", "American_lobster"], "123": ["n01984695", "spiny_lobster"], "124": ["n01985128", "crayfish"], "125": ["n01986214", "hermit_crab"], "126": ["n01990800", "isopod"], "127": ["n02002556", "white_stork"], "128": ["n02002724", "black_stork"], "129": ["n02006656", "spoonbill"], "130": ["n02007558", "flamingo"], "131": ["n02009229", "little_blue_heron"], "132": ["n02009912", "American_egret"], "133": ["n02011460", "bittern"], "134": ["n02012849", "crane"], "135": ["n02013706", "limpkin"], "136": ["n02017213", "European_gallinule"], "137": ["n02018207", "American_coot"], "138": ["n02018795", "bustard"], "139": ["n02025239", "ruddy_turnstone"], "140": ["n02027492", "red-backed_sandpiper"], "141": ["n02028035", "redshank"], "142": ["n02033041", "dowitcher"], "143": ["n02037110", "oystercatcher"], "144": ["n02051845", "pelican"], "145": ["n02056570", "king_penguin"], "146": ["n02058221", "albatross"], "147": ["n02066245", "grey_whale"], "148": ["n02071294", "killer_whale"], "149": ["n02074367", "dugong"], "150": ["n02077923", "sea_lion"], "151": ["n02085620", "Chihuahua"], "152": ["n02085782", "Japanese_spaniel"], "153": ["n02085936", "Maltese_dog"], "154": ["n02086079", "Pekinese"], "155": ["n02086240", "Shih-Tzu"], "156": ["n02086646", "Blenheim_spaniel"], "157": ["n02086910", "papillon"], "158": ["n02087046", "toy_terrier"], "159": ["n02087394", "Rhodesian_ridgeback"], "160": ["n02088094", "Afghan_hound"], "161": ["n02088238", "basset"], "162": ["n02088364", "beagle"], "163": ["n02088466", "bloodhound"], "164": ["n02088632", "bluetick"], "165": ["n02089078", "black-and-tan_coonhound"], "166": ["n02089867", "Walker_hound"], "167": 
["n02089973", "English_foxhound"], "168": ["n02090379", "redbone"], "169": ["n02090622", "borzoi"], "170": ["n02090721", "Irish_wolfhound"], "171": ["n02091032", "Italian_greyhound"], "172": ["n02091134", "whippet"], "173": ["n02091244", "Ibizan_hound"], "174": ["n02091467", "Norwegian_elkhound"], "175": ["n02091635", "otterhound"], "176": ["n02091831", "Saluki"], "177": ["n02092002", "Scottish_deerhound"], "178": ["n02092339", "Weimaraner"], "179": ["n02093256", "Staffordshire_bullterrier"], "180": ["n02093428", "American_Staffordshire_terrier"], "181": ["n02093647", "Bedlington_terrier"], "182": ["n02093754", "Border_terrier"], "183": ["n02093859", "Kerry_blue_terrier"], "184": ["n02093991", "Irish_terrier"], "185": ["n02094114", "Norfolk_terrier"], "186": ["n02094258", "Norwich_terrier"], "187": ["n02094433", "Yorkshire_terrier"], "188": ["n02095314", "wire-haired_fox_terrier"], "189": ["n02095570", "Lakeland_terrier"], "190": ["n02095889", "Sealyham_terrier"], "191": ["n02096051", "Airedale"], "192": ["n02096177", "cairn"], "193": ["n02096294", "Australian_terrier"], "194": ["n02096437", "Dandie_Dinmont"], "195": ["n02096585", "Boston_bull"], "196": ["n02097047", "miniature_schnauzer"], "197": ["n02097130", "giant_schnauzer"], "198": ["n02097209", "standard_schnauzer"], "199": ["n02097298", "Scotch_terrier"], "200": ["n02097474", "Tibetan_terrier"], "201": ["n02097658", "silky_terrier"], "202": ["n02098105", "soft-coated_wheaten_terrier"], "203": ["n02098286", "West_Highland_white_terrier"], "204": ["n02098413", "Lhasa"], "205": ["n02099267", "flat-coated_retriever"], "206": ["n02099429", "curly-coated_retriever"], "207": ["n02099601", "golden_retriever"], "208": ["n02099712", "Labrador_retriever"], "209": ["n02099849", "Chesapeake_Bay_retriever"], "210": ["n02100236", "German_short-haired_pointer"], "211": ["n02100583", "vizsla"], "212": ["n02100735", "English_setter"], "213": ["n02100877", "Irish_setter"], "214": ["n02101006", "Gordon_setter"], "215": 
["n02101388", "Brittany_spaniel"], "216": ["n02101556", "clumber"], "217": ["n02102040", "English_springer"], "218": ["n02102177", "Welsh_springer_spaniel"], "219": ["n02102318", "cocker_spaniel"], "220": ["n02102480", "Sussex_spaniel"], "221": ["n02102973", "Irish_water_spaniel"], "222": ["n02104029", "kuvasz"], "223": ["n02104365", "schipperke"], "224": ["n02105056", "groenendael"], "225": ["n02105162", "malinois"], "226": ["n02105251", "briard"], "227": ["n02105412", "kelpie"], "228": ["n02105505", "komondor"], "229": ["n02105641", "Old_English_sheepdog"], "230": ["n02105855", "Shetland_sheepdog"], "231": ["n02106030", "collie"], "232": ["n02106166", "Border_collie"], "233": ["n02106382", "Bouvier_des_Flandres"], "234": ["n02106550", "Rottweiler"], "235": ["n02106662", "German_shepherd"], "236": ["n02107142", "Doberman"], "237": ["n02107312", "miniature_pinscher"], "238": ["n02107574", "Greater_Swiss_Mountain_dog"], "239": ["n02107683", "Bernese_mountain_dog"], "240": ["n02107908", "Appenzeller"], "241": ["n02108000", "EntleBucher"], "242": ["n02108089", "boxer"], "243": ["n02108422", "bull_mastiff"], "244": ["n02108551", "Tibetan_mastiff"], "245": ["n02108915", "French_bulldog"], "246": ["n02109047", "Great_Dane"], "247": ["n02109525", "Saint_Bernard"], "248": ["n02109961", "Eskimo_dog"], "249": ["n02110063", "malamute"], "250": ["n02110185", "Siberian_husky"], "251": ["n02110341", "dalmatian"], "252": ["n02110627", "affenpinscher"], "253": ["n02110806", "basenji"], "254": ["n02110958", "pug"], "255": ["n02111129", "Leonberg"], "256": ["n02111277", "Newfoundland"], "257": ["n02111500", "Great_Pyrenees"], "258": ["n02111889", "Samoyed"], "259": ["n02112018", "Pomeranian"], "260": ["n02112137", "chow"], "261": ["n02112350", "keeshond"], "262": ["n02112706", "Brabancon_griffon"], "263": ["n02113023", "Pembroke"], "264": ["n02113186", "Cardigan"], "265": ["n02113624", "toy_poodle"], "266": ["n02113712", "miniature_poodle"], "267": ["n02113799", "standard_poodle"], 
"268": ["n02113978", "Mexican_hairless"], "269": ["n02114367", "timber_wolf"], "270": ["n02114548", "white_wolf"], "271": ["n02114712", "red_wolf"], "272": ["n02114855", "coyote"], "273": ["n02115641", "dingo"], "274": ["n02115913", "dhole"], "275": ["n02116738", "African_hunting_dog"], "276": ["n02117135", "hyena"], "277": ["n02119022", "red_fox"], "278": ["n02119789", "kit_fox"], "279": ["n02120079", "Arctic_fox"], "280": ["n02120505", "grey_fox"], "281": ["n02123045", "tabby"], "282": ["n02123159", "tiger_cat"], "283": ["n02123394", "Persian_cat"], "284": ["n02123597", "Siamese_cat"], "285": ["n02124075", "Egyptian_cat"], "286": ["n02125311", "cougar"], "287": ["n02127052", "lynx"], "288": ["n02128385", "leopard"], "289": ["n02128757", "snow_leopard"], "290": ["n02128925", "jaguar"], "291": ["n02129165", "lion"], "292": ["n02129604", "tiger"], "293": ["n02130308", "cheetah"], "294": ["n02132136", "brown_bear"], "295": ["n02133161", "American_black_bear"], "296": ["n02134084", "ice_bear"], "297": ["n02134418", "sloth_bear"], "298": ["n02137549", "mongoose"], "299": ["n02138441", "meerkat"], "300": ["n02165105", "tiger_beetle"], "301": ["n02165456", "ladybug"], "302": ["n02167151", "ground_beetle"], "303": ["n02168699", "long-horned_beetle"], "304": ["n02169497", "leaf_beetle"], "305": ["n02172182", "dung_beetle"], "306": ["n02174001", "rhinoceros_beetle"], "307": ["n02177972", "weevil"], "308": ["n02190166", "fly"], "309": ["n02206856", "bee"], "310": ["n02219486", "ant"], "311": ["n02226429", "grasshopper"], "312": ["n02229544", "cricket"], "313": ["n02231487", "walking_stick"], "314": ["n02233338", "cockroach"], "315": ["n02236044", "mantis"], "316": ["n02256656", "cicada"], "317": ["n02259212", "leafhopper"], "318": ["n02264363", "lacewing"], "319": ["n02268443", "dragonfly"], "320": ["n02268853", "damselfly"], "321": ["n02276258", "admiral"], "322": ["n02277742", "ringlet"], "323": ["n02279972", "monarch"], "324": ["n02280649", "cabbage_butterfly"], "325": 
["n02281406", "sulphur_butterfly"], "326": ["n02281787", "lycaenid"], "327": ["n02317335", "starfish"], "328": ["n02319095", "sea_urchin"], "329": ["n02321529", "sea_cucumber"], "330": ["n02325366", "wood_rabbit"], "331": ["n02326432", "hare"], "332": ["n02328150", "Angora"], "333": ["n02342885", "hamster"], "334": ["n02346627", "porcupine"], "335": ["n02356798", "fox_squirrel"], "336": ["n02361337", "marmot"], "337": ["n02363005", "beaver"], "338": ["n02364673", "guinea_pig"], "339": ["n02389026", "sorrel"], "340": ["n02391049", "zebra"], "341": ["n02395406", "hog"], "342": ["n02396427", "wild_boar"], "343": ["n02397096", "warthog"], "344": ["n02398521", "hippopotamus"], "345": ["n02403003", "ox"], "346": ["n02408429", "water_buffalo"], "347": ["n02410509", "bison"], "348": ["n02412080", "ram"], "349": ["n02415577", "bighorn"], "350": ["n02417914", "ibex"], "351": ["n02422106", "hartebeest"], "352": ["n02422699", "impala"], "353": ["n02423022", "gazelle"], "354": ["n02437312", "Arabian_camel"], "355": ["n02437616", "llama"], "356": ["n02441942", "weasel"], "357": ["n02442845", "mink"], "358": ["n02443114", "polecat"], "359": ["n02443484", "black-footed_ferret"], "360": ["n02444819", "otter"], "361": ["n02445715", "skunk"], "362": ["n02447366", "badger"], "363": ["n02454379", "armadillo"], "364": ["n02457408", "three-toed_sloth"], "365": ["n02480495", "orangutan"], "366": ["n02480855", "gorilla"], "367": ["n02481823", "chimpanzee"], "368": ["n02483362", "gibbon"], "369": ["n02483708", "siamang"], "370": ["n02484975", "guenon"], "371": ["n02486261", "patas"], "372": ["n02486410", "baboon"], "373": ["n02487347", "macaque"], "374": ["n02488291", "langur"], "375": ["n02488702", "colobus"], "376": ["n02489166", "proboscis_monkey"], "377": ["n02490219", "marmoset"], "378": ["n02492035", "capuchin"], "379": ["n02492660", "howler_monkey"], "380": ["n02493509", "titi"], "381": ["n02493793", "spider_monkey"], "382": ["n02494079", "squirrel_monkey"], "383": ["n02497673", 
"Madagascar_cat"], "384": ["n02500267", "indri"], "385": ["n02504013", "Indian_elephant"], "386": ["n02504458", "African_elephant"], "387": ["n02509815", "lesser_panda"], "388": ["n02510455", "giant_panda"], "389": ["n02514041", "barracouta"], "390": ["n02526121", "eel"], "391": ["n02536864", "coho"], "392": ["n02606052", "rock_beauty"], "393": ["n02607072", "anemone_fish"], "394": ["n02640242", "sturgeon"], "395": ["n02641379", "gar"], "396": ["n02643566", "lionfish"], "397": ["n02655020", "puffer"], "398": ["n02666196", "abacus"], "399": ["n02667093", "abaya"], "400": ["n02669723", "academic_gown"], "401": ["n02672831", "accordion"], "402": ["n02676566", "acoustic_guitar"], "403": ["n02687172", "aircraft_carrier"], "404": ["n02690373", "airliner"], "405": ["n02692877", "airship"], "406": ["n02699494", "altar"], "407": ["n02701002", "ambulance"], "408": ["n02704792", "amphibian"], "409": ["n02708093", "analog_clock"], "410": ["n02727426", "apiary"], "411": ["n02730930", "apron"], "412": ["n02747177", "ashcan"], "413": ["n02749479", "assault_rifle"], "414": ["n02769748", "backpack"], "415": ["n02776631", "bakery"], "416": ["n02777292", "balance_beam"], "417": ["n02782093", "balloon"], "418": ["n02783161", "ballpoint"], "419": ["n02786058", "Band_Aid"], "420": ["n02787622", "banjo"], "421": ["n02788148", "bannister"], "422": ["n02790996", "barbell"], "423": ["n02791124", "barber_chair"], "424": ["n02791270", "barbershop"], "425": ["n02793495", "barn"], "426": ["n02794156", "barometer"], "427": ["n02795169", "barrel"], "428": ["n02797295", "barrow"], "429": ["n02799071", "baseball"], "430": ["n02802426", "basketball"], "431": ["n02804414", "bassinet"], "432": ["n02804610", "bassoon"], "433": ["n02807133", "bathing_cap"], "434": ["n02808304", "bath_towel"], "435": ["n02808440", "bathtub"], "436": ["n02814533", "beach_wagon"], "437": ["n02814860", "beacon"], "438": ["n02815834", "beaker"], "439": ["n02817516", "bearskin"], "440": ["n02823428", "beer_bottle"], "441": 
["n02823750", "beer_glass"], "442": ["n02825657", "bell_cote"], "443": ["n02834397", "bib"], "444": ["n02835271", "bicycle-built-for-two"], "445": ["n02837789", "bikini"], "446": ["n02840245", "binder"], "447": ["n02841315", "binoculars"], "448": ["n02843684", "birdhouse"], "449": ["n02859443", "boathouse"], "450": ["n02860847", "bobsled"], "451": ["n02865351", "bolo_tie"], "452": ["n02869837", "bonnet"], "453": ["n02870880", "bookcase"], "454": ["n02871525", "bookshop"], "455": ["n02877765", "bottlecap"], "456": ["n02879718", "bow"], "457": ["n02883205", "bow_tie"], "458": ["n02892201", "brass"], "459": ["n02892767", "brassiere"], "460": ["n02894605", "breakwater"], "461": ["n02895154", "breastplate"], "462": ["n02906734", "broom"], "463": ["n02909870", "bucket"], "464": ["n02910353", "buckle"], "465": ["n02916936", "bulletproof_vest"], "466": ["n02917067", "bullet_train"], "467": ["n02927161", "butcher_shop"], "468": ["n02930766", "cab"], "469": ["n02939185", "caldron"], "470": ["n02948072", "candle"], "471": ["n02950826", "cannon"], "472": ["n02951358", "canoe"], "473": ["n02951585", "can_opener"], "474": ["n02963159", "cardigan"], "475": ["n02965783", "car_mirror"], "476": ["n02966193", "carousel"], "477": ["n02966687", "carpenter's_kit"], "478": ["n02971356", "carton"], "479": ["n02974003", "car_wheel"], "480": ["n02977058", "cash_machine"], "481": ["n02978881", "cassette"], "482": ["n02979186", "cassette_player"], "483": ["n02980441", "castle"], "484": ["n02981792", "catamaran"], "485": ["n02988304", "CD_player"], "486": ["n02992211", "cello"], "487": ["n02992529", "cellular_telephone"], "488": ["n02999410", "chain"], "489": ["n03000134", "chainlink_fence"], "490": ["n03000247", "chain_mail"], "491": ["n03000684", "chain_saw"], "492": ["n03014705", "chest"], "493": ["n03016953", "chiffonier"], "494": ["n03017168", "chime"], "495": ["n03018349", "china_cabinet"], "496": ["n03026506", "Christmas_stocking"], "497": ["n03028079", "church"], "498": ["n03032252", 
"cinema"], "499": ["n03041632", "cleaver"], "500": ["n03042490", "cliff_dwelling"], "501": ["n03045698", "cloak"], "502": ["n03047690", "clog"], "503": ["n03062245", "cocktail_shaker"], "504": ["n03063599", "coffee_mug"], "505": ["n03063689", "coffeepot"], "506": ["n03065424", "coil"], "507": ["n03075370", "combination_lock"], "508": ["n03085013", "computer_keyboard"], "509": ["n03089624", "confectionery"], "510": ["n03095699", "container_ship"], "511": ["n03100240", "convertible"], "512": ["n03109150", "corkscrew"], "513": ["n03110669", "cornet"], "514": ["n03124043", "cowboy_boot"], "515": ["n03124170", "cowboy_hat"], "516": ["n03125729", "cradle"], "517": ["n03126707", "crane"], "518": ["n03127747", "crash_helmet"], "519": ["n03127925", "crate"], "520": ["n03131574", "crib"], "521": ["n03133878", "Crock_Pot"], "522": ["n03134739", "croquet_ball"], "523": ["n03141823", "crutch"], "524": ["n03146219", "cuirass"], "525": ["n03160309", "dam"], "526": ["n03179701", "desk"], "527": ["n03180011", "desktop_computer"], "528": ["n03187595", "dial_telephone"], "529": ["n03188531", "diaper"], "530": ["n03196217", "digital_clock"], "531": ["n03197337", "digital_watch"], "532": ["n03201208", "dining_table"], "533": ["n03207743", "dishrag"], "534": ["n03207941", "dishwasher"], "535": ["n03208938", "disk_brake"], "536": ["n03216828", "dock"], "537": ["n03218198", "dogsled"], "538": ["n03220513", "dome"], "539": ["n03223299", "doormat"], "540": ["n03240683", "drilling_platform"], "541": ["n03249569", "drum"], "542": ["n03250847", "drumstick"], "543": ["n03255030", "dumbbell"], "544": ["n03259280", "Dutch_oven"], "545": ["n03271574", "electric_fan"], "546": ["n03272010", "electric_guitar"], "547": ["n03272562", "electric_locomotive"], "548": ["n03290653", "entertainment_center"], "549": ["n03291819", "envelope"], "550": ["n03297495", "espresso_maker"], "551": ["n03314780", "face_powder"], "552": ["n03325584", "feather_boa"], "553": ["n03337140", "file"], "554": ["n03344393", 
"fireboat"], "555": ["n03345487", "fire_engine"], "556": ["n03347037", "fire_screen"], "557": ["n03355925", "flagpole"], "558": ["n03372029", "flute"], "559": ["n03376595", "folding_chair"], "560": ["n03379051", "football_helmet"], "561": ["n03384352", "forklift"], "562": ["n03388043", "fountain"], "563": ["n03388183", "fountain_pen"], "564": ["n03388549", "four-poster"], "565": ["n03393912", "freight_car"], "566": ["n03394916", "French_horn"], "567": ["n03400231", "frying_pan"], "568": ["n03404251", "fur_coat"], "569": ["n03417042", "garbage_truck"], "570": ["n03424325", "gasmask"], "571": ["n03425413", "gas_pump"], "572": ["n03443371", "goblet"], "573": ["n03444034", "go-kart"], "574": ["n03445777", "golf_ball"], "575": ["n03445924", "golfcart"], "576": ["n03447447", "gondola"], "577": ["n03447721", "gong"], "578": ["n03450230", "gown"], "579": ["n03452741", "grand_piano"], "580": ["n03457902", "greenhouse"], "581": ["n03459775", "grille"], "582": ["n03461385", "grocery_store"], "583": ["n03467068", "guillotine"], "584": ["n03476684", "hair_slide"], "585": ["n03476991", "hair_spray"], "586": ["n03478589", "half_track"], "587": ["n03481172", "hammer"], "588": ["n03482405", "hamper"], "589": ["n03483316", "hand_blower"], "590": ["n03485407", "hand-held_computer"], "591": ["n03485794", "handkerchief"], "592": ["n03492542", "hard_disc"], "593": ["n03494278", "harmonica"], "594": ["n03495258", "harp"], "595": ["n03496892", "harvester"], "596": ["n03498962", "hatchet"], "597": ["n03527444", "holster"], "598": ["n03529860", "home_theater"], "599": ["n03530642", "honeycomb"], "600": ["n03532672", "hook"], "601": ["n03534580", "hoopskirt"], "602": ["n03535780", "horizontal_bar"], "603": ["n03538406", "horse_cart"], "604": ["n03544143", "hourglass"], "605": ["n03584254", "iPod"], "606": ["n03584829", "iron"], "607": ["n03590841", "jack-o'-lantern"], "608": ["n03594734", "jean"], "609": ["n03594945", "jeep"], "610": ["n03595614", "jersey"], "611": ["n03598930", 
"jigsaw_puzzle"], "612": ["n03599486", "jinrikisha"], "613": ["n03602883", "joystick"], "614": ["n03617480", "kimono"], "615": ["n03623198", "knee_pad"], "616": ["n03627232", "knot"], "617": ["n03630383", "lab_coat"], "618": ["n03633091", "ladle"], "619": ["n03637318", "lampshade"], "620": ["n03642806", "laptop"], "621": ["n03649909", "lawn_mower"], "622": ["n03657121", "lens_cap"], "623": ["n03658185", "letter_opener"], "624": ["n03661043", "library"], "625": ["n03662601", "lifeboat"], "626": ["n03666591", "lighter"], "627": ["n03670208", "limousine"], "628": ["n03673027", "liner"], "629": ["n03676483", "lipstick"], "630": ["n03680355", "Loafer"], "631": ["n03690938", "lotion"], "632": ["n03691459", "loudspeaker"], "633": ["n03692522", "loupe"], "634": ["n03697007", "lumbermill"], "635": ["n03706229", "magnetic_compass"], "636": ["n03709823", "mailbag"], "637": ["n03710193", "mailbox"], "638": ["n03710637", "maillot"], "639": ["n03710721", "maillot"], "640": ["n03717622", "manhole_cover"], "641": ["n03720891", "maraca"], "642": ["n03721384", "marimba"], "643": ["n03724870", "mask"], "644": ["n03729826", "matchstick"], "645": ["n03733131", "maypole"], "646": ["n03733281", "maze"], "647": ["n03733805", "measuring_cup"], "648": ["n03742115", "medicine_chest"], "649": ["n03743016", "megalith"], "650": ["n03759954", "microphone"], "651": ["n03761084", "microwave"], "652": ["n03763968", "military_uniform"], "653": ["n03764736", "milk_can"], "654": ["n03769881", "minibus"], "655": ["n03770439", "miniskirt"], "656": ["n03770679", "minivan"], "657": ["n03773504", "missile"], "658": ["n03775071", "mitten"], "659": ["n03775546", "mixing_bowl"], "660": ["n03776460", "mobile_home"], "661": ["n03777568", "Model_T"], "662": ["n03777754", "modem"], "663": ["n03781244", "monastery"], "664": ["n03782006", "monitor"], "665": ["n03785016", "moped"], "666": ["n03786901", "mortar"], "667": ["n03787032", "mortarboard"], "668": ["n03788195", "mosque"], "669": ["n03788365", 
"mosquito_net"], "670": ["n03791053", "motor_scooter"], "671": ["n03792782", "mountain_bike"], "672": ["n03792972", "mountain_tent"], "673": ["n03793489", "mouse"], "674": ["n03794056", "mousetrap"], "675": ["n03796401", "moving_van"], "676": ["n03803284", "muzzle"], "677": ["n03804744", "nail"], "678": ["n03814639", "neck_brace"], "679": ["n03814906", "necklace"], "680": ["n03825788", "nipple"], "681": ["n03832673", "notebook"], "682": ["n03837869", "obelisk"], "683": ["n03838899", "oboe"], "684": ["n03840681", "ocarina"], "685": ["n03841143", "odometer"], "686": ["n03843555", "oil_filter"], "687": ["n03854065", "organ"], "688": ["n03857828", "oscilloscope"], "689": ["n03866082", "overskirt"], "690": ["n03868242", "oxcart"], "691": ["n03868863", "oxygen_mask"], "692": ["n03871628", "packet"], "693": ["n03873416", "paddle"], "694": ["n03874293", "paddlewheel"], "695": ["n03874599", "padlock"], "696": ["n03876231", "paintbrush"], "697": ["n03877472", "pajama"], "698": ["n03877845", "palace"], "699": ["n03884397", "panpipe"], "700": ["n03887697", "paper_towel"], "701": ["n03888257", "parachute"], "702": ["n03888605", "parallel_bars"], "703": ["n03891251", "park_bench"], "704": ["n03891332", "parking_meter"], "705": ["n03895866", "passenger_car"], "706": ["n03899768", "patio"], "707": ["n03902125", "pay-phone"], "708": ["n03903868", "pedestal"], "709": ["n03908618", "pencil_box"], "710": ["n03908714", "pencil_sharpener"], "711": ["n03916031", "perfume"], "712": ["n03920288", "Petri_dish"], "713": ["n03924679", "photocopier"], "714": ["n03929660", "pick"], "715": ["n03929855", "pickelhaube"], "716": ["n03930313", "picket_fence"], "717": ["n03930630", "pickup"], "718": ["n03933933", "pier"], "719": ["n03935335", "piggy_bank"], "720": ["n03937543", "pill_bottle"], "721": ["n03938244", "pillow"], "722": ["n03942813", "ping-pong_ball"], "723": ["n03944341", "pinwheel"], "724": ["n03947888", "pirate"], "725": ["n03950228", "pitcher"], "726": ["n03954731", "plane"], "727": 
["n03956157", "planetarium"], "728": ["n03958227", "plastic_bag"], "729": ["n03961711", "plate_rack"], "730": ["n03967562", "plow"], "731": ["n03970156", "plunger"], "732": ["n03976467", "Polaroid_camera"], "733": ["n03976657", "pole"], "734": ["n03977966", "police_van"], "735": ["n03980874", "poncho"], "736": ["n03982430", "pool_table"], "737": ["n03983396", "pop_bottle"], "738": ["n03991062", "pot"], "739": ["n03992509", "potter's_wheel"], "740": ["n03995372", "power_drill"], "741": ["n03998194", "prayer_rug"], "742": ["n04004767", "printer"], "743": ["n04005630", "prison"], "744": ["n04008634", "projectile"], "745": ["n04009552", "projector"], "746": ["n04019541", "puck"], "747": ["n04023962", "punching_bag"], "748": ["n04026417", "purse"], "749": ["n04033901", "quill"], "750": ["n04033995", "quilt"], "751": ["n04037443", "racer"], "752": ["n04039381", "racket"], "753": ["n04040759", "radiator"], "754": ["n04041544", "radio"], "755": ["n04044716", "radio_telescope"], "756": ["n04049303", "rain_barrel"], "757": ["n04065272", "recreational_vehicle"], "758": ["n04067472", "reel"], "759": ["n04069434", "reflex_camera"], "760": ["n04070727", "refrigerator"], "761": ["n04074963", "remote_control"], "762": ["n04081281", "restaurant"], "763": ["n04086273", "revolver"], "764": ["n04090263", "rifle"], "765": ["n04099969", "rocking_chair"], "766": ["n04111531", "rotisserie"], "767": ["n04116512", "rubber_eraser"], "768": ["n04118538", "rugby_ball"], "769": ["n04118776", "rule"], "770": ["n04120489", "running_shoe"], "771": ["n04125021", "safe"], "772": ["n04127249", "safety_pin"], "773": ["n04131690", "saltshaker"], "774": ["n04133789", "sandal"], "775": ["n04136333", "sarong"], "776": ["n04141076", "sax"], "777": ["n04141327", "scabbard"], "778": ["n04141975", "scale"], "779": ["n04146614", "school_bus"], "780": ["n04147183", "schooner"], "781": ["n04149813", "scoreboard"], "782": ["n04152593", "screen"], "783": ["n04153751", "screw"], "784": ["n04154565", "screwdriver"], 
"785": ["n04162706", "seat_belt"], "786": ["n04179913", "sewing_machine"], "787": ["n04192698", "shield"], "788": ["n04200800", "shoe_shop"], "789": ["n04201297", "shoji"], "790": ["n04204238", "shopping_basket"], "791": ["n04204347", "shopping_cart"], "792": ["n04208210", "shovel"], "793": ["n04209133", "shower_cap"], "794": ["n04209239", "shower_curtain"], "795": ["n04228054", "ski"], "796": ["n04229816", "ski_mask"], "797": ["n04235860", "sleeping_bag"], "798": ["n04238763", "slide_rule"], "799": ["n04239074", "sliding_door"], "800": ["n04243546", "slot"], "801": ["n04251144", "snorkel"], "802": ["n04252077", "snowmobile"], "803": ["n04252225", "snowplow"], "804": ["n04254120", "soap_dispenser"], "805": ["n04254680", "soccer_ball"], "806": ["n04254777", "sock"], "807": ["n04258138", "solar_dish"], "808": ["n04259630", "sombrero"], "809": ["n04263257", "soup_bowl"], "810": ["n04264628", "space_bar"], "811": ["n04265275", "space_heater"], "812": ["n04266014", "space_shuttle"], "813": ["n04270147", "spatula"], "814": ["n04273569", "speedboat"], "815": ["n04275548", "spider_web"], "816": ["n04277352", "spindle"], "817": ["n04285008", "sports_car"], "818": ["n04286575", "spotlight"], "819": ["n04296562", "stage"], "820": ["n04310018", "steam_locomotive"], "821": ["n04311004", "steel_arch_bridge"], "822": ["n04311174", "steel_drum"], "823": ["n04317175", "stethoscope"], "824": ["n04325704", "stole"], "825": ["n04326547", "stone_wall"], "826": ["n04328186", "stopwatch"], "827": ["n04330267", "stove"], "828": ["n04332243", "strainer"], "829": ["n04335435", "streetcar"], "830": ["n04336792", "stretcher"], "831": ["n04344873", "studio_couch"], "832": ["n04346328", "stupa"], "833": ["n04347754", "submarine"], "834": ["n04350905", "suit"], "835": ["n04355338", "sundial"], "836": ["n04355933", "sunglass"], "837": ["n04356056", "sunglasses"], "838": ["n04357314", "sunscreen"], "839": ["n04366367", "suspension_bridge"], "840": ["n04367480", "swab"], "841": ["n04370456", 
"sweatshirt"], "842": ["n04371430", "swimming_trunks"], "843": ["n04371774", "swing"], "844": ["n04372370", "switch"], "845": ["n04376876", "syringe"], "846": ["n04380533", "table_lamp"], "847": ["n04389033", "tank"], "848": ["n04392985", "tape_player"], "849": ["n04398044", "teapot"], "850": ["n04399382", "teddy"], "851": ["n04404412", "television"], "852": ["n04409515", "tennis_ball"], "853": ["n04417672", "thatch"], "854": ["n04418357", "theater_curtain"], "855": ["n04423845", "thimble"], "856": ["n04428191", "thresher"], "857": ["n04429376", "throne"], "858": ["n04435653", "tile_roof"], "859": ["n04442312", "toaster"], "860": ["n04443257", "tobacco_shop"], "861": ["n04447861", "toilet_seat"], "862": ["n04456115", "torch"], "863": ["n04458633", "totem_pole"], "864": ["n04461696", "tow_truck"], "865": ["n04462240", "toyshop"], "866": ["n04465501", "tractor"], "867": ["n04467665", "trailer_truck"], "868": ["n04476259", "tray"], "869": ["n04479046", "trench_coat"], "870": ["n04482393", "tricycle"], "871": ["n04483307", "trimaran"], "872": ["n04485082", "tripod"], "873": ["n04486054", "triumphal_arch"], "874": ["n04487081", "trolleybus"], "875": ["n04487394", "trombone"], "876": ["n04493381", "tub"], "877": ["n04501370", "turnstile"], "878": ["n04505470", "typewriter_keyboard"], "879": ["n04507155", "umbrella"], "880": ["n04509417", "unicycle"], "881": ["n04515003", "upright"], "882": ["n04517823", "vacuum"], "883": ["n04522168", "vase"], "884": ["n04523525", "vault"], "885": ["n04525038", "velvet"], "886": ["n04525305", "vending_machine"], "887": ["n04532106", "vestment"], "888": ["n04532670", "viaduct"], "889": ["n04536866", "violin"], "890": ["n04540053", "volleyball"], "891": ["n04542943", "waffle_iron"], "892": ["n04548280", "wall_clock"], "893": ["n04548362", "wallet"], "894": ["n04550184", "wardrobe"], "895": ["n04552348", "warplane"], "896": ["n04553703", "washbasin"], "897": ["n04554684", "washer"], "898": ["n04557648", "water_bottle"], "899": ["n04560804", 
"water_jug"], "900": ["n04562935", "water_tower"], "901": ["n04579145", "whiskey_jug"], "902": ["n04579432", "whistle"], "903": ["n04584207", "wig"], "904": ["n04589890", "window_screen"], "905": ["n04590129", "window_shade"], "906": ["n04591157", "Windsor_tie"], "907": ["n04591713", "wine_bottle"], "908": ["n04592741", "wing"], "909": ["n04596742", "wok"], "910": ["n04597913", "wooden_spoon"], "911": ["n04599235", "wool"], "912": ["n04604644", "worm_fence"], "913": ["n04606251", "wreck"], "914": ["n04612504", "yawl"], "915": ["n04613696", "yurt"], "916": ["n06359193", "web_site"], "917": ["n06596364", "comic_book"], "918": ["n06785654", "crossword_puzzle"], "919": ["n06794110", "street_sign"], "920": ["n06874185", "traffic_light"], "921": ["n07248320", "book_jacket"], "922": ["n07565083", "menu"], "923": ["n07579787", "plate"], "924": ["n07583066", "guacamole"], "925": ["n07584110", "consomme"], "926": ["n07590611", "hot_pot"], "927": ["n07613480", "trifle"], "928": ["n07614500", "ice_cream"], "929": ["n07615774", "ice_lolly"], "930": ["n07684084", "French_loaf"], "931": ["n07693725", "bagel"], "932": ["n07695742", "pretzel"], "933": ["n07697313", "cheeseburger"], "934": ["n07697537", "hotdog"], "935": ["n07711569", "mashed_potato"], "936": ["n07714571", "head_cabbage"], "937": ["n07714990", "broccoli"], "938": ["n07715103", "cauliflower"], "939": ["n07716358", "zucchini"], "940": ["n07716906", "spaghetti_squash"], "941": ["n07717410", "acorn_squash"], "942": ["n07717556", "butternut_squash"], "943": ["n07718472", "cucumber"], "944": ["n07718747", "artichoke"], "945": ["n07720875", "bell_pepper"], "946": ["n07730033", "cardoon"], "947": ["n07734744", "mushroom"], "948": ["n07742313", "Granny_Smith"], "949": ["n07745940", "strawberry"], "950": ["n07747607", "orange"], "951": ["n07749582", "lemon"], "952": ["n07753113", "fig"], "953": ["n07753275", "pineapple"], "954": ["n07753592", "banana"], "955": ["n07754684", "jackfruit"], "956": ["n07760859", "custard_apple"], 
"957": ["n07768694", "pomegranate"], "958": ["n07802026", "hay"], "959": ["n07831146", "carbonara"], "960": ["n07836838", "chocolate_sauce"], "961": ["n07860988", "dough"], "962": ["n07871810", "meat_loaf"], "963": ["n07873807", "pizza"], "964": ["n07875152", "potpie"], "965": ["n07880968", "burrito"], "966": ["n07892512", "red_wine"], "967": ["n07920052", "espresso"], "968": ["n07930864", "cup"], "969": ["n07932039", "eggnog"], "970": ["n09193705", "alp"], "971": ["n09229709", "bubble"], "972": ["n09246464", "cliff"], "973": ["n09256479", "coral_reef"], "974": ["n09288635", "geyser"], "975": ["n09332890", "lakeside"], "976": ["n09399592", "promontory"], "977": ["n09421951", "sandbar"], "978": ["n09428293", "seashore"], "979": ["n09468604", "valley"], "980": ["n09472597", "volcano"], "981": ["n09835506", "ballplayer"], "982": ["n10148035", "groom"], "983": ["n10565667", "scuba_diver"], "984": ["n11879895", "rapeseed"], "985": ["n11939491", "daisy"], "986": ["n12057211", "yellow_lady's_slipper"], "987": ["n12144580", "corn"], "988": ["n12267677", "acorn"], "989": ["n12620546", "hip"], "990": ["n12768682", "buckeye"], "991": ["n12985857", "coral_fungus"], "992": ["n12998815", "agaric"], "993": ["n13037406", "gyromitra"], "994": ["n13040303", "stinkhorn"], "995": ["n13044778", "earthstar"], "996": ["n13052670", "hen-of-the-woods"], "997": ["n13054560", "bolete"], "998": ["n13133613", "ear"], "999": ["n15075141", "toilet_tissue"]}

================================================
FILE: pytorch_classification/mini_imagenet/model.py
================================================
from typing import List, Callable

import torch
from torch import Tensor
import torch.nn as nn


def channel_shuffle(x: Tensor, groups: int) -> Tensor:
    """Interleave the channels of ``x`` across ``groups`` channel groups.

    The channel axis is viewed as ``(groups, channels_per_group)``, the two
    axes are swapped, and the result is flattened back to a single channel axis.

    :param x: 4-D tensor unpacked as (batch, channels, height, width).
    :param groups: number of groups the channel axis is split into.
    :return: tensor with the same shape as ``x`` and shuffled channel order.
    """
    n, c, h, w = x.shape
    per_group = c // groups

    # [n, c, h, w] -> [n, groups, per_group, h, w]
    grouped = x.view(n, groups, per_group, h, w)

    # swap the group axis with the per-group axis; view() below needs contiguous memory
    swapped = grouped.transpose(1, 2).contiguous()

    # merge the two channel axes back into one
    return swapped.view(n, -1, h, w)


class InvertedResidual(nn.Module):
    """Two-branch ShuffleNetV2 unit followed by a channel shuffle.

    With ``stride == 1`` the input is split in half along the channel axis:
    one half is passed through untouched and the other goes through
    ``branch2``. With ``stride == 2`` the full input is fed to both
    ``branch1`` and ``branch2``. The branch outputs are concatenated on the
    channel axis and shuffled with two groups.
    """

    def __init__(self, input_c: int, output_c: int, stride: int):
        super().__init__()

        if stride not in (1, 2):
            raise ValueError("illegal stride value.")
        self.stride = stride

        # each branch contributes half of the output channels
        assert output_c % 2 == 0
        half_c = output_c // 2
        # for stride 1 the input is chunked in two, so it must have 2 * half_c channels
        assert self.stride != 1 or input_c == 2 * half_c

        if self.stride == 2:
            shortcut_layers = [
                self.depthwise_conv(input_c, input_c, kernel_s=3, stride=self.stride, padding=1),
                nn.BatchNorm2d(input_c),
                nn.Conv2d(input_c, half_c, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(half_c),
                nn.ReLU(inplace=True),
            ]
        else:
            # stride 1: the untouched half of the input acts as the shortcut
            shortcut_layers = []
        self.branch1 = nn.Sequential(*shortcut_layers)

        # branch2 sees the whole input when down-sampling, otherwise only one half of it
        branch2_in_c = input_c if self.stride > 1 else half_c
        self.branch2 = nn.Sequential(
            nn.Conv2d(branch2_in_c, half_c, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(half_c),
            nn.ReLU(inplace=True),
            self.depthwise_conv(half_c, half_c, kernel_s=3, stride=self.stride, padding=1),
            nn.BatchNorm2d(half_c),
            nn.Conv2d(half_c, half_c, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(half_c),
            nn.ReLU(inplace=True),
        )

    @staticmethod
    def depthwise_conv(input_c: int,
                       output_c: int,
                       kernel_s: int,
                       stride: int = 1,
                       padding: int = 0,
                       bias: bool = False) -> nn.Conv2d:
        """Build a ``nn.Conv2d`` whose ``groups`` equals its input channel count."""
        return nn.Conv2d(
            in_channels=input_c,
            out_channels=output_c,
            kernel_size=kernel_s,
            stride=stride,
            padding=padding,
            bias=bias,
            groups=input_c,
        )

    def forward(self, x: Tensor) -> Tensor:
        """Run both branches, concatenate on the channel axis and shuffle."""
        if self.stride == 1:
            passthrough, to_transform = x.chunk(2, dim=1)
            merged = torch.cat((passthrough, self.branch2(to_transform)), dim=1)
        else:
            merged = torch.cat((self.branch1(x), self.branch2(x)), dim=1)

        return channel_shuffle(merged, 2)


class ShuffleNetV2(nn.Module):
    """ShuffleNetV2 classifier: stem conv, max-pool, three stages, 1x1 conv, linear head.

    :param stages_repeats: number of blocks in stage2, stage3 and stage4 (3 ints).
    :param stages_out_channels: output channels of the stem, the three stages
        and the final 1x1 convolution (5 ints).
    :param num_classes: size of the output of the final linear layer.
    :param inverted_residual: callable used to build each block; it is invoked
        as ``inverted_residual(in_channels, out_channels, stride)``.
    :raises ValueError: if either list has the wrong length.
    """

    def __init__(self,
                 stages_repeats: List[int],
                 stages_out_channels: List[int],
                 num_classes: int = 1000,
                 inverted_residual: Callable[..., nn.Module] = InvertedResidual):
        super().__init__()

        if len(stages_repeats) != 3:
            raise ValueError("expected stages_repeats as list of 3 positive ints")
        if len(stages_out_channels) != 5:
            raise ValueError("expected stages_out_channels as list of 5 positive ints")
        self._stage_out_channels = stages_out_channels

        # stem: 3-channel input -> first entry of the channel list
        stem_c = self._stage_out_channels[0]
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, stem_c, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(stem_c),
            nn.ReLU(inplace=True),
        )

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Static annotations for mypy
        self.stage2: nn.Sequential
        self.stage3: nn.Sequential
        self.stage4: nn.Sequential

        in_c = stem_c
        stage_specs = zip(("stage2", "stage3", "stage4"),
                          stages_repeats,
                          self._stage_out_channels[1:])
        for stage_name, num_blocks, out_c in stage_specs:
            # the first block of a stage uses stride 2, the remaining ones stride 1
            blocks = [inverted_residual(in_c, out_c, 2)]
            blocks.extend(inverted_residual(out_c, out_c, 1) for _ in range(num_blocks - 1))
            setattr(self, stage_name, nn.Sequential(*blocks))
            in_c = out_c

        head_c = self._stage_out_channels[-1]
        self.conv5 = nn.Sequential(
            nn.Conv2d(in_c, head_c, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(head_c),
            nn.ReLU(inplace=True),
        )

        self.fc = nn.Linear(head_c, num_classes)

    def _forward_impl(self, x: Tensor) -> Tensor:
        # See note [TorchScript super()]
        features = self.conv1(x)
        features = self.maxpool(features)
        features = self.stage2(features)
        features = self.stage3(features)
        features = self.stage4(features)
        features = self.conv5(features)
        pooled = features.mean([2, 3])  # global average pool over the two trailing axes
        return self.fc(pooled)

    def forward(self, x: Tensor) -> Tensor:
        """Return the output of the final linear layer for the input batch ``x``."""
        return self._forward_impl(x)


def shufflenet_v2_x1_0(num_classes=1000):
    """
    Build the 1.0x-width ShuffleNetV2 from
    `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design"
    <https://arxiv.org/abs/1807.11164>`.
    weight: https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth

    :param num_classes: number of outputs of the final linear layer.
    :return: a ``ShuffleNetV2`` instance with stage repeats [4, 8, 4] and
        channels [24, 116, 232, 464, 1024].
    """
    repeats = [4, 8, 4]
    channels = [24, 116, 232, 464, 1024]
    return ShuffleNetV2(stages_repeats=repeats,
                        stages_out_channels=channels,
                        num_classes=num_classes)


def shufflenet_v2_x0_5(num_classes=1000):
    """
    Build the 0.5x-width ShuffleNetV2 from
    `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design"
    <https://arxiv.org/abs/1807.11164>`.
    weight: https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth

    :param num_classes: number of outputs of the final linear layer.
    :return: a ``ShuffleNetV2`` instance with stage repeats [4, 8, 4] and
        channels [24, 48, 96, 192, 1024].
    """
    repeats = [4, 8, 4]
    channels = [24, 48, 96, 192, 1024]
    return ShuffleNetV2(stages_repeats=repeats,
                        stages_out_channels=channels,
                        num_classes=num_classes)


================================================
FILE: pytorch_classification/mini_imagenet/multi_train_utils/__init__.py
================================================
from .train_eval_utils import train_one_epoch, evaluate
from .distributed_utils import init_distributed_mode, dist, cleanup


================================================
FILE: pytorch_classification/mini_imagenet/multi_train_utils/distributed_utils.py
================================================
import os

import torch
import torch.distributed as dist


def init_distributed_mode(args):
    """Fill in the distributed fields of ``args`` and start the process group.

    Rank information is taken from the ``RANK`` / ``WORLD_SIZE`` / ``LOCAL_RANK``
    environment variables when the first two are present, otherwise from
    ``SLURM_PROCID``. If neither is available, ``args.distributed`` is set to
    False and the function returns without touching anything else.

    In the SLURM branch ``args.world_size`` is not assigned here, so the value
    already stored on ``args`` is what gets passed to ``init_process_group``.

    :param args: namespace providing ``dist_url`` (and ``world_size`` for the
        SLURM branch); receives ``rank``, ``gpu``, ``distributed`` and
        ``dist_backend``.
    """
    env = os.environ
    has_rank_env = 'RANK' in env and 'WORLD_SIZE' in env
    has_slurm_env = 'SLURM_PROCID' in env

    if not (has_rank_env or has_slurm_env):
        print('Not using distributed mode')
        args.distributed = False
        return

    if has_rank_env:
        args.rank = int(env["RANK"])
        args.world_size = int(env['WORLD_SIZE'])
        args.gpu = int(env['LOCAL_RANK'])
    else:
        args.rank = int(env['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'  # communication backend; NCCL is the one recommended for NVIDIA GPUs
    message = '| distributed init (rank {}): {}'.format(args.rank, args.dist_url)
    print(message, flush=True)
    dist.init_process_group(
        backend=args.dist_backend,
        init_method=args.dist_url,
        world_size=args.world_size,
        rank=args.rank,
    )
    dist.barrier()


def cleanup():
    """Tear down the process group by calling ``dist.destroy_process_group()``."""
    dist.destroy_process_group()


def is_dist_avail_and_initialized():
    """Return True only if torch.distributed is available and a process group is initialized."""
    return dist.is_available() and dist.is_initialized()


def get_world_size():
    """Return the process-group world size, or 1 when no group is initialized."""
    return dist.get_world_size() if is_dist_avail_and_initialized() else 1


def get_rank():
    """Return this process's rank in the process group, or 0 when no group is initialized."""
    return dist.get_rank() if is_dist_avail_and_initialized() else 0


def is_main_process():
    """Return True when the rank reported by ``get_rank()`` is 0."""
    return get_rank() == 0


def reduce_value(value, average=True):
    """All-reduce ``value`` across processes, optionally dividing by the world size.

    When fewer than two processes are running, ``value`` is returned untouched.
    Otherwise the reduction (and the division) is applied to ``value`` in place
    under ``torch.no_grad()`` and the same object is returned.

    :param value: tensor to reduce.
    :param average: if True, divide the reduced tensor by the world size.
    :return: ``value`` after the reduction.
    """
    num_procs = get_world_size()
    if num_procs >= 2:  # nothing to reduce with a single process
        with torch.no_grad():
            dist.all_reduce(value)
            if average:
                value /= num_procs
    return value


def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
    """Build a ``LambdaLR`` scheduler that ramps the learning-rate multiplier up to 1.

    The multiplier starts at ``warmup_factor`` for step 0, moves linearly
    towards 1 and stays at 1 once the step count reaches ``warmup_iters``.

    :param optimizer: optimizer whose learning rate is scaled.
    :param warmup_iters: number of scheduler steps the ramp lasts.
    :param warmup_factor: multiplier applied at step 0.
    :return: the ``torch.optim.lr_scheduler.LambdaLR`` instance.
    """

    def lr_multiplier(step):
        """Return the learning-rate multiplier for the given step count."""
        if step < warmup_iters:
            progress = float(step) / warmup_iters
            # linear interpolation: warmup_factor at progress 0, 1 at progress 1
            return warmup_factor * (1 - progress) + progress
        return 1

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_multiplier)


================================================
FILE: pytorch_classification/mini_imagenet/multi_train_utils/train_eval_utils.py
================================================
import sys

from tqdm import tqdm
import torch

from .distributed_utils import reduce_value, is_main_process, warmup_lr_scheduler


def train_one_epoch(model, optimizer, data_loader, device, epoch, use_amp=False, warmup=True):
    """Train ``model`` for one pass over ``data_loader`` and return the mean loss.

    :param model: network to train; it is switched to training mode.
    :param optimizer: optimizer stepped once per batch.
    :param data_loader: iterable yielding ``(images, labels)`` batches.
    :param device: ``torch.device`` the batches are moved to.
    :param epoch: epoch index; used for the progress text and to decide whether
        warm-up applies (only when it equals 0).
    :param use_amp: enable autocast / gradient scaling; only takes effect when
        ``device.type`` contains "cuda".
    :param warmup: when True and ``epoch == 0``, a warm-up scheduler is stepped
        after every batch.
    :return: accumulated (cross-process averaged) loss divided by the number of batches.

    The process exits with status 1 if a non-finite loss is encountered.
    NOTE: ``step`` is only bound by the loop, so an empty ``data_loader`` makes
    the final ``return`` fail.
    """
    model.train()
    loss_function = torch.nn.CrossEntropyLoss()
    accu_loss = torch.zeros(1).to(device)  # running sum of the per-batch losses
    accu_num = torch.zeros(1).to(device)   # running count of correctly predicted samples
    optimizer.zero_grad()

    lr_scheduler = None
    if epoch == 0 and warmup is True:  # warm-up is only used during the first epoch (epoch == 0)
        warmup_factor = 1.0 / 1000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    # show the training progress bar in the main process only
    if is_main_process():
        data_loader = tqdm(data_loader, file=sys.stdout)

    enable_amp = use_amp and "cuda" in device.type
    scaler = torch.cuda.amp.GradScaler(enabled=enable_amp)

    sample_num = 0
    for step, data in enumerate(data_loader):
        images, labels = data
        sample_num += images.shape[0]

        with torch.cuda.amp.autocast(enabled=enable_amp):
            pred = model(images.to(device))
            loss = loss_function(pred, labels.to(device))

            pred_classes = torch.max(pred, dim=1)[1]
            accu_num += torch.eq(pred_classes, labels.to(device)).sum()

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad()

        # average the loss over all processes (returned unchanged with a single process)
        loss = reduce_value(loss, average=True)
        accu_loss += loss.detach()

        # update the progress text in the main process; accu_num / sample_num are
        # not reduced, so train_acc reflects this process's batches only
        if is_main_process():
            info = "[epoch {}] loss: {:.3f}, train_acc: {:.3f}, lr: {:.5f}".format(
                epoch,
                accu_loss.item() / (step + 1),
                accu_num.item() / sample_num,
                optimizer.param_groups[0]["lr"])
            data_loader.desc = info

        if not torch.isfinite(loss):
            print('WARNING: non-finite loss, ending training ', loss)
            sys.exit(1)

        if lr_scheduler is not None:  # during warm-up, adjust the learning rate after every batch
            lr_scheduler.step()

    # block until the CUDA work queued on this device has finished
    if device != torch.device("cpu"):
        torch.cuda.synchronize(device)

    return accu_loss.item() / (step + 1)


@torch.no_grad()
def evaluate(model, data_loader, device):
    """Compute top-1 accuracy of ``model`` over ``data_loader``.

    Correct predictions are counted per process, summed across processes with
    ``reduce_value(..., average=False)`` and divided by
    ``len(data_loader.dataset)``.

    :param model: network to evaluate; it is switched to eval mode.
    :param data_loader: iterable yielding ``(images, labels)`` batches and
        exposing a ``dataset`` attribute.
    :param device: ``torch.device`` the batches are moved to.
    :return: number of correct predictions divided by the dataset length.
    """
    model.eval()

    # denominator: number of samples in the validation dataset
    total = len(data_loader.dataset)

    # numerator: correct predictions seen by this process
    correct = torch.zeros(1).to(device)

    # show the validation progress bar in the main process only
    batches = tqdm(data_loader, file=sys.stdout) if is_main_process() else data_loader

    for images, labels in batches:
        logits = model(images.to(device))
        predicted = torch.max(logits, dim=1)[1]
        correct += torch.eq(predicted, labels.to(device)).sum()

    # block until the CUDA work queued on this device has finished
    if device != torch.device("cpu"):
        torch.cuda.synchronize(device)

    correct = reduce_value(correct, average=False)
    return correct.item() / total


================================================
FILE: pytorch_classification/mini_imagenet/my_dataset.py
================================================
import os
import json
from PIL import Image
import pandas as pd
import torch
from torch.utils.data import Dataset


class MyDataSet(Dataset):
    """Image-classification dataset described by a CSV file and a label JSON file.

    The CSV (``<root_dir>/<csv_name>``) must have ``filename`` and ``label``
    columns; images are looked up in ``<root_dir>/images``. The JSON file maps
    each CSV label to a sequence whose first element is used as the class
    index returned by ``__getitem__``.

    :param root_dir: dataset root containing the ``images`` folder and the CSV.
    :param csv_name: name of the CSV file inside ``root_dir``.
    :param json_path: path of the label JSON file.
    :param transform: optional callable applied to each loaded PIL image.
    """

    def __init__(self,
                 root_dir: str,
                 csv_name: str,
                 json_path: str,
                 transform=None):
        images_dir = os.path.join(root_dir, "images")
        assert os.path.exists(images_dir), "dir:'{}' not found.".format(images_dir)

        assert os.path.exists(json_path), "file:'{}' not found.".format(json_path)
        # Use a context manager so the JSON file handle is closed right away
        # instead of being left to the garbage collector.
        with open(json_path, "r") as json_file:
            self.label_dict = json.load(json_file)

        csv_path = os.path.join(root_dir, csv_name)
        assert os.path.exists(csv_path), "file:'{}' not found.".format(csv_path)
        csv_data = pd.read_csv(csv_path)
        self.total_num = csv_data.shape[0]
        # full path of every image, in CSV row order
        self.img_paths = [os.path.join(images_dir, i) for i in csv_data["filename"].values]
        # class index of every image, in CSV row order
        self.img_label = [self.label_dict[i][0] for i in csv_data["label"].values]
        # distinct raw labels present in this CSV
        self.labels = set(csv_data["label"].values)

        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV file."""
        return self.total_num

    def __getitem__(self, item):
        """Load the ``item``-th image and return ``(image, class_index)``.

        :raises ValueError: if the image is not in RGB mode.
        """
        img = Image.open(self.img_paths[item])
        # 'RGB' is a colour image; e.g. 'L' would be greyscale
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(self.img_paths[item]))
        label = self.img_label[item]

        if self.transform is not None:
            img = self.transform(img)

        return img, label

    @staticmethod
    def collate_fn(batch):
        """Stack a list of ``(image_tensor, label)`` pairs into two batch tensors."""
        # the official default_collate implementation can be found at
        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        images, labels = tuple(zip(*batch))

        images = torch.stack(images, dim=0)
        labels = torch.as_tensor(labels)
        return images, labels


================================================
FILE: pytorch_classification/mini_imagenet/restructure_csv.py
================================================
import os
import json

import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt


def read_csv_classes(csv_dir: str, csv_name: str):
    """Load a CSV file and collect the distinct values of its ``label`` column.

    A one-line summary (row count and number of distinct labels) is printed.

    :param csv_dir: directory containing the CSV file.
    :param csv_name: file name of the CSV.
    :return: tuple ``(dataframe, set_of_labels)``.
    """
    csv_path = os.path.join(csv_dir, csv_name)
    data = pd.read_csv(csv_path)  # expected columns: filename, label

    label_set = set(data["label"].unique())

    summary = "{} have {} images and {} classes.".format(csv_name, len(data), len(label_set))
    print(summary)
    return data, label_set


def calculate_split_info(path: str, label_dict: dict, rate: float = 0.2):
    """Merge the train/val/test CSVs and re-split them per class into train and val.

    Side effects: writes ``classes_name.json`` into the current working
    directory and ``new_train.csv`` / ``new_val.csv`` into ``path``.

    :param path: dataset root holding ``images/``, ``train.csv``, ``val.csv``
        and ``test.csv``.
    :param label_dict: mapping from each CSV label to a readable class name.
    :param rate: fraction of every class that goes to the validation split.
    """
    # count the .jpg files present on disk (informational only)
    image_dir = os.path.join(path, "images")
    jpg_files = [name for name in os.listdir(image_dir) if name.endswith(".jpg")]
    print("find {} images in dataset.".format(len(jpg_files)))

    train_data, train_label = read_csv_classes(path, "train.csv")
    val_data, val_label = read_csv_classes(path, "val.csv")
    test_data, test_label = read_csv_classes(path, "test.csv")

    # union of the labels of the three files, in sorted order
    labels = sorted(train_label | val_label | test_label)
    print("all classes: {}".format(len(labels)))

    # classes_name.json: label -> [class index, readable name]
    classes_label = {label: [index, label_dict[label]] for index, label in enumerate(labels)}
    json_str = json.dumps(classes_label, indent=4)
    with open('classes_name.json', 'w') as json_file:
        json_file.write(json_str)

    # stack the rows of the three CSV files
    data = pd.concat([train_data, val_data, test_data], axis=0)
    print("total data shape: {}".format(data.shape))

    # debugging switch: show the first shuffled image of every class
    imshow_flag = False

    # split the rows of every class separately
    num_every_classes = []
    split_train_data = []
    split_val_data = []
    for label in labels:
        class_data = data[data["label"] == label]
        class_size = class_data.shape[0]
        num_every_classes.append(class_size)

        # shuffle with a fixed seed, then cut at the train/val boundary
        shuffled = class_data.sample(frac=1, random_state=1)
        train_count = int(class_size * (1 - rate))
        split_train_data.append(shuffled[:train_count])
        split_val_data.append(shuffled[train_count:])

        if imshow_flag:
            img_name, img_label = shuffled.iloc[0].values
            img = Image.open(os.path.join(image_dir, img_name))
            plt.imshow(img)
            plt.title("class: " + classes_label[img_label][1])
            plt.show()

    # debugging switch: bar chart of the number of samples per class
    # NOTE: the x positions are hard-coded for 100 classes
    plot_flag = False
    if plot_flag:
        plt.bar(range(1, 101), num_every_classes, align='center')
        plt.show()

    # join the per-class pieces and save the new splits
    new_train_data = pd.concat(split_train_data, axis=0)
    new_val_data = pd.concat(split_val_data, axis=0)

    new_train_data.to_csv(os.path.join(path, "new_train.csv"))
    new_val_data.to_csv(os.path.join(path, "new_val.csv"))


def main():
    """Re-split the mini-ImageNet CSV files using the ImageNet class-index file.

    Reads ``./imagenet_class_index.json`` (entries of the form
    ``"<index>": ["<wnid>", "<class name>"]``), re-keys it by wnid and passes
    the result to ``calculate_split_info`` for the hard-coded dataset directory.
    """
    data_dir = "/data/mini-imagenet/"
    json_path = "./imagenet_class_index.json"

    # load the ImageNet labels; the context manager closes the file handle
    # instead of leaving it open until garbage collection
    with open(json_path, "r") as json_file:
        class_index = json.load(json_file)
    # map wnid -> readable class name
    label_dict = {entry[0]: entry[1] for entry in class_index.values()}

    calculate_split_info(data_dir, label_dict)


if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/mini_imagenet/train_multi_gpu_using_launch.py
================================================
import os
import math
import tempfile
import argparse

import torch
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

from model import shufflenet_v2_x1_0
from my_dataset import MyDataSet
from multi_train_utils import train_one_epoch, evaluate, init_distributed_mode, dist, cleanup


def main(args):
    """Train ShuffleNetV2 x1.0 on mini-ImageNet with DistributedDataParallel.

    :param args: parsed command-line namespace. The attributes read here are
        ``device``, ``batch_size``, ``num_classes``, ``weights``, ``lr``,
        ``lrf``, ``epochs``, ``data_path``, ``freeze_layers`` and ``syncBN``;
        ``rank``, ``world_size`` and ``gpu`` are expected to be available
        after ``init_distributed_mode(args)``.
    :raises EnvironmentError: if CUDA is not available.
    :raises ValueError: if ``args.num_classes`` differs from the number of
        distinct labels in the training CSV.

    Rank 0 additionally logs to TensorBoard and saves
    ``./weights/model-<epoch>.pth`` after every epoch.
    """
    if torch.cuda.is_available() is False:
        raise EnvironmentError("not find GPU device for training.")

    # set up the distributed environment of this process
    init_distributed_mode(args=args)

    rank = args.rank
    device = torch.device(args.device)
    batch_size = args.batch_size
    num_classes = args.num_classes
    weights_path = args.weights
    args.lr *= args.world_size  # the learning rate is scaled by the number of parallel processes

    if rank == 0:  # only the first process prints the config and owns the TensorBoard writer
        print(args)
        print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
        tb_writer = SummaryWriter()
        if os.path.exists("./weights") is False:
            os.makedirs("./weights")

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    data_root = args.data_path
    json_path = "./classes_name.json"
    # training dataset
    train_dataset = MyDataSet(root_dir=data_root,
                              csv_name="new_train.csv",
                              json_path=json_path,
                              transform=data_transform["train"])

    # check num_classes
    if args.num_classes != len(train_dataset.labels):
        raise ValueError("dataset have {} classes, but input {}".format(len(train_dataset.labels),
                                                                        args.num_classes))

    # validation dataset
    val_dataset = MyDataSet(root_dir=data_root,
                            csv_name="new_val.csv",
                            json_path=json_path,
                            transform=data_transform["val"])

    # assign each rank its own subset of sample indices
    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)

    # group the training indices into lists of batch_size elements
    train_batch_sampler = torch.utils.data.BatchSampler(
        train_sampler, batch_size, drop_last=True)

    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    if rank == 0:
        print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_sampler=train_batch_sampler,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_dataset.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             sampler=val_sampler,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_dataset.collate_fn)
    # build the model
    model = shufflenet_v2_x1_0(num_classes=num_classes).to(device)

    # Path of the temporary file used to share the initial weights between
    # processes. It stays None when pretrained weights are loaded, so the
    # clean-up at the end never reads an unbound name and never deletes a
    # file this run did not create.
    checkpoint_path = None

    # load pretrained weights if a file was given
    if os.path.exists(weights_path):
        weights_dict = torch.load(weights_path, map_location=device)
        # build the model's state dict once; keep only entries that exist in
        # the model and have the same number of elements
        model_state = model.state_dict()
        load_weights_dict = {k: v for k, v in weights_dict.items()
                             if k in model_state and model_state[k].numel() == v.numel()}
        model.load_state_dict(load_weights_dict, strict=False)
    else:
        checkpoint_path = os.path.join(tempfile.gettempdir(), "initial_weights.pt")
        # without pretrained weights, rank 0 saves its initial weights and every
        # process loads them so that all replicas start from the same values
        if rank == 0:
            torch.save(model.state_dict(), checkpoint_path)

        dist.barrier()
        # map_location must be given here, otherwise the first GPU ends up using more memory
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))

    # optionally freeze weights
    if args.freeze_layers:
        for name, para in model.named_parameters():
            # freeze everything except the final fully connected layer
            if "fc" not in name:
                para.requires_grad_(False)
    else:
        # SyncBatchNorm only makes sense when a network containing BN layers is trained
        if args.syncBN:
            # training is slower with SyncBatchNorm
            model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)

    # wrap the model in DDP
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])

    # optimizer
    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=4E-5)
    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    for epoch in range(args.epochs):
        train_sampler.set_epoch(epoch)

        mean_loss = train_one_epoch(model=model,
                                    optimizer=optimizer,
                                    data_loader=train_loader,
                                    device=device,
                                    epoch=epoch)

        scheduler.step()

        acc = evaluate(model=model,
                       data_loader=val_loader,
                       device=device)

        if rank == 0:
            print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3)))
            tags = ["loss", "accuracy", "learning_rate"]
            tb_writer.add_scalar(tags[0], mean_loss, epoch)
            tb_writer.add_scalar(tags[1], acc, epoch)
            tb_writer.add_scalar(tags[2], optimizer.param_groups[0]["lr"], epoch)

            torch.save(model.module.state_dict(), "./weights/model-{}.pth".format(epoch))

    # remove the temporary initial-weights file, if this run created one
    if rank == 0 and checkpoint_path is not None and os.path.exists(checkpoint_path):
        os.remove(checkpoint_path)

    cleanup()


if __name__ == '__main__':
    def _str2bool(value):
        """Convert a command-line string such as "True" / "false" / "0" to a bool.

        ``type=bool`` would turn every non-empty string (including "False")
        into True, so boolean options are parsed explicitly instead.
        """
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ("1", "true", "t", "yes", "y", "on"):
            return True
        if text in ("", "0", "false", "f", "no", "n", "off"):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got '{}'".format(value))

    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=100)
    parser.add_argument('--epochs', type=int, default=100)
    parser.add_argument('--batch-size', type=int, default=32)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--lrf', type=float, default=0.0001)
    # whether to enable SyncBatchNorm
    parser.add_argument('--syncBN', type=_str2bool, default=True)

    # root directory of the dataset
    parser.add_argument('--data-path', type=str,
                        default="/home/wz/mini-imagenet/")

    parser.add_argument('--weights', type=str, default='',
                        help='initial weights path')

    parser.add_argument('--freeze-layers', type=_str2bool, default=False)

    # do not change this parameter; it is assigned automatically
    parser.add_argument('--device', default='cuda', help='device id (i.e. 0 or 0,1 or cpu)')

    # number of processes (not threads); no need to set it, it is derived from nproc_per_node
    parser.add_argument('--world-size', default=4, type=int,
                        help='number of distributed processes')

    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
    opt = parser.parse_args()

    main(opt)


================================================
FILE: pytorch_classification/mini_imagenet/train_single_gpu.py
================================================
import os
import math
import argparse

import torch
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
import torch.optim.lr_scheduler as lr_scheduler

from model import shufflenet_v2_x1_0
from my_dataset import MyDataSet
from multi_train_utils import train_one_epoch, evaluate


def main(args):
    """
    Train the model built by ``shufflenet_v2_x1_0`` on a single device.

    Args:
        args: parsed command-line namespace. Reads ``device``, ``data_path``,
            ``num_classes``, ``batch_size``, ``lr``, ``lrf`` and ``epochs``.
            NOTE: ``weights`` and ``freeze_layers`` are currently ignored because
            the code that would use them is commented out below.

    Raises:
        ValueError: if ``args.num_classes`` differs from the number of labels
            reported by the training dataset.

    Side effects:
        Creates ``./weights`` and saves ``model-{epoch}.pth`` there after every
        epoch; writes loss / accuracy / learning-rate scalars through a
        ``SummaryWriter``.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")

    print(args)
    print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
    tb_writer = SummaryWriter()
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs("./weights", exist_ok=True)

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    data_root = args.data_path
    json_path = "./classes_name.json"
    # Build the training dataset
    train_dataset = MyDataSet(root_dir=data_root,
                              csv_name="new_train.csv",
                              json_path=json_path,
                              transform=data_transform["train"])

    # check num_classes
    if args.num_classes != len(train_dataset.labels):
        raise ValueError("dataset have {} classes, but input {}".format(len(train_dataset.labels),
                                                                        args.num_classes))

    # Build the validation dataset
    val_dataset = MyDataSet(root_dir=data_root,
                            csv_name="new_val.csv",
                            json_path=json_path,
                            transform=data_transform["val"])

    batch_size = args.batch_size
    # os.cpu_count() may return None, which min() cannot compare with ints
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_dataset.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_dataset.collate_fn)

    # create model
    model = shufflenet_v2_x1_0(num_classes=args.num_classes).to(device)

    # Load pre-trained weights if provided (currently disabled)
    # if args.weights != "":
    #     if os.path.exists(args.weights):
    #         weights_dict = torch.load(args.weights, map_location=device)
    #         load_weights_dict = {k: v for k, v in weights_dict.items()
    #                              if model.state_dict()[k].numel() == v.numel()}
    #         print(model.load_state_dict(load_weights_dict, strict=False))
    #     else:
    #         raise FileNotFoundError("not found weights file: {}".format(args.weights))

    # Optionally freeze weights (currently disabled)
    # if args.freeze_layers:
    #     for name, para in model.named_parameters():
    #         # freeze everything except the final fully-connected layer
    #         if "fc" not in name:
    #             para.requires_grad_(False)

    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=4E-5)
    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    try:
        for epoch in range(args.epochs):
            # train
            mean_loss = train_one_epoch(model=model,
                                        optimizer=optimizer,
                                        data_loader=train_loader,
                                        device=device,
                                        epoch=epoch,
                                        warmup=True)

            scheduler.step()

            # validate
            acc = evaluate(model=model,
                           data_loader=val_loader,
                           device=device)

            print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3)))
            tags = ["loss", "accuracy", "learning_rate"]
            tb_writer.add_scalar(tags[0], mean_loss, epoch)
            tb_writer.add_scalar(tags[1], acc, epoch)
            tb_writer.add_scalar(tags[2], optimizer.param_groups[0]["lr"], epoch)

            torch.save(model.state_dict(), "./weights/model-{}.pth".format(epoch))
    finally:
        # Flush pending events and release the event file even if training fails
        tb_writer.close()


if __name__ == '__main__':
    def _str2bool(value):
        """Convert a command-line token to bool.

        ``type=bool`` turns every non-empty string (including "False" and "0")
        into True, so a boolean option could never be switched off from the
        command line. Only the usual negative spellings map to False here;
        anything else stays True, so invocations that used to enable the
        option keep working.
        """
        if isinstance(value, bool):
            return value
        return value.strip().lower() not in ("", "false", "f", "no", "n", "0", "off")

    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=100)
    parser.add_argument('--epochs', type=int, default=100)
    parser.add_argument('--batch-size', type=int, default=32)
    parser.add_argument('--lr', type=float, default=0.1)
    parser.add_argument('--lrf', type=float, default=0.0001)

    # Root directory of the dataset
    parser.add_argument('--data-path', type=str, default="/home/wz/mini-imagenet/")

    parser.add_argument('--weights', type=str, default='',
                        help='initial weights path')
    parser.add_argument('--freeze-layers', type=_str2bool, default=False)
    parser.add_argument('--device', default='cuda', help='device id (i.e. 0 or 0,1 or cpu)')

    opt = parser.parse_args()

    main(opt)


================================================
FILE: pytorch_classification/model_complexity/main.py
================================================
import torch
from fvcore.nn import FlopCountAnalysis, parameter_count_table
from prettytable import PrettyTable
from model import efficientnetv2_s


def main():
    """
    Print complexity statistics of the ``efficientnetv2_s`` model in two ways.

    Option 1 freezes every parameter whose name does not contain "head" and
    prints the table produced from the model's own ``complexity`` method.
    Option 2 prints the totals computed by fvcore.
    """
    net = efficientnetv2_s()

    # option1: freeze everything except the head
    for param_name, param in net.named_parameters():
        if "head" in param_name:
            print("training {}".format(param_name))
            continue
        param.requires_grad_(False)

    stats = net.complexity(224, 224, 3)
    total_params = stats["params"]
    frozen_params = stats["freeze"]

    report = PrettyTable()
    report.field_names = ["params", "freeze-params", "train-params", "FLOPs", "acts"]
    report.add_row([total_params,
                    frozen_params,
                    total_params - frozen_params,
                    stats["flops"],
                    stats["acts"]])
    print(report)

    # option2: let fvcore count FLOPs and parameters
    dummy_input = (torch.rand(1, 3, 224, 224),)
    print(FlopCountAnalysis(net, dummy_input).total())

    print(parameter_count_table(net))


# Run the complexity report only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/model_complexity/model.py
================================================
from collections import OrderedDict
from functools import partial
from typing import Callable, Optional

import torch.nn as nn
import torch
from torch import Tensor

from utils import *


def drop_path(x, drop_prob: float = 0., training: bool = False):
    """
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
    "Deep Networks with Stochastic Depth", https://arxiv.org/pdf/1603.09382.pdf

    This function is taken from the rwightman.
    It can be seen here:
    https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py#L140

    Args:
        x: input tensor; the first dimension is treated as the sample dimension.
        drop_prob: probability of zeroing a whole sample. ``0`` and ``None``
            both disable dropping.
        training: dropping is only applied when this is True.

    Returns:
        ``x`` itself when dropping is disabled; otherwise a new tensor in which
        each sample is either zeroed or scaled by ``1 / (1 - drop_prob)``.
    """
    # `not drop_prob` covers 0. as well as None (the default of DropPath), which
    # would otherwise fail at `1 - drop_prob` in training mode.
    if not drop_prob or not training:
        return x
    keep_prob = 1 - drop_prob
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
    random_tensor.floor_()  # binarize
    if keep_prob > 0.:
        x = x.div(keep_prob)
    # With drop_prob == 1 the mask is all zeros; skipping the division above
    # avoids the 0-division that would turn the result into NaN.
    output = x * random_tensor
    return output


class DropPath(nn.Module):
    """
    Module form of ``drop_path`` (Stochastic Depth applied per sample).
    "Deep Networks with Stochastic Depth", https://arxiv.org/pdf/1603.09382.pdf

    The stored ``drop_prob`` and the module's ``training`` flag are forwarded
    to ``drop_path`` on every call.
    """
    def __init__(self, drop_prob=None):
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        prob, is_training = self.drop_prob, self.training
        return drop_path(x, prob, is_training)


class ConvBNAct(nn.Module):
    """
    Bias-free Conv2d followed by a normalization layer and an activation.

    ``norm_layer`` defaults to ``nn.BatchNorm2d`` and ``activation_layer`` to
    ``nn.SiLU``; padding is ``(kernel_size - 1) // 2``.
    """
    def __init__(self,
                 in_planes: int,
                 out_planes: int,
                 kernel_size: int = 3,
                 stride: int = 1,
                 groups: int = 1,
                 norm_layer: Optional[Callable[..., nn.Module]] = None,
                 activation_layer: Optional[Callable[..., nn.Module]] = None):
        super().__init__()

        norm_factory = nn.BatchNorm2d if norm_layer is None else norm_layer
        # nn.SiLU is an alias of Swish (torch>=1.7)
        act_factory = nn.SiLU if activation_layer is None else activation_layer

        self.conv = nn.Conv2d(in_planes,
                              out_planes,
                              kernel_size,
                              stride=stride,
                              padding=(kernel_size - 1) // 2,
                              groups=groups,
                              bias=False)
        self.bn = norm_factory(out_planes)
        self.act = act_factory()

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def complexity(self, cx):
        """Accumulate the cost of the convolution and the norm layer into ``cx``."""
        conv = self.conv
        cx = conv2d_cx(cx,
                       in_c=conv.in_channels,
                       out_c=conv.out_channels,
                       k=conv.kernel_size[0],  # stored as a tuple
                       stride=conv.stride[0],  # stored as a tuple
                       groups=conv.groups,
                       bias=False,
                       trainable=conv.weight.requires_grad)
        return norm2d_cx(cx, conv.out_channels, trainable=self.bn.weight.requires_grad)


class SqueezeExcite(nn.Module):
    """
    Squeeze-and-excitation gate built from two 1x1 convolutions.

    The squeezed width is ``int(input_c * se_ratio)`` (derived from the block
    input channels), while the gate itself operates on ``expand_c`` channels.
    """
    def __init__(self,
                 input_c: int,   # block input channel
                 expand_c: int,  # block expand channel
                 se_ratio: float = 0.25):
        super().__init__()
        reduced_c = int(input_c * se_ratio)
        self.conv_reduce = nn.Conv2d(expand_c, reduced_c, 1)
        self.act1 = nn.SiLU()  # alias Swish
        self.conv_expand = nn.Conv2d(reduced_c, expand_c, 1)
        self.act2 = nn.Sigmoid()

    def forward(self, x: Tensor) -> Tensor:
        # Mean over dims 2 and 3, kept as size-1 dims so the gate broadcasts over x
        pooled = x.mean((2, 3), keepdim=True)
        gate = self.act1(self.conv_reduce(pooled))
        gate = self.act2(self.conv_expand(gate))
        return gate * x

    def complexity(self, cx):
        """Accumulate the cost of both 1x1 convs; ``h``/``w`` of ``cx`` are restored afterwards."""
        saved_hw = cx["h"], cx["w"]
        cx = gap2d_cx(cx)
        for conv in (self.conv_reduce, self.conv_expand):
            cx = conv2d_cx(cx,
                           in_c=conv.in_channels,
                           out_c=conv.out_channels,
                           k=1,
                           bias=True,
                           trainable=conv.weight.requires_grad)
        cx["h"], cx["w"] = saved_hw

        return cx


class MBConv(nn.Module):
    """
    MBConv block: 1x1 expansion -> depth-wise conv -> optional squeeze-excite
    -> 1x1 linear projection.

    A residual shortcut is used only when ``stride == 1`` and
    ``input_c == out_c``; in that case DropPath is applied to the block output
    before the addition when ``drop_rate > 0``.

    Raises:
        ValueError: if ``stride`` is not 1 or 2.
        AssertionError: if ``expand_ratio == 1`` (the expansion conv is always built).
    """
    def __init__(self,
                 kernel_size: int,
                 input_c: int,
                 out_c: int,
                 expand_ratio: int,
                 stride: int,
                 se_ratio: float,
                 drop_rate: float,
                 norm_layer: Callable[..., nn.Module]):
        super(MBConv, self).__init__()

        if stride not in [1, 2]:
            raise ValueError("illegal stride value.")

        self.has_shortcut = (stride == 1 and input_c == out_c)

        activation_layer = nn.SiLU  # alias Swish
        expanded_c = input_c * expand_ratio

        # In EfficientNetV2 an MBConv never has expansion == 1, so the
        # point-wise expansion conv always exists
        assert expand_ratio != 1
        # Point-wise expansion
        self.expand_conv = ConvBNAct(input_c,
                                     expanded_c,
                                     kernel_size=1,
                                     norm_layer=norm_layer,
                                     activation_layer=activation_layer)

        # Depth-wise convolution
        self.dwconv = ConvBNAct(expanded_c,
                                expanded_c,
                                kernel_size=kernel_size,
                                stride=stride,
                                groups=expanded_c,
                                norm_layer=norm_layer,
                                activation_layer=activation_layer)

        self.se = SqueezeExcite(input_c, expanded_c, se_ratio) if se_ratio > 0 else nn.Identity()

        # Point-wise linear projection
        self.project_conv = ConvBNAct(expanded_c,
                                      out_planes=out_c,
                                      kernel_size=1,
                                      norm_layer=norm_layer,
                                      activation_layer=nn.Identity)  # no activation here, hence Identity

        self.out_channels = out_c

        # The DropPath layer is only created when the shortcut connection is used
        self.drop_rate = drop_rate
        if self.has_shortcut and drop_rate > 0:
            self.dropout = DropPath(drop_rate)

    def forward(self, x: Tensor) -> Tensor:
        result = self.expand_conv(x)
        result = self.dwconv(result)
        result = self.se(result)
        result = self.project_conv(result)

        if self.has_shortcut:
            if self.drop_rate > 0:
                result = self.dropout(result)
            result += x

        return result

    def complexity(self, cx):
        """Accumulate the cost of every sub-layer of the block into ``cx``."""
        cx = self.expand_conv.complexity(cx)
        cx = self.dwconv.complexity(cx)
        # With se_ratio == 0 `self.se` is nn.Identity, which has no
        # `complexity` method and contributes nothing.
        if hasattr(self.se, "complexity"):
            cx = self.se.complexity(cx)
        cx = self.project_conv.complexity(cx)

        return cx


class FusedMBConv(nn.Module):
    """
    Fused-MBConv block: a regular convolution replaces the expansion +
    depth-wise pair of MBConv.

    With ``expand_ratio != 1`` the block is an expansion conv followed by a
    1x1 projection without activation; otherwise a single conv with
    activation is used. Squeeze-excite is not supported (``se_ratio`` must be 0).
    A residual shortcut is used only when ``stride == 1`` and ``input_c == out_c``.
    """
    def __init__(self,
                 kernel_size: int,
                 input_c: int,
                 out_c: int,
                 expand_ratio: int,
                 stride: int,
                 se_ratio: float,
                 drop_rate: float,
                 norm_layer: Callable[..., nn.Module]):
        super().__init__()

        assert stride in [1, 2]
        assert se_ratio == 0

        self.has_shortcut = stride == 1 and input_c == out_c
        self.has_expansion = expand_ratio != 1
        self.drop_rate = drop_rate

        silu = nn.SiLU  # alias Swish
        hidden_c = input_c * expand_ratio

        if not self.has_expansion:
            # Only project_conv exists in this case, and it keeps its activation
            self.project_conv = ConvBNAct(input_c,
                                          out_c,
                                          kernel_size=kernel_size,
                                          stride=stride,
                                          norm_layer=norm_layer,
                                          activation_layer=silu)
        else:
            # Expansion convolution
            self.expand_conv = ConvBNAct(input_c,
                                         hidden_c,
                                         kernel_size=kernel_size,
                                         stride=stride,
                                         norm_layer=norm_layer,
                                         activation_layer=silu)

            # Linear projection: no activation
            self.project_conv = ConvBNAct(hidden_c,
                                          out_c,
                                          kernel_size=1,
                                          norm_layer=norm_layer,
                                          activation_layer=nn.Identity)

        self.out_channels = out_c

        # The DropPath layer is only created when the shortcut connection is used
        if self.has_shortcut and drop_rate > 0:
            self.dropout = DropPath(drop_rate)

    def forward(self, x: Tensor) -> Tensor:
        result = self.expand_conv(x) if self.has_expansion else x
        result = self.project_conv(result)

        if not self.has_shortcut:
            return result

        if self.drop_rate > 0:
            result = self.dropout(result)
        result += x
        return result

    def complexity(self, cx):
        """Accumulate the cost of the convolution(s) of this block into ``cx``."""
        if self.has_expansion:
            cx = self.expand_conv.complexity(cx)
        return self.project_conv.complexity(cx)


class EfficientNetV2(nn.Module):
    """
    EfficientNetV2 classifier assembled from a stage configuration.

    Each row of ``model_cnf`` must have 8 entries, read here as
    ``[repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio]``;
    ``operator == 0`` selects ``FusedMBConv``, any other value ``MBConv``.
    The network is stem -> blocks -> head (1x1 conv, global pooling, flatten,
    optional dropout, linear classifier).
    """
    def __init__(self,
                 model_cnf: list,
                 num_classes: int = 1000,
                 num_features: int = 1280,
                 dropout_rate: float = 0.2,
                 drop_connect_rate: float = 0.2):
        super().__init__()

        for stage_cfg in model_cnf:
            assert len(stage_cfg) == 8
        self.model_cnf = model_cnf
        self.num_classes = num_classes
        self.num_features = num_features

        bn_layer = partial(nn.BatchNorm2d, eps=1e-3, momentum=0.1)

        # The stem outputs the input width of the first stage; activation defaults to SiLU
        self.stem = ConvBNAct(3,
                              model_cnf[0][4],
                              kernel_size=3,
                              stride=2,
                              norm_layer=bn_layer)

        total_blocks = sum(stage_cfg[0] for stage_cfg in model_cnf)
        stage_blocks = []
        for stage_cfg in model_cnf:
            block_cls = FusedMBConv if stage_cfg[-2] == 0 else MBConv
            for repeat_idx in range(stage_cfg[0]):
                first_in_stage = repeat_idx == 0
                block_id = len(stage_blocks)  # running index over all blocks
                stage_blocks.append(block_cls(kernel_size=stage_cfg[1],
                                              input_c=stage_cfg[4] if first_in_stage else stage_cfg[5],
                                              out_c=stage_cfg[5],
                                              expand_ratio=stage_cfg[3],
                                              stride=stage_cfg[2] if first_in_stage else 1,
                                              se_ratio=stage_cfg[-1],
                                              # drop rate grows linearly with the block index
                                              drop_rate=drop_connect_rate * block_id / total_blocks,
                                              norm_layer=bn_layer))
        self.blocks = nn.Sequential(*stage_blocks)

        head_layers = [
            # activation defaults to SiLU
            ("project_conv", ConvBNAct(model_cnf[-1][-3],
                                       num_features,
                                       kernel_size=1,
                                       norm_layer=bn_layer)),
            ("avgpool", nn.AdaptiveAvgPool2d(1)),
            ("flatten", nn.Flatten()),
        ]
        if dropout_rate > 0:
            head_layers.append(("dropout", nn.Dropout(p=dropout_rate, inplace=True)))
        head_layers.append(("classifier", nn.Linear(num_features, num_classes)))

        self.head = nn.Sequential(OrderedDict(head_layers))

        # initial weights
        for layer in self.modules():
            self._init_layer(layer)

    @staticmethod
    def _init_layer(layer):
        """Initialise one Conv2d / BatchNorm2d / Linear layer; other layers are left untouched."""
        if isinstance(layer, nn.Conv2d):
            nn.init.kaiming_normal_(layer.weight, mode="fan_out")
            if layer.bias is not None:
                nn.init.zeros_(layer.bias)
        elif isinstance(layer, nn.BatchNorm2d):
            nn.init.ones_(layer.weight)
            nn.init.zeros_(layer.bias)
        elif isinstance(layer, nn.Linear):
            nn.init.normal_(layer.weight, 0, 0.01)
            nn.init.zeros_(layer.bias)

    def forward(self, x: Tensor) -> Tensor:
        return self.head(self.blocks(self.stem(x)))

    def complexity(self, h, w, c):
        """
        Return a dict with accumulated ``flops``, ``params``, ``acts`` and
        ``freeze`` counts for an input of height ``h``, width ``w`` and ``c`` channels.
        """
        cx = dict(h=h, w=w, c=c, flops=0, params=0, acts=0, freeze=0)
        cx = self.stem.complexity(cx)

        for block in self.blocks.children():
            if hasattr(block, "complexity"):
                cx = block.complexity(cx)
            else:
                print(block)

        for layer in self.head.children():
            if hasattr(layer, "complexity"):
                cx = layer.complexity(cx)
                continue
            if isinstance(layer, nn.Linear):
                # the classifier sees the globally pooled feature map
                cx = gap2d_cx(cx)
                cx = linear_cx(cx,
                               layer.in_features,
                               layer.out_features,
                               bias=True,
                               trainable=layer.weight.requires_grad)
        # print(cx)
        return cx


def efficientnetv2_s(num_classes: int = 1000):
    """
    Build the EfficientNetV2-S variant (head dropout 0.2).
    https://arxiv.org/abs/2104.00298
    """
    # train_size: 300, eval_size: 384

    # columns: repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio
    stage_settings = [
        [2, 3, 1, 1, 24, 24, 0, 0],
        [4, 3, 2, 4, 24, 48, 0, 0],
        [4, 3, 2, 4, 48, 64, 0, 0],
        [6, 3, 2, 4, 64, 128, 1, 0.25],
        [9, 3, 1, 6, 128, 160, 1, 0.25],
        [15, 3, 2, 6, 160, 256, 1, 0.25],
    ]

    return EfficientNetV2(model_cnf=stage_settings,
                          num_classes=num_classes,
                          dropout_rate=0.2)


def efficientnetv2_m(num_classes: int = 1000):
    """
    Build the EfficientNetV2-M variant (head dropout 0.3).
    https://arxiv.org/abs/2104.00298
    """
    # train_size: 384, eval_size: 480

    # columns: repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio
    stage_settings = [
        [3, 3, 1, 1, 24, 24, 0, 0],
        [5, 3, 2, 4, 24, 48, 0, 0],
        [5, 3, 2, 4, 48, 80, 0, 0],
        [7, 3, 2, 4, 80, 160, 1, 0.25],
        [14, 3, 1, 6, 160, 176, 1, 0.25],
        [18, 3, 2, 6, 176, 304, 1, 0.25],
        [5, 3, 1, 6, 304, 512, 1, 0.25],
    ]

    return EfficientNetV2(model_cnf=stage_settings,
                          num_classes=num_classes,
                          dropout_rate=0.3)


def efficientnetv2_l(num_classes: int = 1000):
    """
    Build the EfficientNetV2-L variant (head dropout 0.4).
    https://arxiv.org/abs/2104.00298
    """
    # train_size: 384, eval_size: 480

    # columns: repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio
    stage_settings = [
        [4, 3, 1, 1, 32, 32, 0, 0],
        [7, 3, 2, 4, 32, 64, 0, 0],
        [7, 3, 2, 4, 64, 96, 0, 0],
        [10, 3, 2, 4, 96, 192, 1, 0.25],
        [19, 3, 1, 6, 192, 224, 1, 0.25],
        [25, 3, 2, 6, 224, 384, 1, 0.25],
        [7, 3, 1, 6, 384, 640, 1, 0.25],
    ]

    return EfficientNetV2(model_cnf=stage_settings,
                          num_classes=num_classes,
                          dropout_rate=0.4)


================================================
FILE: pytorch_classification/model_complexity/utils.py
================================================
"""
This code is adapted from:
https://github.com/facebookresearch/pycls/blob/master/pycls/models/blocks.py
"""


def conv2d_cx(cx, in_c, out_c, k, *, stride=1, groups=1, bias=False, trainable=True):
    """
    Add the cost of a 2D convolution to the complexity dict ``cx`` and return it.

    ``cx`` is updated in place: ``h``/``w`` become ``(size - 1) // stride + 1``,
    ``c`` becomes ``out_c``, and ``flops``, ``params`` and ``acts`` are
    incremented. When ``trainable`` is exactly ``False`` the parameter count
    is also added to ``freeze``.
    """
    assert k % 2 == 1, "Only odd size kernels supported to avoid padding issues."
    in_h, in_w, cur_c = (cx[key] for key in ("h", "w", "c"))
    assert cur_c == in_c

    out_h = (in_h - 1) // stride + 1
    out_w = (in_w - 1) // stride + 1
    bias_terms = out_c if bias else 0
    n_params = k * k * in_c * out_c // groups + bias_terms

    cx.update(h=out_h, w=out_w, c=out_c)
    cx["flops"] += k * k * in_c * out_c * out_h * out_w // groups + bias_terms
    cx["params"] += n_params
    cx["acts"] += out_c * out_h * out_w
    if trainable is False:
        cx["freeze"] += n_params
    return cx


def pool2d_cx(cx, in_c, k, *, stride=1):
    """
    Add the cost of a 2D pooling layer to the complexity dict ``cx`` and return it.

    ``cx`` is updated in place: ``h``/``w`` become ``(size - 1) // stride + 1``
    and ``acts`` grows by ``in_c * h * w``; no flops or params are counted.
    """
    assert k % 2 == 1, "Only odd size kernels supported to avoid padding issues."
    in_h, in_w, cur_c = (cx[key] for key in ("h", "w", "c"))
    assert cur_c == in_c

    out_h = (in_h - 1) // stride + 1
    out_w = (in_w - 1) // stride + 1
    cx.update(h=out_h, w=out_w)
    cx["acts"] += in_c * out_h * out_w
    return cx


def norm2d_cx(cx, in_c, trainable=True):
    """
    Add the cost of a 2D normalization layer to the complexity dict ``cx`` and return it.

    ``4 * c`` values are counted as params. The two running statistics
    (moving mean, variance) are always counted as frozen; beta and gamma are
    counted as frozen too when ``trainable`` is exactly ``False``.
    """
    channels = cx["c"]
    assert channels == in_c
    cx["params"] += 4 * channels

    frozen = 2 * channels  # moving_mean, variance
    if trainable is False:
        frozen += 2 * channels  # beta, gamma
    cx["freeze"] += frozen
    return cx


def gap2d_cx(cx):
    """Set the spatial size in the complexity dict ``cx`` to 1x1 (global pooling) and return it."""
    cx.update(h=1, w=1)
    return cx


def linear_cx(cx, in_units, out_units, *, bias=False, trainable=True):
    """
    Add the cost of a fully-connected layer to the complexity dict ``cx`` and return it.

    ``cx`` is updated in place: ``c`` becomes ``out_units`` and ``flops``,
    ``params`` and ``acts`` are incremented. When ``trainable`` is exactly
    ``False`` the parameter count is also added to ``freeze``.
    """
    assert cx["c"] == in_units

    # weights plus optional bias; the same number is used for flops and params
    n_weights = in_units * out_units + (out_units if bias else 0)

    cx["c"] = out_units
    cx["flops"] += n_weights
    cx["params"] += n_weights
    cx["acts"] += out_units
    if trainable is False:
        cx["freeze"] += n_weights
    return cx


================================================
FILE: pytorch_classification/swin_transformer/README.md
================================================
## 代码使用简介

1. 下载好数据集，代码中默认使用的是花分类数据集，下载地址: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz),
如果下载不了的话可以通过百度云链接下载: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg 提取码:58p0
2. 在`train.py`脚本中将`--data-path`设置成解压后的`flower_photos`文件夹绝对路径
3. 下载预训练权重，在`model.py`文件中每个模型都有提供预训练权重的下载地址，根据自己使用的模型下载对应预训练权重
4. 在`train.py`脚本中将`--weights`参数设成下载好的预训练权重路径
5. 设置好数据集的路径`--data-path`以及预训练权重的路径`--weights`就能使用`train.py`脚本开始训练了(训练过程中会自动生成`class_indices.json`文件)
6. 在`predict.py`脚本中导入和训练脚本中同样的模型，并将`model_weight_path`设置成训练好的模型权重路径(默认保存在weights文件夹下)
7. 在`predict.py`脚本中将`img_path`设置成你自己需要预测的图片绝对路径
8. 设置好权重路径`model_weight_path`和预测的图片路径`img_path`就能使用`predict.py`脚本进行预测了
9. 如果要使用自己的数据集，请按照花分类数据集的文件结构进行摆放(即一个类别对应一个文件夹)，并且将训练以及预测脚本中的`num_classes`设置成你自己数据的类别数


================================================
FILE: pytorch_classification/swin_transformer/create_confusion_matrix.py
================================================
import os
import json
import argparse
import sys

import torch
from torchvision import transforms
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from prettytable import PrettyTable

from utils import read_split_data
from my_dataset import MyDataSet
from model import swin_base_patch4_window12_384_in22k as create_model


class ConfusionMatrix(object):
    """
    Accumulates a confusion matrix indexed as ``matrix[predicted, true]``.

    Note: if the plotted figure is clipped, it is a matplotlib version issue;
    this example renders correctly with matplotlib-3.2.1 (windows and ubuntu).
    The prettytable package must be installed separately.
    """
    def __init__(self, num_classes: int, labels: list):
        self.num_classes = num_classes
        self.labels = labels
        self.matrix = np.zeros((num_classes, num_classes))

    def update(self, preds, labels):
        """Count one sample per (prediction, ground-truth) pair."""
        for pred_idx, true_idx in zip(preds, labels):
            self.matrix[pred_idx, true_idx] += 1

    def summary(self):
        """Print the overall accuracy and a per-class precision / recall / specificity table."""
        cm = self.matrix
        n_classes = self.num_classes
        total = np.sum(cm)

        def _ratio(numerator, denominator):
            # Rounded ratio, defined as 0. when the denominator is zero
            return round(numerator / denominator, 3) if denominator != 0 else 0.

        # calculate accuracy
        correct = 0
        for idx in range(n_classes):
            correct += cm[idx, idx]
        acc = correct / total
        print("the model accuracy is ", acc)

        # precision, recall, specificity
        report = PrettyTable()
        report.field_names = ["", "Precision", "Recall", "Specificity"]
        for idx in range(n_classes):
            tp = cm[idx, idx]
            fp = np.sum(cm[idx, :]) - tp  # predicted as idx, but another class
            fn = np.sum(cm[:, idx]) - tp  # truly idx, but predicted otherwise
            tn = total - tp - fp - fn
            report.add_row([self.labels[idx],
                            _ratio(tp, tp + fp),
                            _ratio(tp, tp + fn),
                            _ratio(tn, tn + fp)])
        print(report)

    def plot(self):
        """Print the matrix and show it as an annotated heat map."""
        cm = self.matrix
        n_classes = self.num_classes
        print(cm)
        plt.imshow(cm, cmap=plt.cm.Blues)

        tick_positions = range(n_classes)
        # x-axis tick labels
        plt.xticks(tick_positions, self.labels, rotation=45)
        # y-axis tick labels
        plt.yticks(tick_positions, self.labels)
        # show the colorbar
        plt.colorbar()
        plt.xlabel('True Labels')
        plt.ylabel('Predicted Labels')
        plt.title('Confusion matrix')

        # annotate every cell with its count
        half_max = cm.max() / 2
        for col in range(n_classes):
            for row in range(n_classes):
                # the matrix is indexed [row, col], i.e. [y, x]
                count = int(cm[row, col])
                text_color = "white" if count > half_max else "black"
                plt.text(col, row, count,
                         verticalalignment='center',
                         horizontalalignment='center',
                         color=text_color)
        plt.tight_layout()
        plt.show()


def main(args):
    """
    Run a trained model over the validation split and report its confusion matrix.

    Args:
        args: parsed command-line namespace. Reads ``device``, ``data_path``,
            ``batch_size``, ``num_classes`` and ``weights``.

    Raises:
        AssertionError: if the weights file or ``./class_indices.json`` does not exist.

    Side effects:
        Prints progress, shows the confusion-matrix plot and prints the
        per-class summary table.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    print(f"using device: {device}")

    _, _, val_images_path, val_images_label = read_split_data(args.data_path)

    img_size = 384
    data_transform = {
        "val": transforms.Compose([transforms.Resize(int(img_size * 1.143)),
                                   transforms.CenterCrop(img_size),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # Build the validation dataset
    val_dataset = MyDataSet(images_path=val_images_path,
                            images_class=val_images_label,
                            transform=data_transform["val"])

    # os.cpu_count() may return None, which min() cannot compare with ints
    nw = min([os.cpu_count() or 1, args.batch_size if args.batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_dataset.collate_fn)

    model = create_model(num_classes=args.num_classes)
    # load the trained weights
    assert os.path.exists(args.weights), "cannot find {} file".format(args.weights)
    model.load_state_dict(torch.load(args.weights, map_location=device))
    model.to(device)

    # read class_indict
    json_label_path = './class_indices.json'
    assert os.path.exists(json_label_path), "cannot find {} file".format(json_label_path)
    # the context manager closes the file handle, which was previously leaked
    with open(json_label_path, 'r') as json_file:
        class_indict = json.load(json_file)

    labels = list(class_indict.values())
    confusion = ConfusionMatrix(num_classes=args.num_classes, labels=labels)
    model.eval()
    with torch.no_grad():
        for val_data in tqdm(val_loader, file=sys.stdout):
            val_images, val_labels = val_data
            outputs = model(val_images.to(device))
            outputs = torch.softmax(outputs, dim=1)
            outputs = torch.argmax(outputs, dim=1)
            confusion.update(outputs.to("cpu").numpy(), val_labels.to("cpu").numpy())
    confusion.plot()
    confusion.summary()


# Command-line entry point: parse the evaluation options and run main().
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--batch-size', type=int, default=2)

    # Root directory of the dataset
    # http://download.tensorflow.org/example_images/flower_photos.tgz
    parser.add_argument('--data-path', type=str,
                        default="/data/flower_photos")

    # Path of the trained weights to evaluate
    parser.add_argument('--weights', type=str, default='./weights/model-19.pth',
                        help='initial weights path')
    # Device to run on (main() falls back to CPU when CUDA is unavailable)
    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')

    opt = parser.parse_args()

    main(opt)


================================================
FILE: pytorch_classification/swin_transformer/model.py
================================================
""" Swin Transformer
A PyTorch impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows`
    - https://arxiv.org/pdf/2103.14030

Code/weights from https://github.com/microsoft/Swin-Transformer

"""

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.checkpoint as checkpoint
import numpy as np
from typing import Optional


def drop_path_f(x, drop_prob: float = 0., training: bool = False):
    """Stochastic depth: zero out whole samples of ``x`` and rescale the survivors.

    One Bernoulli draw is made per entry of the first dimension and broadcast over
    all remaining dimensions, so the function works for tensors of any rank.
    Surviving samples are divided by the keep probability.

    Args:
        x: input tensor; the first dimension is treated as the sample dimension.
        drop_prob (float): probability of dropping a sample. Default: 0.0
        training (bool): the input is returned untouched unless this is True. Default: False

    Returns:
        ``x`` itself when ``drop_prob == 0`` or ``training`` is False, otherwise a
        new tensor of the same shape.
    """
    if drop_prob == 0. or not training:
        return x

    survive = 1 - drop_prob
    # (N, 1, 1, ...) so a single random value decides the fate of a whole sample.
    mask_shape = (x.shape[0],) + (1,) * (x.ndim - 1)
    # survive + U[0, 1) floored is 1 with probability `survive`, else 0.
    mask = (survive + torch.rand(mask_shape, dtype=x.dtype, device=x.device)).floor_()
    return x.div(survive) * mask


class DropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Args:
        drop_prob (float | None): probability of dropping a sample. ``None`` (the
            default) disables dropping entirely, so the module acts as an identity.
    """
    def __init__(self, drop_prob=None):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        # A module built with the default drop_prob=None would otherwise reach
        # `1 - None` inside drop_path_f in training mode and raise TypeError.
        if self.drop_prob is None:
            return x
        return drop_path_f(x, self.drop_prob, self.training)


def window_partition(x, window_size: int):
    """
    Split a feature map into non-overlapping square windows.

    Args:
        x: (B, H, W, C); H and W must be multiples of ``window_size``
        window_size (int): window size (M)

    Returns:
        windows: (num_windows*B, window_size, window_size, C)
    """
    batch, height, width, channels = x.shape
    rows, cols = height // window_size, width // window_size
    # [B, H, W, C] -> [B, H//M, M, W//M, M, C]
    tiles = x.view(batch, rows, window_size, cols, window_size, channels)
    # Bring the two window-grid axes together: -> [B, H//M, W//M, M, M, C]
    tiles = tiles.permute(0, 1, 3, 2, 4, 5).contiguous()
    # Fold batch and window-grid axes: -> [B*num_windows, M, M, C]
    return tiles.view(-1, window_size, window_size, channels)


def window_reverse(windows, window_size: int, H: int, W: int):
    """
    Stitch a stack of windows back into a feature map.

    Args:
        windows: (num_windows*B, window_size, window_size, C)
        window_size (int): window size (M)
        H (int): height of the feature map to rebuild
        W (int): width of the feature map to rebuild

    Returns:
        x: (B, H, W, C)
    """
    # Number of windows one (H, W) map is cut into; the batch size follows from it.
    windows_per_map = H * W / window_size / window_size
    B = int(windows.shape[0] / windows_per_map)
    # [B*num_windows, M, M, C] -> [B, H//M, W//M, M, M, C]
    grid = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1)
    # -> [B, H//M, M, W//M, M, C] -> [B, H, W, C]
    return grid.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)


class PatchEmbed(nn.Module):
    """
    2D Image to Patch Embedding

    Args:
        patch_size (int | tuple(int)): patch height/width; an int is used for both. Default: 4
        in_c (int): number of input image channels. Default: 3
        embed_dim (int): number of output channels of the projection. Default: 96
        norm_layer (nn.Module, optional): normalization applied to the embedded
            patches; identity when None. Default: None
    """
    def __init__(self, patch_size=4, in_c=3, embed_dim=96, norm_layer=None):
        super().__init__()
        if isinstance(patch_size, (tuple, list)):
            patch_size = tuple(patch_size)
        else:
            patch_size = (patch_size, patch_size)
        self.patch_size = patch_size
        self.in_chans = in_c
        self.embed_dim = embed_dim
        self.proj = nn.Conv2d(in_c, embed_dim, kernel_size=patch_size, stride=patch_size)
        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()

    def forward(self, x):
        """
        Args:
            x: image batch of shape (B, C, H, W)

        Returns:
            (tokens, H, W): tokens of shape (B, H*W, embed_dim) together with the
            height and width of the patch grid.
        """
        _, _, H, W = x.shape

        # Pad bottom/right so H and W become multiples of the patch size.
        # The trailing modulo keeps an already-aligned dimension at zero padding;
        # without it that dimension would gain one full patch of zeros whenever
        # only the other dimension needs padding.
        pad_b = (self.patch_size[0] - H % self.patch_size[0]) % self.patch_size[0]
        pad_r = (self.patch_size[1] - W % self.patch_size[1]) % self.patch_size[1]
        if pad_b > 0 or pad_r > 0:
            # F.pad order for the last 3 dims:
            # (W_left, W_right, H_top, H_bottom, C_front, C_back)
            x = F.pad(x, (0, pad_r, 0, pad_b, 0, 0))

        # Downsample by the patch size.
        x = self.proj(x)
        _, _, H, W = x.shape
        # flatten: [B, C, H, W] -> [B, C, HW]
        # transpose: [B, C, HW] -> [B, HW, C]
        x = x.flatten(2).transpose(1, 2)
        x = self.norm(x)
        return x, H, W


class PatchMerging(nn.Module):
    r""" Patch Merging Layer.

    Concatenates every 2x2 neighbourhood of tokens along the channel axis
    (C -> 4*C), normalizes, and projects linearly to 2*C channels.

    Args:
        dim (int): Number of input channels.
        norm_layer (nn.Module, optional): Normalization layer.  Default: nn.LayerNorm
    """

    def __init__(self, dim, norm_layer=nn.LayerNorm):
        super().__init__()
        self.dim = dim
        self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False)
        self.norm = norm_layer(4 * dim)

    def forward(self, x, H, W):
        """
        Args:
            x: tokens of shape (B, H*W, C)
            H, W: spatial size of the token grid

        Returns:
            tensor of shape (B, ceil(H/2)*ceil(W/2), 2*C)
        """
        B, L, C = x.shape
        assert L == H * W, "input feature has wrong size"

        grid = x.view(B, H, W, C)

        # Odd H or W: append one row/column of zeros so the grid splits into 2x2 cells.
        if H % 2 == 1 or W % 2 == 1:
            # F.pad walks the dims from last to first; the layout is [B, H, W, C],
            # so the pairs are (C_front, C_back, W_left, W_right, H_top, H_bottom).
            grid = F.pad(grid, (0, 0, 0, W % 2, 0, H % 2))

        # The four members of each 2x2 cell, each [B, H/2, W/2, C], in the order
        # top-left, bottom-left, top-right, bottom-right.
        corners = [grid[:, row::2, col::2, :] for col in (0, 1) for row in (0, 1)]
        merged = torch.cat(corners, -1).view(B, -1, 4 * C)  # [B, H/2*W/2, 4*C]

        return self.reduction(self.norm(merged))  # [B, H/2*W/2, 2*C]


class Mlp(nn.Module):
    """ Two-layer MLP as used in Vision Transformer, MLP-Mixer and related networks

    Args:
        in_features (int): size of the input features.
        hidden_features (int, optional): width of the hidden layer; falls back to ``in_features``.
        out_features (int, optional): size of the output features; falls back to ``in_features``.
        act_layer (nn.Module, optional): activation class, instantiated without arguments. Default: nn.GELU
        drop (float, optional): dropout rate applied after the activation and after the output layer. Default: 0.0
    """
    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        if not out_features:
            out_features = in_features
        if not hidden_features:
            hidden_features = in_features

        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.drop1 = nn.Dropout(drop)
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop2 = nn.Dropout(drop)

    def forward(self, x):
        # fc1 -> activation -> dropout -> fc2 -> dropout
        hidden = self.drop1(self.act(self.fc1(x)))
        return self.drop2(self.fc2(hidden))


class WindowAttention(nn.Module):
    r""" Window based multi-head self attention (W-MSA) module with relative position bias.
    It supports both of shifted and non-shifted window.

    Args:
        dim (int): Number of input channels.
        window_size (tuple[int]): The height and width of the window.
        num_heads (int): Number of attention heads.
        qkv_bias (bool, optional):  If True, add a learnable bias to query, key, value. Default: True
        attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0
        proj_drop (float, optional): Dropout ratio of output. Default: 0.0
    """

    def __init__(self, dim, window_size, num_heads, qkv_bias=True, attn_drop=0., proj_drop=0.):

        super().__init__()
        self.dim = dim
        self.window_size = window_size  # [Mh, Mw]
        self.num_heads = num_heads
        self.scale = (dim // num_heads) ** -0.5

        win_h, win_w = window_size[0], window_size[1]
        # Number of distinct relative offsets along each axis.
        span_h, span_w = 2 * win_h - 1, 2 * win_w - 1

        # Learnable bias, one row per relative offset: [(2*Mh-1) * (2*Mw-1), nH]
        self.relative_position_bias_table = nn.Parameter(torch.zeros(span_h * span_w, num_heads))

        # For every ordered token pair inside a window, the row of the table to use.
        grid = torch.stack(torch.meshgrid([torch.arange(win_h), torch.arange(win_w)], indexing="ij"))  # [2, Mh, Mw]
        flat = torch.flatten(grid, 1)  # [2, Mh*Mw]
        offsets = flat[:, :, None] - flat[:, None, :]  # [2, Mh*Mw, Mh*Mw]
        # Shift both offsets to start at 0, then flatten (row, col) in row-major order.
        row_term = (offsets[0] + (win_h - 1)) * span_w
        col_term = offsets[1] + (win_w - 1)
        self.register_buffer("relative_position_index", row_term + col_term)  # [Mh*Mw, Mh*Mw]

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

        nn.init.trunc_normal_(self.relative_position_bias_table, std=.02)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x, mask: Optional[torch.Tensor] = None):
        """
        Args:
            x: input features with shape of (num_windows*B, Mh*Mw, C)
            mask: additive mask with shape of (num_windows, Mh*Mw, Mh*Mw) or None
        """
        B_, N, C = x.shape
        heads = self.num_heads

        # [B_, N, 3*C] -> [B_, N, 3, nH, C/nH] -> [3, B_, nH, N, C/nH]
        qkv = self.qkv(x).reshape(B_, N, 3, heads, C // heads).permute(2, 0, 3, 1, 4)
        q, k, v = qkv.unbind(0)  # make torchscript happy (cannot use tensor as tuple)

        # Scaled dot-product scores: [B_, nH, N, N]
        scores = (q * self.scale) @ k.transpose(-2, -1)

        # Look up the bias for every token pair: [N*N, nH] -> [N, N, nH] -> [nH, N, N]
        area = self.window_size[0] * self.window_size[1]
        bias = self.relative_position_bias_table[self.relative_position_index.view(-1)]
        bias = bias.view(area, area, -1).permute(2, 0, 1).contiguous()
        scores = scores + bias.unsqueeze(0)

        if mask is not None:
            nW = mask.shape[0]  # num_windows
            # Expose the window axis so the per-window mask broadcasts over batch and heads:
            # [B, nW, nH, N, N] + [1, nW, 1, N, N]
            scores = scores.view(B_ // nW, nW, heads, N, N) + mask.unsqueeze(1).unsqueeze(0)
            scores = scores.view(-1, heads, N, N)

        weights = self.attn_drop(self.softmax(scores))

        # [B_, nH, N, C/nH] -> [B_, N, nH, C/nH] -> [B_, N, C]
        out = (weights @ v).transpose(1, 2).reshape(B_, N, C)
        return self.proj_drop(self.proj(out))


class SwinTransformerBlock(nn.Module):
    r""" Swin Transformer Block.

    Window attention (optionally on a cyclically shifted feature map) followed by
    an MLP, each wrapped in a residual connection with stochastic depth.

    Args:
        dim (int): Number of input channels.
        num_heads (int): Number of attention heads.
        window_size (int): Window size.
        shift_size (int): Shift size for SW-MSA.
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
        drop (float, optional): Dropout rate. Default: 0.0
        attn_drop (float, optional): Attention dropout rate. Default: 0.0
        drop_path (float, optional): Stochastic depth rate. Default: 0.0
        act_layer (nn.Module, optional): Activation layer. Default: nn.GELU
        norm_layer (nn.Module, optional): Normalization layer.  Default: nn.LayerNorm
    """

    def __init__(self, dim, num_heads, window_size=7, shift_size=0,
                 mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0., drop_path=0.,
                 act_layer=nn.GELU, norm_layer=nn.LayerNorm):
        super().__init__()
        self.dim = dim
        self.num_heads = num_heads
        self.window_size = window_size
        self.shift_size = shift_size
        self.mlp_ratio = mlp_ratio
        assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size"

        self.norm1 = norm_layer(dim)
        self.attn = WindowAttention(
            dim,
            window_size=(self.window_size, self.window_size),
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            attn_drop=attn_drop,
            proj_drop=drop)

        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.norm2 = norm_layer(dim)
        self.mlp = Mlp(in_features=dim, hidden_features=int(dim * mlp_ratio),
                       act_layer=act_layer, drop=drop)

    def forward(self, x, attn_mask):
        """
        Args:
            x: tokens of shape (B, H*W, C). ``self.H`` and ``self.W`` must have been
                assigned on this module before the call (BasicLayer.forward does so).
            attn_mask: mask for shifted-window attention; ignored when shift_size is 0.

        Returns:
            tensor of shape (B, H*W, C)
        """
        H, W = self.H, self.W
        B, L, C = x.shape
        assert L == H * W, "input feature has wrong size"

        ws, shift = self.window_size, self.shift_size
        shifted = shift > 0

        residual = x
        x = self.norm1(x).view(B, H, W, C)

        # Pad right/bottom so the feature map is a multiple of the window size.
        pad_r = (ws - W % ws) % ws
        pad_b = (ws - H % ws) % ws
        x = F.pad(x, (0, 0, 0, pad_r, 0, pad_b))
        Hp, Wp = x.shape[1], x.shape[2]

        if shifted:
            # Cyclic shift towards the top-left.
            x = torch.roll(x, shifts=(-shift, -shift), dims=(1, 2))
        else:
            # Regular windows need no mask.
            attn_mask = None

        # Partition into windows: [nW*B, Mh*Mw, C]
        windows = window_partition(x, ws).view(-1, ws * ws, C)

        # W-MSA / SW-MSA
        windows = self.attn(windows, mask=attn_mask)

        # Merge windows back into a map: [B, Hp, Wp, C]
        x = window_reverse(windows.view(-1, ws, ws, C), ws, Hp, Wp)

        if shifted:
            # Undo the cyclic shift.
            x = torch.roll(x, shifts=(shift, shift), dims=(1, 2))

        if pad_r > 0 or pad_b > 0:
            # Strip the padding added above.
            x = x[:, :H, :W, :].contiguous()

        # Residual around attention, then residual around the MLP.
        x = residual + self.drop_path(x.view(B, H * W, C))
        return x + self.drop_path(self.mlp(self.norm2(x)))


class BasicLayer(nn.Module):
    """
    A basic Swin Transformer layer for one stage.

    Args:
        dim (int): Number of input channels.
        depth (int): Number of blocks.
        num_heads (int): Number of attention heads.
        window_size (int): Local window size.
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
        drop (float, optional): Dropout rate. Default: 0.0
        attn_drop (float, optional): Attention dropout rate. Default: 0.0
        drop_path (float | list[float] | tuple[float], optional): Stochastic depth rate; a sequence
            supplies one rate per block. Default: 0.0
        norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
        downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
    """

    def __init__(self, dim, depth, num_heads, window_size,
                 mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0.,
                 drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False):
        super().__init__()
        self.dim = dim
        self.depth = depth
        self.window_size = window_size
        self.use_checkpoint = use_checkpoint
        self.shift_size = window_size // 2

        # A tuple of per-block rates must be indexed just like a list; passing it
        # through whole would fail at the `drop_path > 0.` check inside the block.
        per_block_rates = isinstance(drop_path, (list, tuple))

        # build blocks: even-indexed blocks use regular windows, odd-indexed ones shifted windows
        self.blocks = nn.ModuleList([
            SwinTransformerBlock(
                dim=dim,
                num_heads=num_heads,
                window_size=window_size,
                shift_size=0 if (i % 2 == 0) else self.shift_size,
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                drop=drop,
                attn_drop=attn_drop,
                drop_path=drop_path[i] if per_block_rates else drop_path,
                norm_layer=norm_layer)
            for i in range(depth)])

        # patch merging layer
        if downsample is not None:
            self.downsample = downsample(dim=dim, norm_layer=norm_layer)
        else:
            self.downsample = None

    def create_mask(self, x, H, W):
        """Build the attention mask for SW-MSA.

        Args:
            x: any tensor on the target device (only ``x.device`` is read)
            H, W: spatial size of the (unpadded) feature map

        Returns:
            attn_mask of shape [nW, Mh*Mw, Mh*Mw] holding 0 for token pairs that
            belong to the same region and -100 otherwise.
        """
        # Round H and W up to multiples of window_size.
        Hp = int(np.ceil(H / self.window_size)) * self.window_size
        Wp = int(np.ceil(W / self.window_size)) * self.window_size
        # Same [B, H, W, C] layout as the feature map so window_partition applies directly.
        img_mask = torch.zeros((1, Hp, Wp, 1), device=x.device)  # [1, Hp, Wp, 1]
        h_slices = (slice(0, -self.window_size),
                    slice(-self.window_size, -self.shift_size),
                    slice(-self.shift_size, None))
        w_slices = (slice(0, -self.window_size),
                    slice(-self.window_size, -self.shift_size),
                    slice(-self.shift_size, None))
        # Label each of the 3x3 regions with a distinct id.
        cnt = 0
        for h in h_slices:
            for w in w_slices:
                img_mask[:, h, w, :] = cnt
                cnt += 1

        mask_windows = window_partition(img_mask, self.window_size)  # [nW, Mh, Mw, 1]
        mask_windows = mask_windows.view(-1, self.window_size * self.window_size)  # [nW, Mh*Mw]
        # Pairwise id difference: non-zero means the two tokens come from different regions.
        attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)  # [nW, 1, Mh*Mw] - [nW, Mh*Mw, 1]
        # [nW, Mh*Mw, Mh*Mw]
        attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
        return attn_mask

    def forward(self, x, H, W):
        """
        Args:
            x: tokens of shape (B, H*W, C)
            H, W: spatial size of the token grid

        Returns:
            (x, H, W): the output tokens and the grid size after the optional downsample.
        """
        attn_mask = self.create_mask(x, H, W)  # [nW, Mh*Mw, Mh*Mw]
        for blk in self.blocks:
            # The blocks read the grid size from these attributes.
            blk.H, blk.W = H, W
            if not torch.jit.is_scripting() and self.use_checkpoint:
                x = checkpoint.checkpoint(blk, x, attn_mask)
            else:
                x = blk(x, attn_mask)
        if self.downsample is not None:
            x = self.downsample(x, H, W)
            # The downsample layer halves the grid, rounding odd sizes up.
            H, W = (H + 1) // 2, (W + 1) // 2

        return x, H, W


class SwinTransformer(nn.Module):
    r""" Swin Transformer
        A PyTorch impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows`  -
          https://arxiv.org/pdf/2103.14030

    Args:
        patch_size (int | tuple(int)): Patch size. Default: 4
        in_chans (int): Number of input image channels. Default: 3
        num_classes (int): Number of classes for classification head. Default: 1000
        embed_dim (int): Patch embedding dimension. Default: 96
        depths (tuple(int)): Depth of each Swin Transformer layer.
        num_heads (tuple(int)): Number of attention heads in different layers.
        window_size (int): Window size. Default: 7
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4
        qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
        drop_rate (float): Dropout rate. Default: 0
        attn_drop_rate (float): Attention dropout rate. Default: 0
        drop_path_rate (float): Stochastic depth rate. Default: 0.1
        norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm.
        patch_norm (bool): If True, add normalization after patch embedding. Default: True
        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False
    """

    def __init__(self, patch_size=4, in_chans=3, num_classes=1000,
                 embed_dim=96, depths=(2, 2, 6, 2), num_heads=(3, 6, 12, 24),
                 window_size=7, mlp_ratio=4., qkv_bias=True,
                 drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1,
                 norm_layer=nn.LayerNorm, patch_norm=True,
                 use_checkpoint=False, **kwargs):
        super().__init__()

        self.num_classes = num_classes
        self.num_layers = len(depths)
        self.embed_dim = embed_dim
        self.patch_norm = patch_norm
        # Channel count of the last stage's output; channels double at every stage.
        self.num_features = int(embed_dim * 2 ** (self.num_layers - 1))
        self.mlp_ratio = mlp_ratio

        # Split the image into non-overlapping patches.
        embed_norm = norm_layer if self.patch_norm else None
        self.patch_embed = PatchEmbed(patch_size=patch_size, in_c=in_chans,
                                      embed_dim=embed_dim, norm_layer=embed_norm)
        self.pos_drop = nn.Dropout(p=drop_rate)

        # Stochastic depth decay rule: rates rise linearly from 0 to drop_path_rate over all blocks.
        dpr = [rate.item() for rate in torch.linspace(0, drop_path_rate, sum(depths))]

        # Build the stages.
        # Note: a stage here differs from the paper's figure. It does not contain its own
        # patch-merging layer but the one that belongs to the next stage.
        self.layers = nn.ModuleList()
        last_stage = self.num_layers - 1
        first_block = 0
        for i_layer in range(self.num_layers):
            next_first_block = first_block + depths[i_layer]
            stage = BasicLayer(dim=int(embed_dim * 2 ** i_layer),
                               depth=depths[i_layer],
                               num_heads=num_heads[i_layer],
                               window_size=window_size,
                               mlp_ratio=self.mlp_ratio,
                               qkv_bias=qkv_bias,
                               drop=drop_rate,
                               attn_drop=attn_drop_rate,
                               drop_path=dpr[first_block:next_first_block],
                               norm_layer=norm_layer,
                               downsample=PatchMerging if i_layer < last_stage else None,
                               use_checkpoint=use_checkpoint)
            self.layers.append(stage)
            first_block = next_first_block

        self.norm = norm_layer(self.num_features)
        self.avgpool = nn.AdaptiveAvgPool1d(1)
        self.head = nn.Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity()

        self.apply(self._init_weights)

    def _init_weights(self, m):
        # Linear: truncated-normal weights, zero bias. LayerNorm: unit weight, zero bias.
        if isinstance(m, nn.Linear):
            nn.init.trunc_normal_(m.weight, std=.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    def forward(self, x):
        # x: [B, C, H, W] image batch -> tokens [B, L, C] plus the patch-grid size
        x, H, W = self.patch_embed(x)
        x = self.pos_drop(x)

        for stage in self.layers:
            x, H, W = stage(x, H, W)

        tokens = self.norm(x)  # [B, L, C]
        pooled = self.avgpool(tokens.transpose(1, 2))  # [B, C, 1]
        return self.head(torch.flatten(pooled, 1))


def swin_tiny_patch4_window7_224(num_classes: int = 1000, **kwargs):
    """Build a SwinTransformer with embed_dim 96, depths (2, 2, 6, 2), window 7.

    Weights trained on ImageNet-1K:
    https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth

    Extra keyword arguments are forwarded to SwinTransformer.
    """
    return SwinTransformer(num_classes=num_classes,
                           in_chans=3,
                           patch_size=4,
                           window_size=7,
                           embed_dim=96,
                           depths=(2, 2, 6, 2),
                           num_heads=(3, 6, 12, 24),
                           **kwargs)


def swin_small_patch4_window7_224(num_classes: int = 1000, **kwargs):
    """Build a SwinTransformer with embed_dim 96, depths (2, 2, 18, 2), window 7.

    Weights trained on ImageNet-1K:
    https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth

    Extra keyword arguments are forwarded to SwinTransformer.
    """
    return SwinTransformer(num_classes=num_classes,
                           in_chans=3,
                           patch_size=4,
                           window_size=7,
                           embed_dim=96,
                           depths=(2, 2, 18, 2),
                           num_heads=(3, 6, 12, 24),
                           **kwargs)


def swin_base_patch4_window7_224(num_classes: int = 1000, **kwargs):
    """Build a SwinTransformer with embed_dim 128, depths (2, 2, 18, 2), window 7.

    Weights trained on ImageNet-1K:
    https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224.pth

    Extra keyword arguments are forwarded to SwinTransformer.
    """
    return SwinTransformer(num_classes=num_classes,
                           in_chans=3,
                           patch_size=4,
                           window_size=7,
                           embed_dim=128,
                           depths=(2, 2, 18, 2),
                           num_heads=(4, 8, 16, 32),
                           **kwargs)


def swin_base_patch4_window12_384(num_classes: int = 1000, **kwargs):
    """Build a SwinTransformer with embed_dim 128, depths (2, 2, 18, 2), window 12.

    Weights trained on ImageNet-1K:
    https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384.pth

    Extra keyword arguments are forwarded to SwinTransformer.
    """
    return SwinTransformer(num_classes=num_classes,
                           in_chans=3,
                           patch_size=4,
                           window_size=12,
                           embed_dim=128,
                           depths=(2, 2, 18, 2),
                           num_heads=(4, 8, 16, 32),
                           **kwargs)


def swin_base_patch4_window7_224_in22k(num_classes: int = 21841, **kwargs):
    """Build a SwinTransformer with embed_dim 128, depths (2, 2, 18, 2), window 7.

    Weights trained on ImageNet-22K:
    https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224_22k.pth

    Extra keyword arguments are forwarded to SwinTransformer.
    """
    return SwinTransformer(num_classes=num_classes,
                           in_chans=3,
                           patch_size=4,
                           window_size=7,
                           embed_dim=128,
                           depths=(2, 2, 18, 2),
                           num_heads=(4, 8, 16, 32),
                           **kwargs)


def swin_base_patch4_window12_384_in22k(num_classes: int = 21841, **kwargs):
    """Build a SwinTransformer with embed_dim 128, depths (2, 2, 18, 2), window 12.

    Weights trained on ImageNet-22K:
    https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384_22k.pth

    Extra keyword arguments are forwarded to SwinTransformer.
    """
    return SwinTransformer(num_classes=num_classes,
                           in_chans=3,
                           patch_size=4,
                           window_size=12,
                           embed_dim=128,
                           depths=(2, 2, 18, 2),
                           num_heads=(4, 8, 16, 32),
                           **kwargs)


def swin_large_patch4_window7_224_in22k(num_classes: int = 21841, **kwargs):
    """Build a SwinTransformer with embed_dim 192, depths (2, 2, 18, 2), window 7.

    Weights trained on ImageNet-22K:
    https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window7_224_22k.pth

    Extra keyword arguments are forwarded to SwinTransformer.
    """
    return SwinTransformer(num_classes=num_classes,
                           in_chans=3,
                           patch_size=4,
                           window_size=7,
                           embed_dim=192,
                           depths=(2, 2, 18, 2),
                           num_heads=(6, 12, 24, 48),
                           **kwargs)


def swin_large_patch4_window12_384_in22k(num_classes: int = 21841, **kwargs):
    """Build a SwinTransformer with embed_dim 192, depths (2, 2, 18, 2), window 12.

    Weights trained on ImageNet-22K:
    https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth

    Extra keyword arguments are forwarded to SwinTransformer.
    """
    return SwinTransformer(num_classes=num_classes,
                           in_chans=3,
                           patch_size=4,
                           window_size=12,
                           embed_dim=192,
                           depths=(2, 2, 18, 2),
                           num_heads=(6, 12, 24, 48),
                           **kwargs)


================================================
FILE: pytorch_classification/swin_transformer/my_dataset.py
================================================
from PIL import Image
import torch
from torch.utils.data import Dataset


class MyDataSet(Dataset):
    """Custom dataset pairing image file paths with their class labels."""

    def __init__(self, images_path: list, images_class: list, transform=None):
        self.images_path = images_path
        self.images_class = images_class
        self.transform = transform

    def __len__(self):
        return len(self.images_path)

    def __getitem__(self, item):
        path = self.images_path[item]
        image = Image.open(path)
        # Only colour images are accepted ('RGB' is colour, 'L' is greyscale).
        if image.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(path))
        target = self.images_class[item]

        if self.transform is None:
            return image, target
        return self.transform(image), target

    @staticmethod
    def collate_fn(batch):
        # For the official default_collate implementation see
        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        # Transpose the list of (image, label) pairs into two parallel tuples.
        stacked_inputs, targets = zip(*batch)
        return torch.stack(stacked_inputs, dim=0), torch.as_tensor(targets)


================================================
FILE: pytorch_classification/swin_transformer/predict.py
================================================
import os
import json

import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

from model import swin_tiny_patch4_window7_224 as create_model


def main():
    """Classify a single image with a trained model, print and plot the result.

    Reads ``../tulip.jpg``, ``./class_indices.json`` and ``./weights/model-9.pth``;
    prints the probability of every class and shows the image titled with the
    top prediction.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    img_size = 224
    preprocessing_steps = [
        transforms.Resize(int(img_size * 1.14)),
        transforms.CenterCrop(img_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
    data_transform = transforms.Compose(preprocessing_steps)

    # Load the image and draw it before it is turned into a tensor.
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    picture = Image.open(img_path)
    plt.imshow(picture)
    # [C, H, W] -> [N, C, H, W]: add the batch dimension.
    batch = torch.unsqueeze(data_transform(picture), dim=0)

    # Mapping from class index (as a string) to class name.
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # Create the model and load the trained weights.
    model = create_model(num_classes=5).to(device)
    model_weight_path = "./weights/model-9.pth"
    model.load_state_dict(torch.load(model_weight_path, map_location=device))
    model.eval()
    with torch.no_grad():
        logits = torch.squeeze(model(batch.to(device))).cpu()
        predict = torch.softmax(logits, dim=0)
        predict_cla = torch.argmax(predict).numpy()

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                                 predict[predict_cla].numpy())
    plt.title(print_res)
    for class_id, prob in enumerate(predict):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(class_id)],
                                                  prob.numpy()))
    plt.show()


if __name__ == '__main__':
    # Run the single-image prediction when the file is executed as a script.
    main()


================================================
FILE: pytorch_classification/swin_transformer/select_incorrect_samples.py
================================================
"""
该脚本能够把验证集中预测错误的图片挑选出来，并记录在record.txt中
"""
import os
import json
import argparse
import sys

import torch
from torchvision import transforms
from tqdm import tqdm

from my_dataset import MyDataSet
from model import swin_base_patch4_window12_384_in22k as create_model
from utils import read_split_data


def main(args):
    """Write every mis-classified validation image to ``record.txt``.

    The validation split produced by ``read_split_data`` is run through the
    model and, for each wrong prediction, one line containing the image path,
    the true class name and the predicted class name is appended to
    ``record.txt`` in the current working directory.

    Args:
        args: parsed command-line namespace. The attributes read here are
            ``device``, ``data_path``, ``batch_size``, ``num_classes`` and
            ``weights``.

    Raises:
        AssertionError: if the weights file or ``./class_indices.json`` does
            not exist.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")

    _, _, val_images_path, val_images_label = read_split_data(args.data_path)

    img_size = 384
    data_transform = {
        "val": transforms.Compose([transforms.Resize(int(img_size * 1.143)),
                                   transforms.CenterCrop(img_size),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # build the validation dataset
    val_dataset = MyDataSet(images_path=val_images_path,
                            images_class=val_images_label,
                            transform=data_transform["val"])

    batch_size = args.batch_size
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    # shuffle must stay False: the path lookup below maps (step, i) back to
    # an index into val_images_path.
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_dataset.collate_fn)

    model = create_model(num_classes=args.num_classes).to(device)

    assert os.path.exists(args.weights), "weights file: '{}' not exist.".format(args.weights)
    model.load_state_dict(torch.load(args.weights, map_location=device))

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    # the context manager closes the handle as soon as the mapping is loaded
    with open(json_path, "r") as json_file:
        class_indict = json.load(json_file)

    model.eval()
    with torch.no_grad():
        with open("record.txt", "w") as f:
            # validate
            data_loader = tqdm(val_loader, file=sys.stdout)
            for step, data in enumerate(data_loader):
                images, labels = data
                pred = model(images.to(device))
                pred_classes = torch.max(pred, dim=1)[1]
                contrast = torch.eq(pred_classes, labels.to(device)).tolist()
                labels = labels.tolist()
                pred_classes = pred_classes.tolist()
                for i, flag in enumerate(contrast):
                    if flag:
                        continue
                    # batches are sequential, so this is the sample's global index
                    file_name = val_images_path[batch_size * step + i]
                    true_label = class_indict[str(labels[i])]
                    false_label = class_indict[str(pred_classes[i])]
                    f.write(f"{file_name}  TrueLabel:{true_label}  PredictLabel:{false_label}\n")


# Command-line entry point: parse the options and run the evaluation.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--batch-size', type=int, default=2)

    # root directory of the dataset
    # http://download.tensorflow.org/example_images/flower_photos.tgz
    parser.add_argument('--data-path', type=str,
                        default="/data/flower_photos")

    # path of the trained weights to load into the model
    parser.add_argument('--weights', type=str, default='./weights/model-19.pth',
                        help='initial weights path')
    # device to run on; main() falls back to CPU when CUDA is unavailable
    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')

    opt = parser.parse_args()

    main(opt)


================================================
FILE: pytorch_classification/swin_transformer/train.py
================================================
import os
import argparse

import torch
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

from my_dataset import MyDataSet
from model import swin_tiny_patch4_window7_224 as create_model
from utils import read_split_data, train_one_epoch, evaluate


def main(args):
    """Train the Swin Transformer classifier and log metrics to TensorBoard.

    After every epoch the model's ``state_dict`` is saved to
    ``./weights/model-<epoch>.pth`` and the train/validation loss, accuracy
    and current learning rate are written to the TensorBoard writer.

    Args:
        args: parsed command-line namespace. The attributes read here are
            ``device``, ``data_path``, ``batch_size``, ``num_classes``,
            ``weights``, ``freeze_layers``, ``lr`` and ``epochs``.

    Raises:
        AssertionError: if ``args.weights`` is non-empty but the file does
            not exist.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")

    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs("./weights", exist_ok=True)

    tb_writer = SummaryWriter()

    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)

    img_size = 224
    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(img_size),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(int(img_size * 1.143)),
                                   transforms.CenterCrop(img_size),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # build the training dataset
    train_dataset = MyDataSet(images_path=train_images_path,
                              images_class=train_images_label,
                              transform=data_transform["train"])

    # build the validation dataset
    val_dataset = MyDataSet(images_path=val_images_path,
                            images_class=val_images_label,
                            transform=data_transform["val"])

    batch_size = args.batch_size
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_dataset.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_dataset.collate_fn)

    model = create_model(num_classes=args.num_classes).to(device)

    if args.weights != "":
        assert os.path.exists(args.weights), "weights file: '{}' not exist.".format(args.weights)
        weights_dict = torch.load(args.weights, map_location=device)["model"]
        # drop the classification-head weights before loading the checkpoint
        for k in list(weights_dict.keys()):
            if "head" in k:
                del weights_dict[k]
        # strict=False tolerates the removed head keys; the printed result
        # lists the missing / unexpected keys
        print(model.load_state_dict(weights_dict, strict=False))

    if args.freeze_layers:
        for name, para in model.named_parameters():
            # freeze everything except the head
            if "head" not in name:
                para.requires_grad_(False)
            else:
                print("training {}".format(name))

    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.AdamW(pg, lr=args.lr, weight_decay=5E-2)

    tags = ["train_loss", "train_acc", "val_loss", "val_acc", "learning_rate"]
    try:
        for epoch in range(args.epochs):
            # train
            train_loss, train_acc = train_one_epoch(model=model,
                                                    optimizer=optimizer,
                                                    data_loader=train_loader,
                                                    device=device,
                                                    epoch=epoch)

            # validate
            val_loss, val_acc = evaluate(model=model,
                                         data_loader=val_loader,
                                         device=device,
                                         epoch=epoch)

            tb_writer.add_scalar(tags[0], train_loss, epoch)
            tb_writer.add_scalar(tags[1], train_acc, epoch)
            tb_writer.add_scalar(tags[2], val_loss, epoch)
            tb_writer.add_scalar(tags[3], val_acc, epoch)
            tb_writer.add_scalar(tags[4], optimizer.param_groups[0]["lr"], epoch)

            torch.save(model.state_dict(), "./weights/model-{}.pth".format(epoch))
    finally:
        # flush pending events and release the event file, even if training aborts
        tb_writer.close()


def str2bool(value):
    """Convert a command-line token into a real boolean.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so any
    non-empty value (including ``"False"``) becomes ``True``. This parser
    interprets the text instead.

    Args:
        value: the raw option value (a string), or an actual ``bool``.

    Returns:
        ``True`` or ``False``.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ("true", "t", "yes", "y", "1"):
        return True
    # the empty string stays falsy, as it was with type=bool
    if lowered in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got '{}'".format(value))


# Command-line entry point: parse the options and start training.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--batch-size', type=int, default=8)
    parser.add_argument('--lr', type=float, default=0.0001)

    # root directory of the dataset
    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
    parser.add_argument('--data-path', type=str,
                        default="/data/flower_photos")

    # path of the pretrained weights; pass an empty string to skip loading
    parser.add_argument('--weights', type=str, default='./swin_tiny_patch4_window7_224.pth',
                        help='initial weights path')
    # whether to freeze every layer except the head
    parser.add_argument('--freeze-layers', type=str2bool, default=False)
    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')

    opt = parser.parse_args()

    main(opt)


================================================
FILE: pytorch_classification/swin_transformer/utils.py
================================================
import os
import sys
import json
import pickle
import random

import torch
from tqdm import tqdm

import matplotlib.pyplot as plt


def read_split_data(root: str, val_rate: float = 0.2):
    """Split an image-folder dataset into training and validation lists.

    Every sub-directory of ``root`` is treated as one class. Class names are
    sorted and numbered from 0, and the index -> name mapping is written to
    ``class_indices.json`` in the current working directory. For each class,
    ``int(len(images) * val_rate)`` images are sampled (with a fixed seed)
    for validation; the rest go to training.

    Args:
        root: dataset root directory containing one folder per class.
        val_rate: fraction of each class's images used for validation.

    Returns:
        A tuple ``(train_images_path, train_images_label, val_images_path,
        val_images_label)`` of four lists.

    Raises:
        AssertionError: if ``root`` does not exist or if either split ends up
            empty.
    """
    random.seed(0)  # fixed seed so the split is reproducible
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # one folder per class; sorted so the order is the same on every platform
    flower_class = sorted(cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla)))
    # class name -> numeric index
    class_indices = {name: idx for idx, name in enumerate(flower_class)}
    json_str = json.dumps({idx: name for name, idx in class_indices.items()}, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of each training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of each validation image
    every_class_num = []  # number of samples per class
    supported = (".jpg", ".JPG", ".png", ".PNG")  # accepted file suffixes
    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # collect the supported files, sorted for a platform-independent order
        images = sorted(os.path.join(cla_path, i) for i in os.listdir(cla_path)
                        if os.path.splitext(i)[-1] in supported)
        image_class = class_indices[cla]
        every_class_num.append(len(images))
        # sample the validation images; a set makes the membership test below
        # O(1) instead of scanning a list for every image
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.".format(sum(every_class_num)))
    print("{} images for training.".format(len(train_images_path)))
    print("{} images for validation.".format(len(val_images_path)))
    assert len(train_images_path) > 0, "number of training images must greater than 0."
    assert len(val_images_path) > 0, "number of validation images must greater than 0."

    plot_image = False
    if plot_image:
        # bar chart of the number of images per class
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # replace the x ticks 0,1,2,... with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # write the count above each bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        plt.xlabel('image class')
        plt.ylabel('number of images')
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label


def plot_data_loader_image(data_loader):
    """Show up to four images from every batch of ``data_loader``.

    Each image is de-normalized, drawn in a single-row subplot grid and
    labelled with its class name looked up in ``./class_indices.json``. One
    figure is shown per batch.

    Args:
        data_loader: iterable with a ``batch_size`` attribute yielding
            ``(images, labels)`` pairs. Each image is presumably a CHW tensor
            normalized with the mean/std constants used below; confirm
            against the dataset's transform.

    Raises:
        AssertionError: if ``./class_indices.json`` does not exist.
    """
    batch_size = data_loader.batch_size
    plot_num = min(batch_size, 4)

    json_path = './class_indices.json'
    assert os.path.exists(json_path), json_path + " does not exist."
    # the context manager closes the handle once the mapping is loaded
    with open(json_path, 'r') as json_file:
        class_indices = json.load(json_file)

    for data in data_loader:
        images, labels = data
        # the last batch can hold fewer than plot_num samples
        for i in range(min(plot_num, len(images))):
            # [C, H, W] -> [H, W, C]
            img = images[i].numpy().transpose(1, 2, 0)
            # undo the Normalize transform
            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255
            label = labels[i].item()
            plt.subplot(1, plot_num, i+1)
            plt.xlabel(class_indices[str(label)])
            plt.xticks([])  # hide the x-axis ticks
            plt.yticks([])  # hide the y-axis ticks
            plt.imshow(img.astype('uint8'))
        plt.show()


def write_pickle(list_info: list, file_name: str):
    """Pickle ``list_info`` into the file ``file_name``, overwriting it."""
    with open(file_name, mode='wb') as handle:
        pickle.Pickler(handle).dump(list_info)


def read_pickle(file_name: str) -> list:
    """Load and return the object pickled in ``file_name``.

    NOTE: unpickling can execute arbitrary code; only read trusted files.
    """
    with open(file_name, mode='rb') as handle:
        return pickle.Unpickler(handle).load()


def train_one_epoch(model, optimizer, data_loader, device, epoch):
    """Run one training pass over ``data_loader``.

    Args:
        model: network to train; switched to training mode here.
        optimizer: optimizer stepped once per batch.
        data_loader: iterable yielding ``(images, labels)`` batches.
        device: device the batches and the accumulators are moved to.
        epoch: epoch number, used only in the progress-bar text.

    Returns:
        ``(mean loss per batch, accuracy over all samples seen)``.

    The process exits with status 1 if a batch produces a non-finite loss.
    """
    model.train()
    loss_function = torch.nn.CrossEntropyLoss()
    accu_loss = torch.zeros(1).to(device)  # accumulated loss
    accu_num = torch.zeros(1).to(device)   # accumulated number of correct predictions
    optimizer.zero_grad()

    sample_num = 0
    data_loader = tqdm(data_loader, file=sys.stdout)
    for step, data in enumerate(data_loader):
        images, labels = data
        labels = labels.to(device)  # transfer once, reuse for accuracy and loss
        sample_num += images.shape[0]

        pred = model(images.to(device))
        pred_classes = torch.max(pred, dim=1)[1]
        accu_num += torch.eq(pred_classes, labels).sum()

        loss = loss_function(pred, labels)
        # bail out before back-propagating or accumulating a diverged loss
        if not torch.isfinite(loss):
            print('WARNING: non-finite loss, ending training ', loss)
            sys.exit(1)

        loss.backward()
        accu_loss += loss.detach()

        data_loader.desc = "[train epoch {}] loss: {:.3f}, acc: {:.3f}".format(epoch,
                                                                               accu_loss.item() / (step + 1),
                                                                               accu_num.item() / sample_num)

        optimizer.step()
        optimizer.zero_grad()

    return accu_loss.item() / (step + 1), accu_num.item() / sample_num


@torch.no_grad()
def evaluate(model, data_loader, device, epoch):
    """Evaluate ``model`` on ``data_loader`` with gradients disabled.

    Args:
        model: network to evaluate; switched to eval mode here.
        data_loader: iterable yielding ``(images, labels)`` batches.
        device: device the batches and the accumulators are moved to.
        epoch: epoch number, used only in the progress-bar text.

    Returns:
        ``(mean loss per batch, accuracy over all samples seen)``.
    """
    criterion = torch.nn.CrossEntropyLoss()

    model.eval()

    correct_total = torch.zeros(1).to(device)  # running count of correct predictions
    loss_total = torch.zeros(1).to(device)     # running sum of batch losses

    seen = 0
    data_loader = tqdm(data_loader, file=sys.stdout)
    for step, batch in enumerate(data_loader):
        images, labels = batch
        seen += images.shape[0]
        targets = labels.to(device)

        logits = model(images.to(device))
        _, pred_classes = torch.max(logits, dim=1)
        correct_total += torch.eq(pred_classes, targets).sum()
        loss_total += criterion(logits, targets)

        mean_loss = loss_total.item() / (step + 1)
        accuracy = correct_total.item() / seen
        data_loader.desc = "[valid epoch {}] loss: {:.3f}, acc: {:.3f}".format(epoch,
                                                                               mean_loss,
                                                                               accuracy)

    return loss_total.item() / (step + 1), correct_total.item() / seen


================================================
FILE: pytorch_classification/tensorboard_test/data_utils.py
================================================
import os
import json
import pickle
import random

from PIL import Image
import torch
import numpy as np
import matplotlib.pyplot as plt


def read_split_data(root: str, val_rate: float = 0.2):
    """Split an image-folder dataset into training and validation lists.

    Every sub-directory of ``root`` is treated as one class. Class names are
    sorted and numbered from 0, and the index -> name mapping is written to
    ``class_indices.json`` in the current working directory. For each class,
    ``int(len(images) * val_rate)`` images are sampled (with a fixed seed)
    for validation; the rest go to training.

    Args:
        root: dataset root directory containing one folder per class.
        val_rate: fraction of each class's images used for validation.

    Returns:
        A tuple ``(train_images_path, train_images_label, val_images_path,
        val_images_label)`` of four lists.

    Raises:
        AssertionError: if ``root`` does not exist.
    """
    random.seed(0)  # fixed seed so the split is reproducible
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # one folder per class; sorted so the order is always the same
    flower_class = sorted(cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla)))
    # class name -> numeric index
    class_indices = {name: idx for idx, name in enumerate(flower_class)}
    json_str = json.dumps({idx: name for name, idx in class_indices.items()}, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of each training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of each validation image
    every_class_num = []  # number of samples per class
    supported = (".jpg", ".JPG", ".png", ".PNG")  # accepted file suffixes
    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # os.listdir order is arbitrary; sort so the seeded sample below picks
        # the same files on every platform
        images = sorted(os.path.join(cla_path, i) for i in os.listdir(cla_path)
                        if os.path.splitext(i)[-1] in supported)
        image_class = class_indices[cla]
        every_class_num.append(len(images))
        # sample the validation images; a set makes the membership test below
        # O(1) instead of scanning a list for every image
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.".format(sum(every_class_num)))
    print("{} images for training.".format(len(train_images_path)))
    print("{} images for validation.".format(len(val_images_path)))

    plot_image = False
    if plot_image:
        # bar chart of the number of images per class
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # replace the x ticks 0,1,2,... with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # write the count above each bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        plt.xlabel('image class')
        plt.ylabel('number of images')
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label


def plot_data_loader_image(data_loader):
    """Show up to four images from every batch of ``data_loader``.

    Each image is de-normalized, drawn in a single-row subplot grid and
    labelled with its class name looked up in ``./class_indices.json``. One
    figure is shown per batch.

    Args:
        data_loader: iterable with a ``batch_size`` attribute yielding
            ``(images, labels)`` pairs. Each image is presumably a CHW tensor
            normalized with the mean/std constants used below; confirm
            against the dataset's transform.

    Raises:
        AssertionError: if ``./class_indices.json`` does not exist.
    """
    batch_size = data_loader.batch_size
    plot_num = min(batch_size, 4)

    json_path = './class_indices.json'
    assert os.path.exists(json_path), json_path + " does not exist."
    # the context manager closes the handle once the mapping is loaded
    with open(json_path, 'r') as json_file:
        class_indices = json.load(json_file)

    for data in data_loader:
        images, labels = data
        # the last batch can hold fewer than plot_num samples
        for i in range(min(plot_num, len(images))):
            # [C, H, W] -> [H, W, C]
            img = images[i].numpy().transpose(1, 2, 0)
            # undo the Normalize transform
            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255
            label = labels[i].item()
            plt.subplot(1, plot_num, i+1)
            plt.xlabel(class_indices[str(label)])
            plt.xticks([])  # hide the x-axis ticks
            plt.yticks([])  # hide the y-axis ticks
            plt.imshow(img.astype('uint8'))
        plt.show()


def write_pickle(list_info: list, file_name: str):
    """Serialize ``list_info`` with pickle into ``file_name`` (overwritten)."""
    with open(file_name, mode='wb') as out_file:
        serializer = pickle.Pickler(out_file)
        serializer.dump(list_info)


def read_pickle(file_name: str) -> list:
    """Return the object stored by pickle in ``file_name``.

    NOTE: unpickling can execute arbitrary code; only read trusted files.
    """
    with open(file_name, mode='rb') as in_file:
        loader = pickle.Unpickler(in_file)
        return loader.load()


def plot_class_preds(net,
                     images_dir: str,
                     transform,
                     num_plot: int = 5,
                     device="cpu"):
    """Build a figure comparing predictions with the labels in ``label.txt``.

    ``images_dir/label.txt`` is expected to hold one ``file_name class_name``
    pair per line, separated by spaces. The first ``num_plot`` usable entries
    are pre-processed with ``transform``, run through ``net`` as one batch and
    drawn in a single row. Each title shows the predicted class, its
    probability and the true class, in green when they match and red
    otherwise.

    Args:
        net: model called on the stacked image batch.
        images_dir: directory containing the images and ``label.txt``.
        transform: callable applied to each RGB PIL image; its outputs are
            stacked with ``torch.stack``.
        num_plot: maximum number of images to draw.
        device: device the image batch is moved to before inference.

    Returns:
        The matplotlib figure, or ``None`` when ``images_dir`` or
        ``label.txt`` is missing or no usable entry was found.

    Raises:
        AssertionError: if ``./class_indices.json`` is missing or a label line
            does not contain exactly two fields.
    """
    if not os.path.exists(images_dir):
        print("not found {} path, ignore add figure.".format(images_dir))
        return None

    label_path = os.path.join(images_dir, "label.txt")
    if not os.path.exists(label_path):
        print("not found {} file, ignore add figure".format(label_path))
        return None

    # read class_indict
    json_label_path = './class_indices.json'
    assert os.path.exists(json_label_path), "not found {}".format(json_label_path)
    # the context manager closes the handle once the mapping is loaded
    with open(json_label_path, 'r') as json_file:
        # {"0": "daisy"}
        flower_class = json.load(json_file)
    # {"daisy": "0"}
    class_indices = {v: k for k, v in flower_class.items()}

    # reading label.txt file
    label_info = []
    with open(label_path, "r") as rd:
        for line in rd:
            line = line.strip()
            if not line:
                continue
            split_info = [i for i in line.split(" ") if len(i) > 0]
            assert len(split_info) == 2, "label format error, expect file_name and class_name"
            image_name, class_name = split_info
            image_path = os.path.join(images_dir, image_name)
            # skip entries whose image file is missing
            if not os.path.exists(image_path):
                print("not found {}, skip.".format(image_path))
                continue
            # skip entries whose class is not one of the known classes
            if class_name not in class_indices:
                print("unrecognized category {}, skip".format(class_name))
                continue
            label_info.append([image_path, class_name])

    if len(label_info) == 0:
        return None

    # keep only the first num_plot entries
    label_info = label_info[:num_plot]

    num_imgs = len(label_info)
    images = []
    labels = []
    for img_path, class_name in label_info:
        # read img
        img = Image.open(img_path).convert("RGB")
        label_index = int(class_indices[class_name])

        # preprocessing
        img = transform(img)
        images.append(img)
        labels.append(label_index)

    # batching images
    images = torch.stack(images, dim=0).to(device)

    # inference
    with torch.no_grad():
        output = net(images)
        probs, preds = torch.max(torch.softmax(output, dim=1), dim=1)
        probs = probs.cpu().numpy()
        preds = preds.cpu().numpy()

    # width, height
    fig = plt.figure(figsize=(num_imgs * 2.5, 3), dpi=100)
    for i in range(num_imgs):
        # one row, num_imgs columns, drawing subplot number i+1
        ax = fig.add_subplot(1, num_imgs, i+1, xticks=[], yticks=[])

        # CHW -> HWC
        npimg = images[i].cpu().numpy().transpose(1, 2, 0)

        # undo the normalization
        # mean:[0.485, 0.456, 0.406], std:[0.229, 0.224, 0.225]
        npimg = (npimg * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255
        # draw on this axes explicitly instead of pyplot's "current axes"
        ax.imshow(npimg.astype('uint8'))

        title = "{}, {:.2f}%\n(label: {})".format(
            flower_class[str(preds[i])],  # predict class
            probs[i] * 100,  # predict probability
            flower_class[str(labels[i])]  # true class
        )
        ax.set_title(title, color=("green" if preds[i] == labels[i] else "red"))

    return fig


================================================
FILE: pytorch_classification/tensorboard_test/model.py
================================================
import torch.nn as nn
import torch


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 conv + batch-norm layers.

    ``expansion`` is the ratio between the block's output channels and
    ``out_channel`` (1 for this block).
    """
    expansion = 1

    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        """Create the block.

        Args:
            in_channel: number of input channels.
            out_channel: number of channels produced by both convolutions.
            stride: stride of the first convolution.
            downsample: optional module applied to the input to form the
                shortcut branch; the raw input is used when it is ``None``.
        """
        super().__init__()
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        out += shortcut
        return self.relu(out)


class Bottleneck(nn.Module):
    """Residual block made of 1x1, 3x3 and 1x1 conv + batch-norm layers.

    The last convolution outputs ``out_channel * expansion`` channels.
    """
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        """Create the block.

        Args:
            in_channel: number of input channels.
            out_channel: channel width of the first two convolutions.
            stride: stride of the 3x3 convolution.
            downsample: optional module applied to the input to form the
                shortcut branch; the raw input is used when it is ``None``.
        """
        super().__init__()
        expanded = out_channel * self.expansion

        # 1x1 conv: squeeze channels
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=1,
                               stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        # 3x3 conv: the only layer that applies the stride
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        # 1x1 conv: unsqueeze channels
        self.conv3 = nn.Conv2d(out_channel, expanded, kernel_size=1,
                               stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(expanded)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out += shortcut
        return self.relu(out)


class ResNet(nn.Module):
    """ResNet backbone assembled from a configurable residual block type."""

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True):
        """Build the network.

        Args:
            block: residual block class. It must expose an ``expansion``
                attribute and accept ``(in_channel, out_channel, stride=,
                downsample=)``.
            blocks_num: sequence of four ints, the number of blocks in each
                of the four stages.
            num_classes: output size of the final linear layer.
            include_top: when False, the pooling and linear layers are not
                created and ``forward`` returns the last stage's feature map.
        """
        super().__init__()
        self.include_top = include_top
        self.in_channel = 64

        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # (channel width, stride) of the four stages, registered in order as
        # layer1 .. layer4
        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
        for idx, (width, stride) in enumerate(stage_specs):
            stage = self._make_layer(block, width, blocks_num[idx], stride=stride)
            setattr(self, "layer{}".format(idx + 1), stage)

        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        # re-initialise every convolution with Kaiming-normal weights
        conv_layers = (m for m in self.modules() if isinstance(m, nn.Conv2d))
        for conv in conv_layers:
            nn.init.kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Stack ``block_num`` blocks into one stage and update ``in_channel``.

        Only the first block receives ``stride`` and, when the shape of the
        shortcut would not match, a 1x1 conv + batch-norm ``downsample``.
        """
        out_channel = channel * block.expansion
        needs_projection = stride != 1 or self.in_channel != out_channel

        downsample = None
        if needs_projection:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channel, out_channel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channel))

        layers = [block(self.in_channel, channel, downsample=downsample, stride=stride)]
        self.in_channel = out_channel

        remaining = block_num - 1
        while remaining > 0:
            layers.append(block(self.in_channel, channel))
            remaining -= 1

        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the stem and the four stages, then the head if ``include_top``."""
        pipeline = (self.conv1, self.bn1, self.relu, self.maxpool,
                    self.layer1, self.layer2, self.layer3, self.layer4)
        for module in pipeline:
            x = module(x)

        if not self.include_top:
            return x

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        return self.fc(x)


def resnet34(num_classes=1000, include_top=True):
    """Build a ResNet from ``BasicBlock`` with stage depths [3, 4, 6, 3]."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths,
                  num_classes=num_classes, include_top=include_top)


def resnet101(num_classes=1000, include_top=True):
    """Build a ResNet from ``Bottleneck`` with stage depths [3, 4, 23, 3]."""
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths,
                  num_classes=num_classes, include_top=include_top)


================================================
FILE: pytorch_classification/tensorboard_test/my_dataset.py
================================================
from tqdm import tqdm
from PIL import Image
import torch
from torch.utils.data import Dataset


class MyDataSet(Dataset):
    """Custom dataset backed by parallel lists of image paths and labels."""

    def __init__(self, images_path: list, images_class: list, transform=None):
        """Store the lists and drop images with an extreme aspect ratio.

        Images whose width/height ratio is above 10 or below 0.1 are removed.
        NOTE: the removal is done in place, so the ``images_path`` and
        ``images_class`` lists passed by the caller are modified as well.

        Args:
            images_path: list of image file paths.
            images_class: list of class labels, parallel to ``images_path``.
            transform: optional callable applied to each image in
                ``__getitem__``.
        """
        self.images_path = images_path
        self.images_class = images_class
        self.transform = transform

        delete_img = []
        for index, img_path in tqdm(enumerate(images_path)):
            # only the size is needed; the context manager closes the file so
            # scanning a large dataset does not accumulate open handles
            with Image.open(img_path) as img:
                w, h = img.size
            ratio = w / h
            if ratio > 10 or ratio < 0.1:
                delete_img.append(index)

        # pop from the back so the remaining indices stay valid
        for index in reversed(delete_img):
            self.images_path.pop(index)
            self.images_class.pop(index)

    def __len__(self):
        """Return the number of images kept after filtering."""
        return len(self.images_path)

    def __getitem__(self, item):
        """Return ``(image, label)`` for index ``item``.

        Raises:
            ValueError: if the image is not in RGB mode.
        """
        img = Image.open(self.images_path[item])
        # 'RGB' is a colour image, 'L' a grayscale one
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item]))
        label = self.images_class[item]

        if self.transform is not None:
            img = self.transform(img)

        return img, label

    @staticmethod
    def collate_fn(batch):
        """Stack a list of ``(image, label)`` samples into two batch tensors."""
        # for the official default_collate implementation see
        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        images, labels = tuple(zip(*batch))

        images = torch.stack(images, dim=0)
        labels = torch.as_tensor(labels)
        return images, labels


================================================
FILE: pytorch_classification/tensorboard_test/requirements.txt
================================================
torchvision==0.14.1
tqdm==4.42.1
matplotlib==3.2.1
torch==1.13.1
Pillow
tensorboard


================================================
FILE: pytorch_classification/tensorboard_test/train.py
================================================
import os
import math
import argparse

import torch
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
import torch.optim.lr_scheduler as lr_scheduler

from model import resnet34
from my_dataset import MyDataSet
from data_utils import read_split_data, plot_class_preds
from train_eval_utils import train_one_epoch, evaluate


def main(args):
    """Train ResNet-34 on an image dataset and log the run to TensorBoard.

    For every epoch this records the mean training loss, the validation
    accuracy and the optimizer's learning rate as scalars, an optional
    prediction figure built from ``./plot_img``, and histograms of two
    convolution weights, then saves the model state dict to
    ``./weights/model-{epoch}.pth``.

    Args:
        args: parsed command-line namespace. Reads ``device``, ``data_path``,
            ``batch_size``, ``num_classes``, ``weights``, ``freeze_layers``,
            ``lr``, ``lrf`` and ``epochs``.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")

    print(args)
    print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
    # Instantiate the SummaryWriter.
    tb_writer = SummaryWriter(log_dir="runs/flower_experiment")
    os.makedirs("./weights", exist_ok=True)

    # Split the data into a training set and a validation set.
    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)

    # Pre-processing used for training and for prediction.
    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # Training dataset.
    train_data_set = MyDataSet(images_path=train_images_path,
                               images_class=train_images_label,
                               transform=data_transform["train"])

    # Validation dataset.
    val_data_set = MyDataSet(images_path=val_images_path,
                             images_class=val_images_label,
                             transform=data_transform["val"])

    batch_size = args.batch_size
    # Number of dataloader workers; os.cpu_count() may return None, so fall back to 1.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_data_set,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_data_set.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_data_set,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_data_set.collate_fn)

    # Instantiate the model.
    model = resnet34(num_classes=args.num_classes).to(device)

    # Write the model graph to tensorboard.
    init_img = torch.zeros((1, 3, 224, 224), device=device)
    tb_writer.add_graph(model, init_img)

    # Load pre-trained weights when the file exists.
    if os.path.exists(args.weights):
        weights_dict = torch.load(args.weights, map_location=device)
        # Build the model's state dict once (not once per checkpoint key) and
        # skip checkpoint keys the model does not have instead of raising KeyError.
        model_state = model.state_dict()
        load_weights_dict = {k: v for k, v in weights_dict.items()
                             if k in model_state and model_state[k].numel() == v.numel()}
        model.load_state_dict(load_weights_dict, strict=False)
    else:
        print("not using pretrain-weights.")

    # Optionally freeze weights.
    if args.freeze_layers:
        print("freeze layers except fc layer.")
        for name, para in model.named_parameters():
            # Freeze everything except the final fully connected layer.
            if "fc" not in name:
                para.requires_grad_(False)

    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=0.005)
    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    try:
        for epoch in range(args.epochs):
            # train
            mean_loss = train_one_epoch(model=model,
                                        optimizer=optimizer,
                                        data_loader=train_loader,
                                        device=device,
                                        epoch=epoch)
            # update learning rate
            scheduler.step()

            # validate
            acc = evaluate(model=model,
                           data_loader=val_loader,
                           device=device)

            # add loss, acc and lr into tensorboard
            print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3)))
            tags = ["train_loss", "accuracy", "learning_rate"]
            tb_writer.add_scalar(tags[0], mean_loss, epoch)
            tb_writer.add_scalar(tags[1], acc, epoch)
            tb_writer.add_scalar(tags[2], optimizer.param_groups[0]["lr"], epoch)

            # add figure into tensorboard
            fig = plot_class_preds(net=model,
                                   images_dir="./plot_img",
                                   transform=data_transform["val"],
                                   num_plot=5,
                                   device=device)
            if fig is not None:
                tb_writer.add_figure("predictions vs. actuals",
                                     figure=fig,
                                     global_step=epoch)

            # add conv1 weights into tensorboard
            tb_writer.add_histogram(tag="conv1",
                                    values=model.conv1.weight,
                                    global_step=epoch)
            tb_writer.add_histogram(tag="layer1/block0/conv1",
                                    values=model.layer1[0].conv1.weight,
                                    global_step=epoch)

            # save weights
            torch.save(model.state_dict(), "./weights/model-{}.pth".format(epoch))
    finally:
        # Flush pending events and release the event file even if training aborts.
        tb_writer.close()


if __name__ == '__main__':
    def _str2bool(value):
        """Convert a command-line string to a bool.

        ``type=bool`` would turn every non-empty string (including "False")
        into True, so the flag could never be switched off explicitly.
        """
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ('true', 't', 'yes', 'y', '1'):
            return True
        # The empty string stays accepted as False, as it was with ``type=bool``.
        if text in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--epochs', type=int, default=30)
    parser.add_argument('--batch-size', type=int, default=16)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--lrf', type=float, default=0.1)

    # Root directory of the dataset.
    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
    img_root = "/home/wz/my_project/my_github/data_set/flower_data/flower_photos"
    parser.add_argument('--data-path', type=str, default=img_root)

    # Download link for the official resnet34 weights.
    # https://download.pytorch.org/models/resnet34-333f7ec4.pth
    parser.add_argument('--weights', type=str, default='resNet34.pth',
                        help='initial weights path')
    parser.add_argument('--freeze-layers', type=_str2bool, default=False)
    parser.add_argument('--device', default='cuda', help='device id (i.e. 0 or 0,1 or cpu)')

    opt = parser.parse_args()

    main(opt)


================================================
FILE: pytorch_classification/tensorboard_test/train_eval_utils.py
================================================
import sys

from tqdm import tqdm
import torch


def train_one_epoch(model, optimizer, data_loader, device, epoch):
    """Run one training pass over ``data_loader`` and return the mean loss.

    Each batch is moved to ``device``, scored with cross-entropy, and used for
    one optimizer step. A running mean of the per-batch loss is shown on the
    progress bar. The process exits with status 1 as soon as a loss value is
    not finite.

    Returns:
        The mean of the per-batch losses as a Python float.
    """
    model.train()
    criterion = torch.nn.CrossEntropyLoss()
    running_mean = torch.zeros(1).to(device)
    optimizer.zero_grad()

    progress = tqdm(data_loader, file=sys.stdout)
    for batch_idx, (images, labels) in enumerate(progress):
        logits = model(images.to(device))
        loss = criterion(logits, labels.to(device))
        loss.backward()

        # Incremental update of the mean loss over the batches seen so far.
        running_mean = (running_mean * batch_idx + loss.detach()) / (batch_idx + 1)

        # Show the mean loss on the progress bar.
        progress.desc = "[epoch {}] mean loss {}".format(epoch, round(running_mean.item(), 3))

        if not torch.isfinite(loss):
            print('WARNING: non-finite loss, ending training ', loss)
            sys.exit(1)

        optimizer.step()
        optimizer.zero_grad()

    return running_mean.item()


@torch.no_grad()
def evaluate(model, data_loader, device):
    """Return the fraction of samples in ``data_loader`` that ``model`` classifies correctly.

    The prediction for a sample is the index of the largest value along
    dimension 1 of the model output. The denominator is
    ``len(data_loader.dataset)``.
    """
    model.eval()

    # Number of correctly predicted samples.
    correct = torch.zeros(1).to(device)
    # Total number of samples in the validation set.
    total = len(data_loader.dataset)

    # Show validation progress.
    progress = tqdm(data_loader, desc="validation...", file=sys.stdout)

    for images, labels in progress:
        logits = model(images.to(device))
        _, predicted = torch.max(logits, dim=1)
        correct += torch.eq(predicted, labels.to(device)).sum()

    # Proportion of correct predictions.
    return correct.item() / total


================================================
FILE: pytorch_classification/train_multi_GPU/README.md
================================================
## 多GPU启动指令
- 如果要使用```train_multi_gpu_using_launch.py```脚本，使用以下指令启动
- ```python -m torch.distributed.launch --nproc_per_node=8 --use_env train_multi_gpu_using_launch.py```
- 其中```nproc_per_node```为并行GPU的数量
- 如果要指定使用某几块GPU可使用如下指令，例如使用第1块和第4块GPU进行训练：
- ```CUDA_VISIBLE_DEVICES=0,3 python -m torch.distributed.launch --nproc_per_node=2 --use_env train_multi_gpu_using_launch.py```

-----

- 如果要使用```train_multi_gpu_using_spawn.py```脚本，使用以下指令启动
- ```python train_multi_gpu_using_spawn.py```

## 训练时间对比
![training time](training_time.png)

## 是否使用SyncBatchNorm
![syncbn](syncbn.png)

## 单GPU与多GPU训练曲线
![accuracy](accuracy.png)


================================================
FILE: pytorch_classification/train_multi_GPU/model.py
================================================
import torch.nn as nn
import torch


class BasicBlock(nn.Module):
    """Residual block of two 3x3 convolutions, each followed by batch norm.

    The block input (optionally passed through ``downsample``) is added to the
    output of the second batch norm before the final ReLU.
    """
    expansion = 1

    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        super().__init__()
        # Only the first convolution applies the requested stride.
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample

    def forward(self, x):
        # Shortcut branch: the raw input, or its projection when one was given.
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        out += shortcut
        return self.relu(out)


class Bottleneck(nn.Module):
    """Residual block built from a 1x1, a 3x3 and a 1x1 convolution.

    The last convolution outputs ``out_channel * expansion`` channels. The
    block input (optionally passed through ``downsample``) is added to the
    output of the third batch norm before the final ReLU.
    """
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        super().__init__()
        expanded = out_channel * self.expansion

        # 1x1 convolution: squeeze channels.
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=1,
                               stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        # 3x3 convolution: the only one that applies the requested stride.
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3,
                               stride=stride, bias=False, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channel)
        # 1x1 convolution: unsqueeze channels.
        self.conv3 = nn.Conv2d(out_channel, expanded, kernel_size=1,
                               stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(expanded)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        # Shortcut branch: the raw input, or its projection when one was given.
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out += shortcut
        return self.relu(out)


class ResNet(nn.Module):
    """ResNet backbone assembled from four stages of a given residual block.

    Args:
        block: residual block class; must expose an ``expansion`` attribute and
            accept ``(in_channel, out_channel, stride=..., downsample=...)``.
        blocks_num: number of blocks in each of the four stages.
        num_classes: output size of the final fully connected layer.
        include_top: when False, the average pool and fully connected layer are
            not created and ``forward`` returns the output of the last stage.
    """

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True):
        super().__init__()
        self.include_top = include_top
        self.in_channel = 64

        # Stem: 7x7 stride-2 convolution, batch norm, ReLU, 3x3 stride-2 max pool.
        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four residual stages; every stage after the first uses stride 2.
        self.layer1 = self._make_layer(block, 64, blocks_num[0])
        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)
        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)
        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)

        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Kaiming-normal initialisation for every convolution in the network.
        conv_layers = (m for m in self.modules() if isinstance(m, nn.Conv2d))
        for conv in conv_layers:
            nn.init.kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` blocks and update ``self.in_channel``."""
        out_channel = channel * block.expansion

        # A projection shortcut is needed when the stage changes the spatial
        # size (stride != 1) or the channel count.
        downsample = None
        if stride != 1 or self.in_channel != out_channel:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channel, out_channel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channel))

        # Only the first block receives the stride and the projection shortcut.
        stage = [block(self.in_channel, channel, downsample=downsample, stride=stride)]
        self.in_channel = out_channel
        stage.extend(block(self.in_channel, channel) for _ in range(1, block_num))

        return nn.Sequential(*stage)

    def forward(self, x):
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        if not self.include_top:
            return x

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        return self.fc(x)


def resnet34(num_classes=1000, include_top=True):
    """Build a ``ResNet`` from ``BasicBlock`` with stage depths 3, 4, 6 and 3."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths,
                  num_classes=num_classes, include_top=include_top)


def resnet101(num_classes=1000, include_top=True):
    """Build a ``ResNet`` from ``Bottleneck`` with stage depths 3, 4, 23 and 3."""
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths,
                  num_classes=num_classes, include_top=include_top)


================================================
FILE: pytorch_classification/train_multi_GPU/multi_train_utils/distributed_utils.py
================================================
import os

import torch
import torch.distributed as dist


def init_distributed_mode(args):
    """Initialise the process group from environment variables.

    Rank information is taken from ``RANK``/``WORLD_SIZE``/``LOCAL_RANK`` when
    the first two are set, otherwise from ``SLURM_PROCID``. When neither is
    present, ``args.distributed`` is set to False and nothing else happens.

    On the distributed path this sets ``args.rank``, ``args.gpu``,
    ``args.distributed`` and ``args.dist_backend`` (``args.world_size`` only on
    the ``RANK``/``WORLD_SIZE`` path), selects the CUDA device, creates the
    process group using ``args.dist_url`` and waits on a barrier.
    """
    env = os.environ
    has_rank_env = 'RANK' in env and 'WORLD_SIZE' in env
    has_slurm_env = 'SLURM_PROCID' in env

    if not (has_rank_env or has_slurm_env):
        print('Not using distributed mode')
        args.distributed = False
        return

    if has_rank_env:
        args.rank = int(env["RANK"])
        args.world_size = int(env['WORLD_SIZE'])
        args.gpu = int(env['LOCAL_RANK'])
    else:
        args.rank = int(env['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    # Communication backend; NCCL is the recommended one for NVIDIA GPUs.
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                            world_size=args.world_size, rank=args.rank)
    dist.barrier()


def cleanup():
    """Destroy the default process group if one has been initialised.

    Calling this without an initialised process group (for example after a
    non-distributed run, or a second time) is a no-op instead of an error.
    """
    if dist.is_available() and dist.is_initialized():
        dist.destroy_process_group()


def is_dist_avail_and_initialized():
    """Return True only when torch.distributed is available and initialised."""
    return dist.is_available() and dist.is_initialized()


def get_world_size():
    """Return the process-group world size, or 1 when not running distributed."""
    return dist.get_world_size() if is_dist_avail_and_initialized() else 1


def get_rank():
    """Return this process's rank, or 0 when not running distributed."""
    return dist.get_rank() if is_dist_avail_and_initialized() else 0


def is_main_process():
    """Return True when the current process has rank 0."""
    rank = get_rank()
    return rank == 0


def reduce_value(value, average=True):
    """All-reduce ``value`` across processes, optionally dividing by the world size.

    With fewer than two processes (the single-GPU case) ``value`` is returned
    untouched. Otherwise the reduction and the division are applied to
    ``value`` in place, without gradient tracking, and the same tensor is
    returned.
    """
    num_procs = get_world_size()
    if num_procs >= 2:
        with torch.no_grad():
            dist.all_reduce(value)
            if average:
                value /= num_procs

    return value


================================================
FILE: pytorch_classification/train_multi_GPU/multi_train_utils/train_eval_utils.py
================================================
import sys

from tqdm import tqdm
import torch

from multi_train_utils.distributed_utils import reduce_value, is_main_process


def train_one_epoch(model, optimizer, data_loader, device, epoch):
    """Run one training pass over ``data_loader`` and return the mean loss.

    Each batch is scored with cross-entropy and used for one optimizer step.
    After the backward pass the loss is passed through ``reduce_value`` with
    ``average=True`` before it enters the running mean. Only the main process
    shows a progress bar. The process exits with status 1 when the reduced
    loss is not finite.

    Returns:
        The mean of the reduced per-batch losses as a Python float.
    """
    model.train()
    criterion = torch.nn.CrossEntropyLoss()
    running_mean = torch.zeros(1).to(device)
    optimizer.zero_grad()

    # Print training progress in process 0 only.
    if is_main_process():
        data_loader = tqdm(data_loader, file=sys.stdout)

    for batch_idx, (images, labels) in enumerate(data_loader):
        logits = model(images.to(device))

        loss = criterion(logits, labels.to(device))
        loss.backward()
        loss = reduce_value(loss, average=True)

        # Incremental update of the mean loss over the batches seen so far.
        running_mean = (running_mean * batch_idx + loss.detach()) / (batch_idx + 1)

        # Print the mean loss in process 0 only.
        if is_main_process():
            data_loader.desc = "[epoch {}] mean loss {}".format(epoch, round(running_mean.item(), 3))

        if not torch.isfinite(loss):
            print('WARNING: non-finite loss, ending training ', loss)
            sys.exit(1)

        optimizer.step()
        optimizer.zero_grad()

    # Wait for outstanding work on the device to finish.
    if device != torch.device("cpu"):
        torch.cuda.synchronize(device)

    return running_mean.item()


@torch.no_grad()
def evaluate(model, data_loader, device):
    """Count correct predictions over ``data_loader``, summed across processes.

    The prediction for a sample is the index of the largest value along
    dimension 1 of the model output. The per-process count is passed through
    ``reduce_value`` with ``average=False`` and returned as a Python number;
    turning it into an accuracy is left to the caller.
    """
    model.eval()

    # Number of correctly predicted samples.
    correct = torch.zeros(1).to(device)

    # Print validation progress in process 0 only.
    if is_main_process():
        data_loader = tqdm(data_loader, file=sys.stdout)

    for images, labels in data_loader:
        logits = model(images.to(device))
        _, predicted = torch.max(logits, dim=1)
        correct += torch.eq(predicted, labels.to(device)).sum()

    # Wait for outstanding work on the device to finish.
    if device != torch.device("cpu"):
        torch.cuda.synchronize(device)

    correct = reduce_value(correct, average=False)

    return correct.item()


================================================
FILE: pytorch_classification/train_multi_GPU/my_dataset.py
================================================
from PIL import Image
import torch
from torch.utils.data import Dataset


class MyDataSet(Dataset):
    """Custom dataset built from parallel lists of image paths and class labels."""

    def __init__(self, images_path: list, images_class: list, transform=None):
        self.images_path = images_path
        self.images_class = images_class
        self.transform = transform

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        num_images = len(self.images_path)
        return num_images

    def __getitem__(self, item):
        """Load the sample at position ``item`` and return ``(img, label)``.

        Raises:
            ValueError: if the opened image is not in ``'RGB'`` mode.
        """
        path = self.images_path[item]
        img = Image.open(path)
        # 'RGB' denotes a colour image, 'L' a greyscale one.
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(path))

        label = self.images_class[item]
        if self.transform is None:
            return img, label
        return self.transform(img), label

    @staticmethod
    def collate_fn(batch):
        """Combine a sequence of ``(image, label)`` samples into two batched tensors.

        The official ``default_collate`` implementation can serve as a reference:
        https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        """
        image_seq, label_seq = tuple(zip(*batch))
        return torch.stack(image_seq, dim=0), torch.as_tensor(label_seq)


================================================
FILE: pytorch_classification/train_multi_GPU/plot_results.py
================================================
import math
import matplotlib.pyplot as plt

# Figure 1: bar chart of training time per epoch for 1, 2, 4 and 8 GPUs.
# The values in `y` are hard-coded; the axis label gives their unit as seconds.
x = [0, 1, 2, 3]
y = [9, 5.5, 3, 2]

plt.bar(x, y, align='center')
plt.xticks(range(len(x)), ['One-GPU', '2 GPUs', '4 GPUs', '8 GPUs'])
plt.ylim((0, 10))
# Write each bar's value just above the bar.
for i, v in enumerate(y):
    plt.text(x=i, y=v + 0.1, s=str(v) + ' s', ha='center')
plt.xlabel('Using number of GPU device')
plt.ylabel('Training time per epoch (second)')
plt.show()
plt.close()

# Figure 2: accuracy over 30 epochs, with and without SyncBatchNorm.
# Both lists are hard-coded, one value per epoch.
x = list(range(30))
no_SyncBatchNorm = [0.348, 0.495, 0.587, 0.554, 0.637,
                    0.622, 0.689, 0.673, 0.702, 0.717,
                    0.717, 0.69, 0.716, 0.696, 0.738,
                    0.75, 0.75, 0.66, 0.713, 0.758,
                    0.777, 0.777, 0.769, 0.792, 0.802,
                    0.807, 0.807, 0.804, 0.812, 0.811]

SyncBatchNorm = [0.283, 0.514, 0.531, 0.654, 0.671,
                 0.591, 0.621, 0.685, 0.701, 0.732,
                 0.701, 0.74, 0.667, 0.723, 0.745,
                 0.679, 0.738, 0.772, 0.764, 0.765,
                 0.764, 0.791, 0.818, 0.791, 0.807,
                 0.806, 0.811, 0.821, 0.833, 0.81]

plt.plot(x, no_SyncBatchNorm, label="No SyncBatchNorm")
plt.plot(x, SyncBatchNorm, label="SyncBatchNorm")
plt.xlabel('Training epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
plt.close()


# Figure 3: the two curves from figure 2 plus a hard-coded single-GPU curve
# (drawn in black) on the same axes.
x = list(range(30))
single_gpu = [0.569, 0.576, 0.654, 0.648, 0.609,
              0.637, 0.699, 0.709, 0.715, 0.715,
              0.717, 0.724, 0.722, 0.731, 0.721,
              0.774, 0.751, 0.787, 0.78, 0.77,
              0.763, 0.803, 0.754, 0.796, 0.799,
              0.815, 0.793, 0.808, 0.811, 0.806]
plt.plot(x, single_gpu, color="black", label="Single GPU")
plt.plot(x, no_SyncBatchNorm, label="No SyncBatchNorm")
plt.plot(x, SyncBatchNorm, label="SyncBatchNorm")
plt.xlabel('Training epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
plt.close()


# epochs = 30
# lrf = 0.1
# lf0 = lambda x: math.cos(x * math.pi / epochs)
# lf1 = lambda x: 1 + math.cos(x * math.pi / epochs)
# lf2 = lambda x: (1 + math.cos(x * math.pi / epochs)) / 2
# lf3 = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - lrf) + lrf
# x = range(epochs)
# y0 = [lf0(epoch) for epoch in x]
# y1 = [lf1(epoch) for epoch in x]
# y2 = [lf2(epoch) for epoch in x]
# y3 = [lf3(epoch) for epoch in x]
# plt.subplot(2, 2, 1)
# plt.plot(x, y0)
# plt.hlines(1, 0, epochs-1, colors="r", linestyles="dashed")
# plt.hlines(-1, 0, epochs-1, colors="r", linestyles="dashed")
# plt.xlim((0, epochs-1))
#
# plt.subplot(2, 2, 2)
# plt.plot(x, y1)
# plt.hlines(2, 0, epochs-1, colors="r", linestyles="dashed")
# plt.hlines(0, 0, epochs-1, colors="r", linestyles="dashed")
# plt.xlim((0, epochs-1))
#
# plt.subplot(2, 2, 3)
# plt.plot(x, y2)
# plt.hlines(1, 0, epochs-1, colors="r", linestyles="dashed")
# plt.hlines(0, 0, epochs-1, colors="r", linestyles="dashed")
# plt.xlim((0, epochs-1))
#
# plt.subplot(2, 2, 4)
# plt.plot(x, y3)
# plt.hlines(1, 0, epochs-1, colors="r", linestyles="dashed")
# plt.hlines(lrf, 0, epochs-1, colors="r", linestyles="dashed")
# plt.text(epochs-1, y3[-1], "{}".format(round(y3[-1], 1)))
# plt.xlim((0, epochs-1))
#
# plt.show()
# plt.close()


================================================
FILE: pytorch_classification/train_multi_GPU/requirements.txt
================================================
matplotlib==3.2.1
tqdm==4.42.1
torchvision==0.7.0
torch==1.13.1


================================================
FILE: pytorch_classification/train_multi_GPU/train_multi_gpu_using_launch.py
================================================
import os
import math
import tempfile
import argparse

import torch
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

from model import resnet34
from my_dataset import MyDataSet
from utils import read_split_data, plot_data_loader_image
from multi_train_utils.distributed_utils import init_distributed_mode, dist, cleanup
from multi_train_utils.train_eval_utils import train_one_epoch, evaluate


def main(args):
    """Train ResNet-34 with DistributedDataParallel, one process per GPU.

    The process group is initialised from the environment, the learning rate is
    multiplied by the world size, and training/validation data are sharded with
    ``DistributedSampler``. Rank 0 prints progress, writes TensorBoard scalars
    and saves ``./weights/model-{epoch}.pth`` after every epoch.

    Args:
        args: parsed command-line namespace. Reads ``device``, ``batch_size``,
            ``weights``, ``lr``, ``lrf``, ``epochs``, ``data_path``,
            ``num_classes``, ``freeze_layers`` and ``syncBN``; the distributed
            fields (``rank``, ``world_size``, ``gpu``, ...) are filled in by
            ``init_distributed_mode``.

    Raises:
        EnvironmentError: if no CUDA device is available, or if the script was
            not started in distributed mode.
    """
    if torch.cuda.is_available() is False:
        raise EnvironmentError("not find GPU device for training.")

    # Initialise the environment of each process.
    init_distributed_mode(args=args)
    if not args.distributed:
        # Without a launcher, args.rank is never set; fail with a clear message
        # instead of an AttributeError on the next line.
        raise EnvironmentError("distributed mode is not initialised; start this script "
                               "with torch.distributed.launch (--use_env).")

    rank = args.rank
    device = torch.device(args.device)
    batch_size = args.batch_size
    weights_path = args.weights
    args.lr *= args.world_size  # scale the learning rate with the number of parallel GPUs
    checkpoint_path = ""
    tb_writer = None

    if rank == 0:  # print information and create the tensorboard writer in the first process
        print(args)
        print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
        tb_writer = SummaryWriter()
        os.makedirs("./weights", exist_ok=True)

    train_info, val_info, num_classes = read_split_data(args.data_path)
    train_images_path, train_images_label = train_info
    val_images_path, val_images_label = val_info

    # check num_classes (the dataset's count is reported first, then the input)
    assert args.num_classes == num_classes, "dataset num_classes: {}, input {}".format(num_classes,
                                                                                       args.num_classes)

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # Training dataset.
    train_data_set = MyDataSet(images_path=train_images_path,
                               images_class=train_images_label,
                               transform=data_transform["train"])

    # Validation dataset.
    val_data_set = MyDataSet(images_path=val_images_path,
                             images_class=val_images_label,
                             transform=data_transform["val"])

    # Assign the sample indices used by each rank's process.
    train_sampler = torch.utils.data.distributed.DistributedSampler(train_data_set)
    val_sampler = torch.utils.data.distributed.DistributedSampler(val_data_set)

    # Group the sample indices into lists of batch_size elements.
    train_batch_sampler = torch.utils.data.BatchSampler(
        train_sampler, batch_size, drop_last=True)

    # os.cpu_count() may return None, so fall back to 1.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    if rank == 0:
        print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_data_set,
                                               batch_sampler=train_batch_sampler,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_data_set.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_data_set,
                                             batch_size=batch_size,
                                             sampler=val_sampler,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_data_set.collate_fn)
    # Instantiate the model.
    model = resnet34(num_classes=num_classes).to(device)

    # Load pre-trained weights when the file exists.
    if os.path.exists(weights_path):
        weights_dict = torch.load(weights_path, map_location=device)
        # Build the model's state dict once (not once per checkpoint key) and
        # skip checkpoint keys the model does not have instead of raising KeyError.
        model_state = model.state_dict()
        load_weights_dict = {k: v for k, v in weights_dict.items()
                             if k in model_state and model_state[k].numel() == v.numel()}
        model.load_state_dict(load_weights_dict, strict=False)
    else:
        checkpoint_path = os.path.join(tempfile.gettempdir(), "initial_weights.pt")
        # Without pre-trained weights, save the first process's weights and load
        # them in every process so that all start from the same initialisation.
        if rank == 0:
            torch.save(model.state_dict(), checkpoint_path)

        dist.barrier()
        # map_location must be specified here, otherwise the first GPU ends up
        # using more resources.
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))

    # Optionally freeze weights.
    if args.freeze_layers:
        for name, para in model.named_parameters():
            # Freeze everything except the final fully connected layer.
            if "fc" not in name:
                para.requires_grad_(False)
    else:
        # SyncBatchNorm only makes sense when training a network with BN layers.
        if args.syncBN:
            # Training takes longer with SyncBatchNorm.
            model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)

    # Wrap as a DDP model.
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])

    # optimizer
    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=0.005)
    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    try:
        for epoch in range(args.epochs):
            train_sampler.set_epoch(epoch)

            mean_loss = train_one_epoch(model=model,
                                        optimizer=optimizer,
                                        data_loader=train_loader,
                                        device=device,
                                        epoch=epoch)

            scheduler.step()

            sum_num = evaluate(model=model,
                               data_loader=val_loader,
                               device=device)
            acc = sum_num / val_sampler.total_size

            if rank == 0:
                print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3)))
                tags = ["loss", "accuracy", "learning_rate"]
                tb_writer.add_scalar(tags[0], mean_loss, epoch)
                tb_writer.add_scalar(tags[1], acc, epoch)
                tb_writer.add_scalar(tags[2], optimizer.param_groups[0]["lr"], epoch)

                torch.save(model.module.state_dict(), "./weights/model-{}.pth".format(epoch))
    finally:
        # Flush pending events and release the event file even if training aborts.
        if tb_writer is not None:
            tb_writer.close()

    # Remove the temporary weights file.
    if rank == 0:
        if os.path.exists(checkpoint_path) is True:
            os.remove(checkpoint_path)

    cleanup()


if __name__ == '__main__':
    def _str2bool(value):
        """Convert a command-line string to a bool.

        ``type=bool`` would turn every non-empty string (including "False")
        into True, so ``--syncBN False`` could never disable SyncBatchNorm.
        """
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ('true', 't', 'yes', 'y', '1'):
            return True
        # The empty string stays accepted as False, as it was with ``type=bool``.
        if text in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--epochs', type=int, default=30)
    parser.add_argument('--batch-size', type=int, default=16)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--lrf', type=float, default=0.1)
    # Whether to enable SyncBatchNorm.
    parser.add_argument('--syncBN', type=_str2bool, default=True)

    # Root directory of the dataset.
    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
    parser.add_argument('--data-path', type=str, default="/home/wz/data_set/flower_data/flower_photos")

    # Download link for the official resnet34 weights.
    # https://download.pytorch.org/models/resnet34-333f7ec4.pth
    parser.add_argument('--weights', type=str, default='resNet34.pth',
                        help='initial weights path')
    parser.add_argument('--freeze-layers', type=_str2bool, default=False)
    # Do not change this argument; it is assigned automatically.
    parser.add_argument('--device', default='cuda', help='device id (i.e. 0 or 0,1 or cpu)')
    # Number of processes to start (processes, not threads); there is no need
    # to set it, it is derived from nproc_per_node.
    parser.add_argument('--world-size', default=4, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
    opt = parser.parse_args()

    main(opt)


================================================
FILE: pytorch_classification/train_multi_GPU/train_multi_gpu_using_spawn.py
================================================
import os
import math
import tempfile
import argparse

import torch
import torch.multiprocessing as mp
from torch.multiprocessing import Process
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

from model import resnet34
from my_dataset import MyDataSet
from utils import read_split_data, plot_data_loader_image
from multi_train_utils.distributed_utils import dist, cleanup
from multi_train_utils.train_eval_utils import train_one_epoch, evaluate


def main_fun(rank, world_size, args):
    """Worker entry point: train ResNet-34 with DistributedDataParallel.

    One copy of this function runs in every worker process. The process rank
    is also used as the CUDA device index, and the rendezvous address is
    hard-coded to localhost, so the job is restricted to a single machine.

    Args:
        rank: Index of this process; also used as the GPU index.
        world_size: Total number of worker processes.
        args: Parsed command-line namespace. It is mutated in place:
            ``rank``, ``world_size``, ``gpu``, ``distributed`` and
            ``dist_backend`` are added and ``lr`` is multiplied by
            ``world_size``.

    Raises:
        EnvironmentError: If CUDA is not available.
        AssertionError: If ``args.num_classes`` differs from the number of
            classes found under ``args.data_path``.
    """
    if torch.cuda.is_available() is False:
        raise EnvironmentError("not find GPU device for training.")

    # ---- per-process distributed environment setup: start ----
    os.environ["MASTER_ADDR"] = "localhost"
    os.environ["MASTER_PORT"] = "12355"

    args.rank = rank
    args.world_size = world_size
    args.gpu = rank

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                            world_size=args.world_size, rank=args.rank)
    dist.barrier()
    # ---- per-process distributed environment setup: end ----

    rank = args.rank
    device = torch.device(args.device)
    batch_size = args.batch_size
    weights_path = args.weights
    # The learning rate is scaled up by the number of parallel GPUs.
    args.lr *= args.world_size
    checkpoint_path = ""

    if rank == 0:  # only the first process prints info and owns the tensorboard writer
        print(args)
        print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
        tb_writer = SummaryWriter()
        os.makedirs("./weights", exist_ok=True)

    train_info, val_info, num_classes = read_split_data(args.data_path)
    train_images_path, train_images_label = train_info
    val_images_path, val_images_label = val_info

    # check num_classes (the message reports the dataset value first, then the CLI value)
    assert args.num_classes == num_classes, \
        "dataset num_classes: {}, input {}".format(num_classes, args.num_classes)

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # training dataset
    train_data_set = MyDataSet(images_path=train_images_path,
                               images_class=train_images_label,
                               transform=data_transform["train"])

    # validation dataset
    val_data_set = MyDataSet(images_path=val_images_path,
                             images_class=val_images_label,
                             transform=data_transform["val"])

    # assign each rank the sample indices it is responsible for
    train_sampler = torch.utils.data.distributed.DistributedSampler(train_data_set)
    val_sampler = torch.utils.data.distributed.DistributedSampler(val_data_set)

    # group the training indices into lists of batch_size elements
    train_batch_sampler = torch.utils.data.BatchSampler(
        train_sampler, batch_size, drop_last=True)

    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    if rank == 0:
        print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_data_set,
                                               batch_sampler=train_batch_sampler,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_data_set.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_data_set,
                                             batch_size=batch_size,
                                             sampler=val_sampler,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_data_set.collate_fn)
    # build the model
    model = resnet34(num_classes=num_classes).to(device)

    # load pretrained weights when the file exists
    if os.path.exists(weights_path):
        weights_dict = torch.load(weights_path, map_location=device)
        # Build the state dict once; keep only tensors that exist in the model
        # with a matching element count (e.g. skips a differently sized fc layer).
        model_state = model.state_dict()
        load_weights_dict = {k: v for k, v in weights_dict.items()
                             if k in model_state and model_state[k].numel() == v.numel()}
        model.load_state_dict(load_weights_dict, strict=False)
    else:
        checkpoint_path = os.path.join(tempfile.gettempdir(), "initial_weights.pt")
        # Without pretrained weights, rank 0 saves its initial weights and every
        # process loads them so that all replicas start from the same values.
        if rank == 0:
            torch.save(model.state_dict(), checkpoint_path)

        dist.barrier()
        # map_location must be given here, otherwise the first GPU ends up
        # using more resources than the others.
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))

    # optionally freeze weights
    if args.freeze_layers:
        for name, para in model.named_parameters():
            # freeze everything except the final fully connected layer
            if "fc" not in name:
                para.requires_grad_(False)
    else:
        # SyncBatchNorm is only meaningful when training a network that contains BN layers
        if args.syncBN:
            # training takes longer with SyncBatchNorm enabled
            model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)

    # wrap as a DDP model
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])

    # optimizer
    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=0.005)
    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    for epoch in range(args.epochs):
        train_sampler.set_epoch(epoch)

        mean_loss = train_one_epoch(model=model,
                                    optimizer=optimizer,
                                    data_loader=train_loader,
                                    device=device,
                                    epoch=epoch)

        scheduler.step()

        sum_num = evaluate(model=model,
                           data_loader=val_loader,
                           device=device)
        # accuracy relative to the total sample count reported by the validation sampler
        acc = sum_num / val_sampler.total_size

        if rank == 0:
            print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3)))
            tags = ["loss", "accuracy", "learning_rate"]
            tb_writer.add_scalar(tags[0], mean_loss, epoch)
            tb_writer.add_scalar(tags[1], acc, epoch)
            tb_writer.add_scalar(tags[2], optimizer.param_groups[0]["lr"], epoch)

            # save the unwrapped module so the checkpoint keys carry no DDP prefix
            torch.save(model.module.state_dict(), "./weights/model-{}.pth".format(epoch))

    if rank == 0:
        # flush and release the tensorboard event file
        tb_writer.close()
        # remove the temporary initial-weights file, if one was created
        if os.path.exists(checkpoint_path) is True:
            os.remove(checkpoint_path)

    cleanup()


if __name__ == '__main__':
    def _str2bool(value):
        """Parse a boolean command-line value.

        ``type=bool`` would turn every non-empty string (including "False")
        into True, so the flag could never be switched off from the CLI.
        """
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ("true", "t", "yes", "y", "1"):
            return True
        if text in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--epochs', type=int, default=30)
    parser.add_argument('--batch-size', type=int, default=16)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--lrf', type=float, default=0.1)
    # whether to enable SyncBatchNorm
    parser.add_argument('--syncBN', type=_str2bool, default=True)

    # dataset root directory
    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
    parser.add_argument('--data-path', type=str, default="/home/wz/data_set/flower_data/flower_photos")

    # official resnet34 weights download url
    # https://download.pytorch.org/models/resnet34-333f7ec4.pth
    parser.add_argument('--weights', type=str, default='resNet34.pth',
                        help='initial weights path')
    parser.add_argument('--freeze-layers', type=_str2bool, default=False)
    # do not change this argument, devices are assigned automatically
    parser.add_argument('--device', default='cuda', help='device id (i.e. 0 or 0,1 or cpu)')
    # number of processes to start (processes, not threads);
    # on a single machine this is the number of GPUs to use
    parser.add_argument('--world-size', default=4, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
    opt = parser.parse_args()

    # when using mp.spawn, if I set number of works greater 1,
    # before each epoch training and validation will wait about 10 seconds

    # mp.spawn(main_fun,
    #          args=(opt.world_size, opt),
    #          nprocs=opt.world_size,
    #          join=True)

    # Start one worker process per rank and wait for all of them to finish.
    world_size = opt.world_size
    processes = []
    for rank in range(world_size):
        p = Process(target=main_fun, args=(rank, world_size, opt))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()


================================================
FILE: pytorch_classification/train_multi_GPU/train_single_gpu.py
================================================
import os
import math
import argparse

import torch
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
import torch.optim.lr_scheduler as lr_scheduler

from model import resnet34, resnet101
from my_dataset import MyDataSet
from utils import read_split_data
from multi_train_utils.train_eval_utils import train_one_epoch, evaluate


def main(args):
    """Train ResNet-34 on a single device and log to TensorBoard.

    Splits the dataset under ``args.data_path`` into train/validation parts,
    optionally loads pretrained weights, trains for ``args.epochs`` epochs
    with SGD and a cosine learning-rate schedule, and saves the model state
    dict to ``./weights/model-<epoch>.pth`` after every epoch.

    Args:
        args: Parsed command-line namespace (``num_classes``, ``epochs``,
            ``batch_size``, ``lr``, ``lrf``, ``data_path``, ``weights``,
            ``freeze_layers``, ``device``).

    Raises:
        AssertionError: If ``args.num_classes`` differs from the number of
            classes found in the dataset.
        FileNotFoundError: If ``args.weights`` is non-empty but the file
            does not exist.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")

    print(args)
    print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
    tb_writer = SummaryWriter()
    os.makedirs("./weights", exist_ok=True)

    train_info, val_info, num_classes = read_split_data(args.data_path)
    train_images_path, train_images_label = train_info
    val_images_path, val_images_label = val_info

    # check num_classes (the message reports the dataset value first, then the CLI value)
    assert args.num_classes == num_classes, \
        "dataset num_classes: {}, input {}".format(num_classes, args.num_classes)

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # training dataset
    train_data_set = MyDataSet(images_path=train_images_path,
                               images_class=train_images_label,
                               transform=data_transform["train"])

    # validation dataset
    val_data_set = MyDataSet(images_path=val_images_path,
                             images_class=val_images_label,
                             transform=data_transform["val"])

    batch_size = args.batch_size
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_data_set,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_data_set.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_data_set,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_data_set.collate_fn)

    # build the model and load pretrained weights when a path is given
    model = resnet34(num_classes=args.num_classes).to(device)
    if args.weights != "":
        if os.path.exists(args.weights):
            weights_dict = torch.load(args.weights, map_location=device)
            # Build the state dict once; keep only tensors that exist in the model
            # with a matching element count (e.g. skips a differently sized fc layer).
            model_state = model.state_dict()
            load_weights_dict = {k: v for k, v in weights_dict.items()
                                 if k in model_state and model_state[k].numel() == v.numel()}
            print(model.load_state_dict(load_weights_dict, strict=False))
        else:
            raise FileNotFoundError("not found weights file: {}".format(args.weights))

    # optionally freeze weights
    if args.freeze_layers:
        for name, para in model.named_parameters():
            # freeze everything except the final fully connected layer
            if "fc" not in name:
                para.requires_grad_(False)

    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=0.005)
    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    for epoch in range(args.epochs):
        # train
        mean_loss = train_one_epoch(model=model,
                                    optimizer=optimizer,
                                    data_loader=train_loader,
                                    device=device,
                                    epoch=epoch)

        scheduler.step()

        # validate
        sum_num = evaluate(model=model,
                           data_loader=val_loader,
                           device=device)
        acc = sum_num / len(val_data_set)
        print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3)))
        tags = ["loss", "accuracy", "learning_rate"]
        tb_writer.add_scalar(tags[0], mean_loss, epoch)
        tb_writer.add_scalar(tags[1], acc, epoch)
        tb_writer.add_scalar(tags[2], optimizer.param_groups[0]["lr"], epoch)

        torch.save(model.state_dict(), "./weights/model-{}.pth".format(epoch))

    # flush and release the tensorboard event file
    tb_writer.close()


if __name__ == '__main__':
    def _str2bool(value):
        """Parse a boolean command-line value.

        ``type=bool`` would turn every non-empty string (including "False")
        into True, so the flag could never be switched off from the CLI.
        """
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ("true", "t", "yes", "y", "1"):
            return True
        if text in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--epochs', type=int, default=30)
    parser.add_argument('--batch-size', type=int, default=16)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--lrf', type=float, default=0.1)

    # dataset root directory
    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
    parser.add_argument('--data-path', type=str,
                        default="/home/w180662/my_project/my_github/data_set/flower_data/flower_photos")

    # official resnet34 weights download url
    # https://download.pytorch.org/models/resnet34-333f7ec4.pth
    parser.add_argument('--weights', type=str, default='resNet34.pth',
                        help='initial weights path')
    parser.add_argument('--freeze-layers', type=_str2bool, default=False)
    parser.add_argument('--device', default='cuda', help='device id (i.e. 0 or 0,1 or cpu)')

    opt = parser.parse_args()

    main(opt)


================================================
FILE: pytorch_classification/train_multi_GPU/utils.py
================================================
import os
import json
import pickle
import random

import matplotlib.pyplot as plt


def read_split_data(root: str, val_rate: float = 0.2):
    """Split an image-folder dataset into training and validation lists.

    Every sub-directory of ``root`` is treated as one class. Class names are
    sorted and numbered from 0, and the index -> name mapping is written to
    ``class_indices.json`` in the current working directory. For each class,
    ``int(len(images) * val_rate)`` images are sampled for validation with a
    fixed random seed, so the split is reproducible.

    Args:
        root: Dataset root directory containing one folder per class.
        val_rate: Fraction of each class sampled into the validation set.

    Returns:
        A tuple ``([train_paths, train_labels], [val_paths, val_labels],
        num_classes)``.

    Raises:
        AssertionError: If ``root`` does not exist, or if either the training
            or the validation split ends up empty.
    """
    random.seed(0)  # make the random split reproducible
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # walk the root folder; every sub-folder is one class
    class_names = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]
    # sort so the order is identical on every platform
    class_names.sort()
    # map class name -> numeric index
    class_indices = dict((k, v) for v, k in enumerate(class_names))
    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of every training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of every validation image
    every_class_num = []  # number of samples per class
    supported = [".jpg", ".JPG", ".png", ".PNG"]  # accepted file extensions
    # walk the files of every class folder
    for cla in class_names:
        cla_path = os.path.join(root, cla)
        # collect all files whose extension is supported
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        # sort so the order is identical on every platform
        images.sort()
        # numeric index of this class
        image_class = class_indices[cla]
        # record the number of samples in this class
        every_class_num.append(len(images))
        # Sample the validation images; a set keeps the membership test below
        # O(1) per image instead of scanning a list for each one.
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:  # sampled for validation
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:  # everything else goes to the training set
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.".format(sum(every_class_num)))
    print("{} images for training.".format(len(train_images_path)))
    print("{} images for validation.".format(len(val_images_path)))
    assert len(train_images_path) > 0, "number of training images must greater than 0."
    assert len(val_images_path) > 0, "number of validation images must greater than 0."

    plot_image = False
    if plot_image:
        # bar chart of the number of samples per class
        plt.bar(range(len(class_names)), every_class_num, align='center')
        # replace the x ticks 0,1,2,... with the class names
        plt.xticks(range(len(class_names)), class_names)
        # put the count on top of every bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        # x axis label
        plt.xlabel('image class')
        # y axis label
        plt.ylabel('number of images')
        # chart title
        plt.title('flower class distribution')
        plt.show()

    return [train_images_path, train_images_label], [val_images_path, val_images_label], len(class_names)


def plot_data_loader_image(data_loader):
    """Show up to four de-normalized images from every batch of a loader.

    Class names are read from ``./class_indices.json``. The number of images
    shown is derived from each batch itself rather than from
    ``data_loader.batch_size``, which is ``None`` for loaders built with a
    ``batch_sampler`` and may exceed the size of a short final batch.

    Args:
        data_loader: Iterable yielding ``(images, labels)`` batches.
            Assumes images are CHW tensors normalized with the mean/std
            constants below - TODO confirm against the transforms in use.

    Raises:
        AssertionError: If ``./class_indices.json`` does not exist.
    """
    json_path = './class_indices.json'
    assert os.path.exists(json_path), json_path + " does not exist."
    # use a context manager so the file handle is always released
    with open(json_path, 'r') as json_file:
        class_indices = json.load(json_file)

    for data in data_loader:
        images, labels = data
        plot_num = min(len(images), 4)
        for i in range(plot_num):
            # [C, H, W] -> [H, W, C]
            img = images[i].numpy().transpose(1, 2, 0)
            # undo the Normalize transform and rescale to 0-255
            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255
            label = labels[i].item()
            plt.subplot(1, plot_num, i+1)
            plt.xlabel(class_indices[str(label)])
            plt.xticks([])  # hide x axis ticks
            plt.yticks([])  # hide y axis ticks
            plt.imshow(img.astype('uint8'))
        plt.show()


def write_pickle(list_info: list, file_name: str):
    """Serialize ``list_info`` with pickle into the file ``file_name``.

    The file is opened in binary write mode, so existing content is replaced.
    """
    with open(file_name, mode='wb') as stream:
        pickle.dump(list_info, file=stream)


def read_pickle(file_name: str) -> list:
    """Load and return the object pickled in the file ``file_name``.

    NOTE: unpickling can execute arbitrary code; only use this on files
    that come from a trusted source.
    """
    with open(file_name, mode='rb') as stream:
        return pickle.load(stream)


================================================
FILE: pytorch_classification/vision_transformer/README.md
================================================
## 代码使用简介

1. 下载好数据集，代码中默认使用的是花分类数据集，下载地址: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz),
如果下载不了的话可以通过百度云链接下载: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg 提取码:58p0
2. 在`train.py`脚本中将`--data-path`设置成解压后的`flower_photos`文件夹绝对路径
3. 下载预训练权重，在`vit_model.py`文件中每个模型都有提供预训练权重的下载地址，根据自己使用的模型下载对应预训练权重
4. 在`train.py`脚本中将`--weights`参数设成下载好的预训练权重路径
5. 设置好数据集的路径`--data-path`以及预训练权重的路径`--weights`就能使用`train.py`脚本开始训练了(训练过程中会自动生成`class_indices.json`文件)
6. 在`predict.py`脚本中导入和训练脚本中同样的模型，并将`model_weight_path`设置成训练好的模型权重路径(默认保存在weights文件夹下)
7. 在`predict.py`脚本中将`img_path`设置成你自己需要预测的图片绝对路径
8. 设置好权重路径`model_weight_path`和预测的图片路径`img_path`就能使用`predict.py`脚本进行预测了
9. 如果要使用自己的数据集，请按照花分类数据集的文件结构进行摆放(即一个类别对应一个文件夹)，并且将训练以及预测脚本中的`num_classes`设置成你自己数据的类别数


================================================
FILE: pytorch_classification/vision_transformer/flops.py
================================================
import torch
from fvcore.nn import FlopCountAnalysis

from vit_model import Attention


def main():
    """Print the FLOP counts of single-head and 8-head attention.

    Both modules use an embedding size of 512 and are analysed on the same
    random input. The single-head variant has its output projection replaced
    by an identity so that it represents plain self-attention without Wo.
    """
    # Self-Attention: one head, output projection (Wo) removed
    single_head = Attention(dim=512, num_heads=1)
    single_head.proj = torch.nn.Identity()

    # Multi-Head Attention: eight heads, projection kept
    multi_head = Attention(dim=512, num_heads=8)

    # input layout: [batch_size, num_tokens, total_embed_dim]
    inputs = (torch.rand(32, 1024, 512),)

    # analyse and report the modules in the same order as they were built
    for title, module in (("Self-Attention FLOPs:", single_head),
                          ("Multi-Head Attention FLOPs:", multi_head)):
        analysis = FlopCountAnalysis(module, inputs)
        print(title, analysis.total())


# Run the FLOPs comparison only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/vision_transformer/my_dataset.py
================================================
from PIL import Image
import torch
from torch.utils.data import Dataset


class MyDataSet(Dataset):
    """Image classification dataset backed by parallel path/label lists."""

    def __init__(self, images_path: list, images_class: list, transform=None):
        # images_path[i] is the file of sample i, images_class[i] its label
        self.images_path = images_path
        self.images_class = images_class
        # optional callable applied to the opened image
        self.transform = transform

    def __len__(self):
        """Return the number of image paths."""
        return len(self.images_path)

    def __getitem__(self, item):
        """Open sample ``item`` and return ``(image, label)``.

        Raises:
            ValueError: If the image is not in RGB mode (e.g. "L" grayscale).
        """
        path = self.images_path[item]
        img = Image.open(path)
        # only 'RGB' colour images are accepted
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(path))
        label = self.images_class[item]

        if self.transform is None:
            return img, label
        return self.transform(img), label

    @staticmethod
    def collate_fn(batch):
        """Merge a list of ``(image, label)`` samples into two batch tensors."""
        # for the official default_collate implementation see
        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        image_seq, label_seq = zip(*batch)

        stacked = torch.stack(image_seq, dim=0)
        targets = torch.as_tensor(label_seq)
        return stacked, targets


================================================
FILE: pytorch_classification/vision_transformer/predict.py
================================================
import os
import json

import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

from vit_model import vit_base_patch16_224_in21k as create_model


def main():
    """Classify a single image with a trained ViT model and plot the result.

    The image path, the class-index JSON path and the weights path are
    hard-coded below. The predicted class and its probability are used as
    the plot title, and the probability of every class is printed.

    Raises:
        AssertionError: If the image or ``class_indices.json`` is missing.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose(
        [transforms.Resize(256),
         transforms.CenterCrop(224),
         transforms.ToTensor(),
         transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    # Force three channels: grayscale / RGBA / palette images would otherwise
    # not match the 3-channel Normalize above.
    img = Image.open(img_path).convert('RGB')
    plt.imshow(img)
    # PIL image -> [C, H, W] tensor
    img = data_transform(img)
    # expand batch dimension -> [N, C, H, W]
    img = torch.unsqueeze(img, dim=0)

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = create_model(num_classes=5, has_logits=False).to(device)
    # load model weights
    model_weight_path = "./weights/model-9.pth"
    model.load_state_dict(torch.load(model_weight_path, map_location=device))
    model.eval()
    with torch.no_grad():
        # predict class
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).numpy()

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                                 predict[predict_cla].numpy())
    plt.title(print_res)
    for i in range(len(predict)):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  predict[i].numpy()))
    plt.show()


# Run the prediction only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_classification/vision_transformer/train.py
================================================
import os
import math
import argparse

import torch
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms


from my_dataset import MyDataSet
from vit_model import vit_base_patch16_224_in21k as create_model
from utils import read_split_data, train_one_epoch, evaluate


def main(args):
    """Fine-tune a Vision Transformer on an image-folder dataset.

    Splits the dataset under ``args.data_path``, optionally loads pretrained
    weights (dropping the classification head, and the pre-logits layer when
    the model has none), optionally freezes everything except the head and
    pre-logits parameters, then trains with SGD and a cosine learning-rate
    schedule. Metrics are logged to TensorBoard and the state dict is saved
    to ``./weights/model-<epoch>.pth`` after every epoch.

    Args:
        args: Parsed command-line namespace (``num_classes``, ``epochs``,
            ``batch_size``, ``lr``, ``lrf``, ``data_path``, ``weights``,
            ``freeze_layers``, ``device``).

    Raises:
        AssertionError: If ``args.weights`` is non-empty but the file does
            not exist.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")

    os.makedirs("./weights", exist_ok=True)

    tb_writer = SummaryWriter()

    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])}

    # training dataset
    train_dataset = MyDataSet(images_path=train_images_path,
                              images_class=train_images_label,
                              transform=data_transform["train"])

    # validation dataset
    val_dataset = MyDataSet(images_path=val_images_path,
                            images_class=val_images_label,
                            transform=data_transform["val"])

    batch_size = args.batch_size
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_dataset.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_dataset.collate_fn)

    model = create_model(num_classes=args.num_classes, has_logits=False).to(device)

    if args.weights != "":
        assert os.path.exists(args.weights), "weights file: '{}' not exist.".format(args.weights)
        weights_dict = torch.load(args.weights, map_location=device)
        # drop the weights that must not be loaded into this model
        del_keys = ['head.weight', 'head.bias'] if model.has_logits \
            else ['pre_logits.fc.weight', 'pre_logits.fc.bias', 'head.weight', 'head.bias']
        for k in del_keys:
            # pop() tolerates checkpoints that do not contain every listed key
            weights_dict.pop(k, None)
        print(model.load_state_dict(weights_dict, strict=False))

    if args.freeze_layers:
        for name, para in model.named_parameters():
            # freeze every weight except head and pre_logits
            if "head" not in name and "pre_logits" not in name:
                para.requires_grad_(False)
            else:
                print("training {}".format(name))

    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=5E-5)
    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    for epoch in range(args.epochs):
        # train
        train_loss, train_acc = train_one_epoch(model=model,
                                                optimizer=optimizer,
                                                data_loader=train_loader,
                                                device=device,
                                                epoch=epoch)

        scheduler.step()

        # validate
        val_loss, val_acc = evaluate(model=model,
                                     data_loader=val_loader,
                                     device=device,
                                     epoch=epoch)

        tags = ["train_loss", "train_acc", "val_loss", "val_acc", "learning_rate"]
        tb_writer.add_scalar(tags[0], train_loss, epoch)
        tb_writer.add_scalar(tags[1], train_acc, epoch)
        tb_writer.add_scalar(tags[2], val_loss, epoch)
        tb_writer.add_scalar(tags[3], val_acc, epoch)
        tb_writer.add_scalar(tags[4], optimizer.param_groups[0]["lr"], epoch)

        torch.save(model.state_dict(), "./weights/model-{}.pth".format(epoch))

    # flush and release the tensorboard event file
    tb_writer.close()


if __name__ == '__main__':
    def _str2bool(value):
        """Parse a boolean command-line value.

        ``type=bool`` would turn every non-empty string (including "False")
        into True, so freezing could never be disabled from the CLI.
        """
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ("true", "t", "yes", "y", "1"):
            return True
        if text in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--batch-size', type=int, default=8)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--lrf', type=float, default=0.01)

    # dataset root directory
    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
    parser.add_argument('--data-path', type=str,
                        default="/data/flower_photos")
    parser.add_argument('--model-name', default='', help='create model name')

    # path of the pretrained weights; set to an empty string to skip loading
    parser.add_argument('--weights', type=str, default='./vit_base_patch16_224_in21k.pth',
                        help='initial weights path')
    # whether to freeze weights
    parser.add_argument('--freeze-layers', type=_str2bool, default=True)
    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')

    opt = parser.parse_args()

    main(opt)


================================================
FILE: pytorch_classification/vision_transformer/utils.py
================================================
import os
import sys
import json
import pickle
import random

import torch
from tqdm import tqdm

import matplotlib.pyplot as plt


def read_split_data(root: str, val_rate: float = 0.2):
    """
    Split an image-folder dataset into training and validation lists.

    Every sub-directory of ``root`` is treated as one class. The mapping from
    class index to class name is written to ``class_indices.json`` in the
    current working directory.

    Args:
        root: dataset root directory containing one sub-directory per class.
        val_rate: fraction of each class sampled for validation; the per-class
            count is ``int(len(images) * val_rate)``.

    Returns:
        (train_images_path, train_images_label, val_images_path, val_images_label)

    Raises:
        AssertionError: if ``root`` does not exist, or if either split is empty.
    """
    random.seed(0)  # fixed seed so the split is reproducible
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # walk the root: one sub-directory per class
    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]
    # sort so the order is the same on every platform
    flower_class.sort()
    # class name -> numeric index
    class_indices = {name: index for index, name in enumerate(flower_class)}
    # stored inverted (index -> class name)
    json_str = json.dumps({index: name for name, index in class_indices.items()}, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of each training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of each validation image
    every_class_num = []  # number of samples found per class
    supported = [".jpg", ".JPG", ".png", ".PNG"]  # accepted file suffixes
    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # keep only files whose suffix is supported
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        # sort so the order is the same on every platform
        images.sort()
        image_class = class_indices[cla]
        every_class_num.append(len(images))
        # sample the validation subset; a set makes the membership test
        # below O(1) instead of scanning a list once per image
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.".format(sum(every_class_num)))
    print("{} images for training.".format(len(train_images_path)))
    print("{} images for validation.".format(len(val_images_path)))
    assert len(train_images_path) > 0, "number of training images must greater than 0."
    assert len(val_images_path) > 0, "number of validation images must greater than 0."

    # manual switch: set to True to show a bar chart of samples per class
    plot_image = False
    if plot_image:
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # replace the x ticks 0,1,2,... with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # print the count above each bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        plt.xlabel('image class')
        plt.ylabel('number of images')
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label


def plot_data_loader_image(data_loader):
    """
    Show up to four images of every batch with their class names.

    Class names are read from ``./class_indices.json``. One figure is shown
    per batch.

    Args:
        data_loader: iterable yielding ``(images, labels)`` batches and exposing
            a ``batch_size`` attribute.

    Raises:
        AssertionError: if ``./class_indices.json`` does not exist.
    """
    batch_size = data_loader.batch_size
    plot_num = min(batch_size, 4)

    json_path = './class_indices.json'
    assert os.path.exists(json_path), json_path + " does not exist."
    # context manager so the file handle is closed once parsed
    with open(json_path, 'r') as json_file:
        class_indices = json.load(json_file)

    for data in data_loader:
        images, labels = data
        # the final batch may hold fewer than plot_num samples
        for i in range(min(plot_num, len(images))):
            # [C, H, W] -> [H, W, C]
            img = images[i].numpy().transpose(1, 2, 0)
            # undo normalization; assumes the loader normalized with these
            # per-channel std/mean values -- TODO confirm against the transforms
            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255
            label = labels[i].item()
            plt.subplot(1, plot_num, i+1)
            plt.xlabel(class_indices[str(label)])
            plt.xticks([])  # hide x-axis ticks
            plt.yticks([])  # hide y-axis ticks
            plt.imshow(img.astype('uint8'))
        plt.show()


def write_pickle(list_info: list, file_name: str):
    """Pickle ``list_info`` into the file ``file_name`` (opened in binary write mode)."""
    with open(file_name, mode='wb') as handle:
        pickle.dump(list_info, handle)


def read_pickle(file_name: str) -> list:
    """Load and return the object pickled in ``file_name``.

    NOTE: unpickling can execute arbitrary code; only read files you trust.
    """
    with open(file_name, mode='rb') as handle:
        return pickle.load(handle)


def train_one_epoch(model, optimizer, data_loader, device, epoch):
    """
    Train ``model`` for one pass over ``data_loader`` with cross-entropy loss.

    Args:
        model: network called on a batch of images to produce class scores.
        optimizer: optimizer stepped (and zeroed) after every batch.
        data_loader: iterable yielding ``(images, labels)`` batches.
        device: device the images, labels and accumulators are moved to.
        epoch: epoch number, used only in the progress-bar text.

    Returns:
        (mean loss per batch, fraction of correctly classified samples).
        Both are 0.0 when the loader yields no batches.

    Exits the process with status 1 if a non-finite loss is encountered.
    """
    model.train()
    loss_function = torch.nn.CrossEntropyLoss()
    accu_loss = torch.zeros(1).to(device)  # running sum of batch losses
    accu_num = torch.zeros(1).to(device)   # running count of correct predictions
    optimizer.zero_grad()

    sample_num = 0
    num_steps = 0
    data_loader = tqdm(data_loader, file=sys.stdout)
    for step, data in enumerate(data_loader):
        images, labels = data
        # move the labels once; they are used for both accuracy and loss
        labels = labels.to(device)
        sample_num += images.shape[0]
        num_steps = step + 1

        pred = model(images.to(device))
        pred_classes = torch.max(pred, dim=1)[1]
        accu_num += torch.eq(pred_classes, labels).sum()

        loss = loss_function(pred, labels)
        loss.backward()
        accu_loss += loss.detach()

        data_loader.desc = "[train epoch {}] loss: {:.3f}, acc: {:.3f}".format(epoch,
                                                                               accu_loss.item() / num_steps,
                                                                               accu_num.item() / sample_num)

        # abort before the optimizer applies a NaN/inf update
        if not torch.isfinite(loss):
            print('WARNING: non-finite loss, ending training ', loss)
            sys.exit(1)

        optimizer.step()
        optimizer.zero_grad()

    # max(..., 1) keeps an empty loader from raising (undefined step / division by zero)
    return accu_loss.item() / max(num_steps, 1), accu_num.item() / max(sample_num, 1)


@torch.no_grad()
def evaluate(model, data_loader, device, epoch):
    """
    Evaluate ``model`` over ``data_loader`` with cross-entropy loss.

    Runs with gradient tracking disabled and the model in eval mode.

    Args:
        model: network called on a batch of images to produce class scores.
        data_loader: iterable yielding ``(images, labels)`` batches.
        device: device the images, labels and accumulators are moved to.
        epoch: epoch number, used only in the progress-bar text.

    Returns:
        (mean loss per batch, fraction of correctly classified samples).
        Both are 0.0 when the loader yields no batches.
    """
    loss_function = torch.nn.CrossEntropyLoss()

    model.eval()

    accu_num = torch.zeros(1).to(device)   # running count of correct predictions
    accu_loss = torch.zeros(1).to(device)  # running sum of batch losses

    sample_num = 0
    num_steps = 0
    data_loader = tqdm(data_loader, file=sys.stdout)
    for step, data in enumerate(data_loader):
        images, labels = data
        # move the labels once; they are used for both accuracy and loss
        labels = labels.to(device)
        sample_num += images.shape[0]
        num_steps = step + 1

        pred = model(images.to(device))
        pred_classes = torch.max(pred, dim=1)[1]
        accu_num += torch.eq(pred_classes, labels).sum()

        loss = loss_function(pred, labels)
        accu_loss += loss

        data_loader.desc = "[valid epoch {}] loss: {:.3f}, acc: {:.3f}".format(epoch,
                                                                               accu_loss.item() / num_steps,
                                                                               accu_num.item() / sample_num)

    # max(..., 1) keeps an empty loader from raising (undefined step / division by zero)
    return accu_loss.item() / max(num_steps, 1), accu_num.item() / max(sample_num, 1)


================================================
FILE: pytorch_classification/vision_transformer/vit_model.py
================================================
"""
original code from rwightman:
https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
"""
from functools import partial
from collections import OrderedDict

import torch
import torch.nn as nn


def drop_path(x, drop_prob: float = 0., training: bool = False):
    """
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Each sample along dim 0 is either zeroed entirely or kept and rescaled by
    ``1 / (1 - drop_prob)``.

    Args:
        x: input tensor; the first dimension indexes samples.
        drop_prob: probability of dropping a sample's path.
        training: the input is returned unchanged unless this is True.

    Returns:
        ``x`` itself when ``drop_prob == 0`` or ``training`` is False, otherwise
        a new tensor of the same shape. With ``drop_prob == 1`` the result is
        all zeros.
    """
    if drop_prob == 0. or not training:
        return x
    keep_prob = 1 - drop_prob
    # one random value per sample, broadcast over all remaining dims
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
    random_tensor.floor_()  # binarize: 1 keeps the sample, 0 drops it
    if keep_prob > 0.:
        # rescale survivors; skipped when keep_prob is 0 because dividing by
        # zero would turn the (all-dropped) output into NaN instead of zeros
        x = x.div(keep_prob)
    return x * random_tensor


class DropPath(nn.Module):
    """
    Drop paths (Stochastic Depth) per sample  (when applied in main path of residual blocks).

    Module wrapper around ``drop_path`` that uses the module's own
    ``training`` flag.
    """
    def __init__(self, drop_prob=None):
        """
        Args:
            drop_prob: probability of dropping a sample's path; ``None`` is
                treated as 0 (nothing is dropped).
        """
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        # the constructor default is None, which drop_path cannot do arithmetic on
        drop_prob = 0. if self.drop_prob is None else self.drop_prob
        return drop_path(x, drop_prob, self.training)


class PatchEmbed(nn.Module):
    """
    2D Image to Patch Embedding

    Splits the image into non-overlapping patches with a strided convolution
    and returns one embedding vector per patch.
    """
    def __init__(self, img_size=224, patch_size=16, in_c=3, embed_dim=768, norm_layer=None):
        """
        Args:
            img_size: input image size, an int (square) or an (height, width) pair.
            patch_size: patch size, an int (square) or an (height, width) pair.
            in_c: number of input channels.
            embed_dim: embedding dimension of every patch.
            norm_layer: optional callable building a normalization layer from
                ``embed_dim``; ``nn.Identity`` is used when omitted.
        """
        super().__init__()
        img_size = self._to_2tuple(img_size)
        patch_size = self._to_2tuple(patch_size)
        self.img_size = img_size
        self.patch_size = patch_size
        self.grid_size = (img_size[0] // patch_size[0], img_size[1] // patch_size[1])
        self.num_patches = self.grid_size[0] * self.grid_size[1]

        # kernel == stride, so each output position covers exactly one patch
        self.proj = nn.Conv2d(in_c, embed_dim, kernel_size=patch_size, stride=patch_size)
        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()

    @staticmethod
    def _to_2tuple(value):
        """Return ``value`` as a 2-tuple, duplicating a scalar."""
        if isinstance(value, (tuple, list)):
            if len(value) != 2:
                raise ValueError("expected an int or a pair, got {}".format(value))
            return tuple(value)
        return (value, value)

    def forward(self, x):
        B, C, H, W = x.shape
        assert H == self.img_size[0] and W == self.img_size[1], \
            f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."

        # flatten: [B, C, H, W] -> [B, C, HW]
        # transpose: [B, C, HW] -> [B, HW, C]
        x = self.proj(x).flatten(2).transpose(1, 2)
        x = self.norm(x)
        return x


class Attention(nn.Module):
    """Multi-head self-attention over a sequence of tokens."""
    def __init__(self,
                 dim,   # embedding dim of every input token
                 num_heads=8,
                 qkv_bias=False,
                 qk_scale=None,
                 attn_drop_ratio=0.,
                 proj_drop_ratio=0.):
        super().__init__()
        self.num_heads = num_heads
        per_head_dim = dim // num_heads
        # default scaling is 1 / sqrt(per-head dim) unless a truthy override is given
        self.scale = qk_scale if qk_scale else per_head_dim ** -0.5
        # a single projection produces q, k and v together
        self.qkv = nn.Linear(dim, 3 * dim, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop_ratio)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop_ratio)

    def forward(self, x):
        # x: [batch_size, num_tokens, total_embed_dim]
        batch, tokens, channels = x.shape
        head_dim = channels // self.num_heads

        # project, then split: [batch, tokens, 3, num_heads, head_dim]
        packed = self.qkv(x).reshape(batch, tokens, 3, self.num_heads, head_dim)
        # -> [3, batch, num_heads, tokens, head_dim], unbound into q / k / v
        q, k, v = packed.permute(2, 0, 3, 1, 4).unbind(0)

        # scaled dot product: [batch, num_heads, tokens, tokens]
        scores = torch.matmul(q, k.transpose(-2, -1)) * self.scale
        weights = self.attn_drop(torch.softmax(scores, dim=-1))

        # weighted sum of values, heads merged back: [batch, tokens, total_embed_dim]
        merged = torch.matmul(weights, v).transpose(1, 2).reshape(batch, tokens, channels)
        return self.proj_drop(self.proj(merged))


class Mlp(nn.Module):
    """
    Two-layer feed-forward network (Linear -> activation -> dropout -> Linear -> dropout),
    as used in Vision Transformer, MLP-Mixer and related networks.
    """
    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        # unset (falsy) sizes fall back to the input width
        hidden_dim = hidden_features if hidden_features else in_features
        out_dim = out_features if out_features else in_features
        self.fc1 = nn.Linear(in_features, hidden_dim)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_dim, out_dim)
        # the same dropout module is applied after both linear layers
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        hidden = self.drop(self.act(self.fc1(x)))
        return self.drop(self.fc2(hidden))


class Block(nn.Module):
    """
    Transformer encoder block: pre-norm self-attention followed by a pre-norm MLP,
    each wrapped in a residual connection with optional stochastic depth.
    """
    def __init__(self,
                 dim,
                 num_heads,
                 mlp_ratio=4.,
                 qkv_bias=False,
                 qk_scale=None,
                 drop_ratio=0.,
                 attn_drop_ratio=0.,
                 drop_path_ratio=0.,
                 act_layer=nn.GELU,
                 norm_layer=nn.LayerNorm):
        super().__init__()
        self.norm1 = norm_layer(dim)
        self.attn = Attention(dim,
                              num_heads=num_heads,
                              qkv_bias=qkv_bias,
                              qk_scale=qk_scale,
                              attn_drop_ratio=attn_drop_ratio,
                              proj_drop_ratio=drop_ratio)
        # stochastic depth on the residual branches; a no-op when the ratio is 0
        if drop_path_ratio > 0.:
            self.drop_path = DropPath(drop_path_ratio)
        else:
            self.drop_path = nn.Identity()
        self.norm2 = norm_layer(dim)
        # the MLP hidden width is the token dim scaled by mlp_ratio
        self.mlp = Mlp(in_features=dim,
                       hidden_features=int(dim * mlp_ratio),
                       act_layer=act_layer,
                       drop=drop_ratio)

    def forward(self, x):
        attn_branch = self.attn(self.norm1(x))
        x = x + self.drop_path(attn_branch)
        mlp_branch = self.mlp(self.norm2(x))
        return x + self.drop_path(mlp_branch)


class VisionTransformer(nn.Module):
    """
    Vision Transformer classifier.

    Pipeline: patch embedding -> prepend class token (plus a distillation token
    when ``distilled``) -> add position embedding -> stack of ``Block`` modules ->
    final norm -> optional pre-logits layer -> linear head(s).
    """
    def __init__(self, img_size=224, patch_size=16, in_c=3, num_classes=1000,
                 embed_dim=768, depth=12, num_heads=12, mlp_ratio=4.0, qkv_bias=True,
                 qk_scale=None, representation_size=None, distilled=False, drop_ratio=0.,
                 attn_drop_ratio=0., drop_path_ratio=0., embed_layer=PatchEmbed, norm_layer=None,
                 act_layer=None):
        """
        Args:
            img_size: input image size, forwarded to ``embed_layer``
            patch_size: patch size, forwarded to ``embed_layer``
            in_c (int): number of input channels
            num_classes (int): number of classes for classification head;
                the head is ``nn.Identity`` when this is not positive
            embed_dim (int): embedding dimension
            depth (int): depth of transformer (number of blocks)
            num_heads (int): number of attention heads
            mlp_ratio (float): ratio of mlp hidden dim to embedding dim
            qkv_bias (bool): enable bias for qkv if True
            qk_scale (float): override default qk scale of head_dim ** -0.5 if set
            representation_size (Optional[int]): enable and set representation layer (pre-logits) to this value if set;
                ignored when ``distilled`` is True
            distilled (bool): model includes a distillation token and head as in DeiT models
            drop_ratio (float): dropout rate
            attn_drop_ratio (float): attention dropout rate
            drop_path_ratio (float): stochastic depth rate (maximum, reached at the last block)
            embed_layer (nn.Module): patch embedding layer
            norm_layer: (nn.Module): normalization layer; defaults to LayerNorm with eps=1e-6
            act_layer: (nn.Module): activation layer; defaults to nn.GELU
        """
        super(VisionTransformer, self).__init__()
        self.num_classes = num_classes
        self.num_features = self.embed_dim = embed_dim  # num_features for consistency with other models
        # number of extra tokens prepended to the patch sequence
        self.num_tokens = 2 if distilled else 1
        norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)
        act_layer = act_layer or nn.GELU

        self.patch_embed = embed_layer(img_size=img_size, patch_size=patch_size, in_c=in_c, embed_dim=embed_dim)
        num_patches = self.patch_embed.num_patches

        # learnable tokens and position embedding, created as zeros and initialized below
        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.dist_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) if distilled else None
        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + self.num_tokens, embed_dim))
        self.pos_drop = nn.Dropout(p=drop_ratio)

        # per-block drop-path rate, increasing linearly from 0 to drop_path_ratio
        dpr = [x.item() for x in torch.linspace(0, drop_path_ratio, depth)]  # stochastic depth decay rule
        self.blocks = nn.Sequential(*[
            Block(dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
                  drop_ratio=drop_ratio, attn_drop_ratio=attn_drop_ratio, drop_path_ratio=dpr[i],
                  norm_layer=norm_layer, act_layer=act_layer)
            for i in range(depth)
        ])
        self.norm = norm_layer(embed_dim)

        # Representation layer (Linear + Tanh), only for non-distilled models
        if representation_size and not distilled:
            self.has_logits = True
            self.num_features = representation_size
            self.pre_logits = nn.Sequential(OrderedDict([
                ("fc", nn.Linear(embed_dim, representation_size)),
                ("act", nn.Tanh())
            ]))
        else:
            self.has_logits = False
            self.pre_logits = nn.Identity()

        # Classifier head(s)
        self.head = nn.Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity()
        self.head_dist = None
        if distilled:
            self.head_dist = nn.Linear(self.embed_dim, self.num_classes) if num_classes > 0 else nn.Identity()

        # Weight init: truncated normal for the embeddings/tokens,
        # then the per-module initializer for every submodule
        nn.init.trunc_normal_(self.pos_embed, std=0.02)
        if self.dist_token is not None:
            nn.init.trunc_normal_(self.dist_token, std=0.02)

        nn.init.trunc_normal_(self.cls_token, std=0.02)
        self.apply(_init_vit_weights)

    def forward_features(self, x):
        """
        Run the backbone and return the token features used for classification.

        Returns the (pre-logits) class-token feature, or for distilled models
        the pair (class-token feature, distillation-token feature).
        """
        # [B, C, H, W] -> [B, num_patches, embed_dim]
        x = self.patch_embed(x)  # [B, 196, 768] with the default 224/16/768 configuration
        # [1, 1, embed_dim] -> [B, 1, embed_dim]
        cls_token = self.cls_token.expand(x.shape[0], -1, -1)
        if self.dist_token is None:
            x = torch.cat((cls_token, x), dim=1)  # [B, 197, 768] with the default configuration
        else:
            x = torch.cat((cls_token, self.dist_token.expand(x.shape[0], -1, -1), x), dim=1)

        x = self.pos_drop(x + self.pos_embed)
        x = self.blocks(x)
        x = self.norm(x)
        if self.dist_token is None:
            # token 0 is the class token
            return self.pre_logits(x[:, 0])
        else:
            # token 0 is the class token, token 1 the distillation token
            return x[:, 0], x[:, 1]

    def forward(self, x):
        """
        Classify a batch of images.

        Returns the head output. Distilled models return the tuple
        (class-head output, distillation-head output) while training (and not
        scripting), and the average of the two otherwise.
        """
        x = self.forward_features(x)
        if self.head_dist is not None:
            x, x_dist = self.head(x[0]), self.head_dist(x[1])
            if self.training and not torch.jit.is_scripting():
                # during training, return both classifier predictions separately
                return x, x_dist
            else:
                # during inference, return the average of both classifier predictions
                return (x + x_dist) / 2
        else:
            x = self.head(x)
        return x


def _init_vit_weights(m):
    """
    ViT weight initialization
    :param m: module
    """
    if isinstance(m, nn.Linear):
        nn.init.trunc_normal_(m.weight, std=.01)
        if m.bias is not None:
            nn.init.zeros_(m.bias)
    elif isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode="fan_out")
        if m.bias is not None:
            nn.init.zeros_(m.bias)
    elif isinstance(m, nn.LayerNorm):
        nn.init.zeros_(m.bias)
        nn.init.ones_(m.weight)


def vit_base_patch16_224(num_classes: int = 1000):
    """
    Build ViT-Base/16 (ViT-B/16) from the original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-1k weights @ 224x224, source https://github.com/google-research/vision_transformer.
    Weights ported from the official Google JAX implementation:
    download link: https://pan.baidu.com/s/1zqb08naP0RPqqfSXfkB2EA  password: eu9f
    """
    config = dict(img_size=224,
                  patch_size=16,
                  embed_dim=768,
                  depth=12,
                  num_heads=12,
                  representation_size=None,
                  num_classes=num_classes)
    return VisionTransformer(**config)


def vit_base_patch16_224_in21k(num_classes: int = 21843, has_logits: bool = True):
    """
    Build ViT-Base/16 (ViT-B/16) from the original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
    Weights ported from the official Google JAX implementation:
    https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_patch16_224_in21k-e5005f0a.pth

    ``has_logits`` toggles the 768-wide pre-logits representation layer.
    """
    pre_logits_dim = 768 if has_logits else None
    config = dict(img_size=224,
                  patch_size=16,
                  embed_dim=768,
                  depth=12,
                  num_heads=12,
                  representation_size=pre_logits_dim,
                  num_classes=num_classes)
    return VisionTransformer(**config)


def vit_base_patch32_224(num_classes: int = 1000):
    """
    Build ViT-Base/32 (ViT-B/32) from the original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-1k weights @ 224x224, source https://github.com/google-research/vision_transformer.
    Weights ported from the official Google JAX implementation:
    download link: https://pan.baidu.com/s/1hCv0U8pQomwAtHBYc4hmZg  password: s5hl
    """
    config = dict(img_size=224,
                  patch_size=32,
                  embed_dim=768,
                  depth=12,
                  num_heads=12,
                  representation_size=None,
                  num_classes=num_classes)
    return VisionTransformer(**config)


def vit_base_patch32_224_in21k(num_classes: int = 21843, has_logits: bool = True):
    """
    Build ViT-Base/32 (ViT-B/32) from the original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
    Weights ported from the official Google JAX implementation:
    https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_patch32_224_in21k-8db57226.pth

    ``has_logits`` toggles the 768-wide pre-logits representation layer.
    """
    pre_logits_dim = 768 if has_logits else None
    config = dict(img_size=224,
                  patch_size=32,
                  embed_dim=768,
                  depth=12,
                  num_heads=12,
                  representation_size=pre_logits_dim,
                  num_classes=num_classes)
    return VisionTransformer(**config)


def vit_large_patch16_224(num_classes: int = 1000):
    """
    Build ViT-Large/16 (ViT-L/16) from the original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-1k weights @ 224x224, source https://github.com/google-research/vision_transformer.
    Weights ported from the official Google JAX implementation:
    download link: https://pan.baidu.com/s/1cxBgZJJ6qUWPSBNcE4TdRQ  password: qqt8
    """
    config = dict(img_size=224,
                  patch_size=16,
                  embed_dim=1024,
                  depth=24,
                  num_heads=16,
                  representation_size=None,
                  num_classes=num_classes)
    return VisionTransformer(**config)


def vit_large_patch16_224_in21k(num_classes: int = 21843, has_logits: bool = True):
    """
    Build ViT-Large/16 (ViT-L/16) from the original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
    Weights ported from the official Google JAX implementation:
    https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_patch16_224_in21k-606da67d.pth

    ``has_logits`` toggles the 1024-wide pre-logits representation layer.
    """
    pre_logits_dim = 1024 if has_logits else None
    config = dict(img_size=224,
                  patch_size=16,
                  embed_dim=1024,
                  depth=24,
                  num_heads=16,
                  representation_size=pre_logits_dim,
                  num_classes=num_classes)
    return VisionTransformer(**config)


def vit_large_patch32_224_in21k(num_classes: int = 21843, has_logits: bool = True):
    """
    Build ViT-Large/32 (ViT-L/32) from the original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
    Weights ported from the official Google JAX implementation:
    https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_patch32_224_in21k-9046d2e7.pth

    ``has_logits`` toggles the 1024-wide pre-logits representation layer.
    """
    pre_logits_dim = 1024 if has_logits else None
    config = dict(img_size=224,
                  patch_size=32,
                  embed_dim=1024,
                  depth=24,
                  num_heads=16,
                  representation_size=pre_logits_dim,
                  num_classes=num_classes)
    return VisionTransformer(**config)


def vit_huge_patch14_224_in21k(num_classes: int = 21843, has_logits: bool = True):
    """
    Build ViT-Huge/14 (ViT-H/14) from the original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
    NOTE: converted weights not currently available, too large for github release hosting.

    ``has_logits`` toggles the 1280-wide pre-logits representation layer.
    """
    pre_logits_dim = 1280 if has_logits else None
    config = dict(img_size=224,
                  patch_size=14,
                  embed_dim=1280,
                  depth=32,
                  num_heads=16,
                  representation_size=pre_logits_dim,
                  num_classes=num_classes)
    return VisionTransformer(**config)


================================================
FILE: pytorch_keypoint/DeepPose/README.md
================================================
# DeepPose
## 对应论文
论文名称：`DeepPose: Human Pose Estimation via Deep Neural Networks`  
论文arxiv链接：[https://arxiv.org/abs/1312.4659](https://arxiv.org/abs/1312.4659)

## 开发环境
开发环境主要信息如下，其他Python依赖详情可见`requirements.txt`文件
- Python3.10
- torch2.0.1+cu118（建议大于等于此版本）
- torchvision0.15.2+cu118（建议大于等于此版本）

## 训练数据集准备
该项目采用的训练数据是WFLW数据集（人脸98点检测），官方链接：[https://wywu.github.io/projects/LAB/WFLW.html](https://wywu.github.io/projects/LAB/WFLW.html)

在官方网页下载数据集后解压并组织成如下目录形式：
```
WFLW
 ├── WFLW_annotations
 │   ├── list_98pt_rect_attr_train_test
 │   └── list_98pt_test
 └── WFLW_images
     ├── 0--Parade
     ├── 1--Handshaking
     ├── 10--People_Marching
     ├── 11--Meeting
     ├── 12--Group
     └── ......
```

## 预训练权重准备
由于该项目默认使用的backbone是torchvision中的resnet50，在实例化模型时会自动下载在imagenet上的预训练权重。
- 若训练环境可正常联网，则会自动下载预训练权重
- 若训练环境无法正常连接网络，可预先在联网的机器上手动下载，下载链接：[https://download.pytorch.org/models/resnet50-11ad3fa6.pth](https://download.pytorch.org/models/resnet50-11ad3fa6.pth) 下载完成后将权重拷贝至训练服务器的`~/.cache/torch/hub/checkpoints`目录下即可

## 启动训练
将训练脚本中的`--dataset_dir`设置成自己构建的`WFLW`数据集绝对路径，例如`/home/wz/datasets/WFLW`
### 单卡训练
使用`train.py`脚本：
```bash
python train.py
```
### 多卡训练
使用`train_multi_GPU.py`脚本：
```
torchrun --nproc_per_node=8 train_multi_GPU.py
```
若要单独指定使用某些卡可在启动指令前加入`CUDA_VISIBLE_DEVICES`参数，例如：
```
CUDA_VISIBLE_DEVICES=4,5,6,7 torchrun --nproc_per_node=4 train_multi_GPU.py
```

## 训练好的权重下载地址
若没有训练条件或者只想简单体验下，可使用本人训练好的模型权重（包含optimizer等信息故文件会略大），该权重在WFLW验证集上的NME指标为`0.048`，百度网盘下载地址：[https://pan.baidu.com/s/1L_zg-fmocEyzhSTxj8IDJw](https://pan.baidu.com/s/1L_zg-fmocEyzhSTxj8IDJw) 
提取码：8fux

下载完成后在当前项目下创建一个`weights`文件夹，并将权重放置该文件夹内。

## 测试图片
可参考`predict.py`文件，将`img_path`设置成自己要预测的人脸图片（注意这里只支持单人脸的关键点检测，故需要提供单独的人脸图片，具体使用时可配合一个人脸检测器联合使用），例如输入图片：

![test.jpg](./test_img.jpg)

网络预测可视化结果为：

![predict.jpg](./predict.jpg)

## 导出ONNX模型（可选）
若需要导出ONNX模型可使用`export_onnx.py`脚本。

================================================
FILE: pytorch_keypoint/DeepPose/datasets.py
================================================
import os
from typing import List, Tuple

import cv2
import torch
import torch.utils.data as data
import numpy as np


class WFLWDataset(data.Dataset):
    """
    WFLW 98-point facial landmark dataset.
    https://wywu.github.io/projects/LAB/WFLW.html

    dataset structure:

    ├── WFLW_annotations
    │   ├── list_98pt_rect_attr_train_test
    │   └── list_98pt_test
    └── WFLW_images
        ├── 0--Parade
        ├── 1--Handshaking
        ├── 10--People_Marching
        ├── 11--Meeting
        ├── 12--Group
        └── ......
    """
    def __init__(self,
                 root: str,
                 train: bool = True,
                 transforms=None):
        """
        Parse the annotation file and index every sample.

        Args:
            root: dataset root containing ``WFLW_images`` and ``WFLW_annotations``.
            train: read the train annotation list if True, otherwise the test list.
            transforms: optional callable ``(img, target) -> (img, target)``
                applied in ``__getitem__``.

        Raises:
            AssertionError: if the image directory or annotation file is missing.
        """
        super().__init__()
        self.img_root = os.path.join(root, "WFLW_images")
        assert os.path.exists(self.img_root), "path '{}' does not exist.".format(self.img_root)
        ana_txt_name = "list_98pt_rect_attr_train.txt" if train else "list_98pt_rect_attr_test.txt"
        self.anno_path = os.path.join(root, "WFLW_annotations", "list_98pt_rect_attr_train_test", ana_txt_name)
        assert os.path.exists(self.anno_path), "file '{}' does not exist.".format(self.anno_path)

        self.transforms = transforms
        self.keypoints: List[np.ndarray] = []
        self.face_rects: List[List[int]] = []
        self.img_paths: List[str] = []
        with open(self.anno_path, "rt") as f:
            # one sample per non-blank line: 196 keypoint values, 4 rect values,
            # further fields, and the relative image path as the last field
            for line in f:
                line = line.strip()
                if not line:
                    continue

                split_list = line.split(" ")
                keypoint_ = self.get_98_points(split_list)
                keypoint = np.array(keypoint_, dtype=np.float32).reshape((-1, 2))
                face_rect = list(map(int, split_list[196: 196 + 4]))  # xmin, ymin, xmax, ymax
                img_name = split_list[-1]

                self.keypoints.append(keypoint)
                self.face_rects.append(face_rect)
                self.img_paths.append(os.path.join(self.img_root, img_name))

    @staticmethod
    def get_5_points(keypoints: List[str]) -> List[float]:
        """Return the x, y values of landmarks 76, 82, 54, 96 and 97 as a flat float list."""
        five_num = [76, 82, 54, 96, 97]
        five_keypoint = []
        for i in five_num:
            five_keypoint.append(keypoints[i * 2])
            five_keypoint.append(keypoints[i * 2 + 1])
        return list(map(float, five_keypoint))

    @staticmethod
    def get_98_points(keypoints: List[str]) -> List[float]:
        """Return the first 196 fields (98 x/y pairs) converted to float."""
        return list(map(float, keypoints[:196]))

    @staticmethod
    def collate_fn(batch_infos: List[Tuple[torch.Tensor, dict]]):
        """
        Stack a list of ``(img, target)`` samples into batch tensors.

        Every target must provide "ori_keypoint", "keypoint" and "m_inv"
        entries that ``torch.stack`` accepts. "m_inv" is not set by this
        dataset, so it presumably comes from the transforms -- verify there.

        Returns:
            (imgs_tensor, targets) where targets has the keys "ori_keypoints",
            "keypoints" and "m_invs".
        """
        imgs, ori_keypoints, keypoints, m_invs = [], [], [], []
        for info in batch_infos:
            imgs.append(info[0])
            ori_keypoints.append(info[1]["ori_keypoint"])
            keypoints.append(info[1]["keypoint"])
            m_invs.append(info[1]["m_inv"])

        imgs_tensor = torch.stack(imgs)
        keypoints_tensor = torch.stack(keypoints)
        ori_keypoints_tensor = torch.stack(ori_keypoints)
        m_invs_tensor = torch.stack(m_invs)

        targets = {"ori_keypoints": ori_keypoints_tensor,
                   "keypoints": keypoints_tensor,
                   "m_invs": m_invs_tensor}
        return imgs_tensor, targets

    def _build_target(self, idx: int) -> dict:
        """Return a fresh target dict for sample ``idx`` that shares no memory with the dataset."""
        # copies keep in-place edits by transforms or callers from corrupting
        # the stored annotations, and keep "ori_keypoint" independent of "keypoint"
        return {
            "box": list(self.face_rects[idx]),
            "ori_keypoint": self.keypoints[idx].copy(),
            "keypoint": self.keypoints[idx].copy()
        }

    def __getitem__(self, idx: int):
        """
        Load sample ``idx``.

        Returns:
            (img, target): the RGB image and a dict with "box", "ori_keypoint"
            and "keypoint", both passed through ``transforms`` when set.

        Raises:
            OSError: if the image file cannot be read.
        """
        img_path = self.img_paths[idx]
        img_bgr = cv2.imread(img_path, flags=cv2.IMREAD_COLOR)
        if img_bgr is None:
            # imread signals failure by returning None rather than raising
            raise OSError("failed to read image '{}'.".format(img_path))
        img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

        target = self._build_target(idx)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.keypoints)


if __name__ == '__main__':
    # manual smoke test: print both split sizes and plot the first training sample
    dataset_root = "/home/wz/datasets/WFLW"

    train_dataset = WFLWDataset(dataset_root, train=True)
    print(len(train_dataset))

    eval_dataset = WFLWDataset(dataset_root, train=False)
    print(len(eval_dataset))

    from utils import draw_keypoints
    sample_img, sample_target = train_dataset[0]
    rel_keypoint = sample_target["keypoint"]
    img_h, img_w, _ = sample_img.shape
    # convert absolute pixel coordinates to image-relative ones (in place)
    rel_keypoint[:, 0] /= img_w
    rel_keypoint[:, 1] /= img_h
    draw_keypoints(sample_img, rel_keypoint, "test_plot.jpg", is_rel=True)


================================================
FILE: pytorch_keypoint/DeepPose/export_onnx.py
================================================
import os
import torch
from model import create_deep_pose_model


def main():
    """
    Export the trained DeepPose model to ``deeppose.onnx``.

    Loads the "model" entry of the checkpoint at ``weights_path`` and traces
    the network with a random 1x3x256x256 input.

    Raises:
        AssertionError: if the weights file does not exist.
    """
    img_hw = [256, 256]
    num_keypoints = 98
    weights_path = "./weights/model_weights_209.pth"
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # create model
    model = create_deep_pose_model(num_keypoints=num_keypoints)

    # load model weights (the checkpoint is a dict; the state dict is under "model")
    assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path)
    model.load_state_dict(torch.load(weights_path, map_location="cpu")["model"])
    model.to(device)

    model.eval()
    with torch.inference_mode():
        # dummy input used only to trace the graph
        x = torch.randn(size=(1, 3, img_hw[0], img_hw[1]), device=device)
        torch.onnx.export(model=model,
                          args=(x,),
                          f="deeppose.onnx")


if __name__ == '__main__':
    main()


================================================
FILE: pytorch_keypoint/DeepPose/model.py
================================================
import torch
import torch.nn as nn
from torchvision.models import resnet50, ResNet50_Weights


def create_deep_pose_model(num_keypoints: int) -> nn.Module:
    """Build the DeepPose regression network.

    A torchvision ResNet-50 initialised with the ``IMAGENET1K_V2`` weights is
    used as backbone and its final fully connected layer is replaced so that
    the network outputs two values per keypoint.

    Args:
        num_keypoints: number of keypoints to regress.

    Returns:
        The ResNet-50 whose ``fc`` layer has ``num_keypoints * 2`` outputs.
    """
    # ``weights`` is a keyword-only parameter of resnet50; passing it
    # positionally only works through a deprecated compatibility shim.
    res50 = resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)
    in_features = res50.fc.in_features
    res50.fc = nn.Linear(in_features=in_features, out_features=num_keypoints * 2)

    return res50


if __name__ == '__main__':
    # Sanity check: push one random 224x224 image through a 98-keypoint model
    # and print the shape of the output.
    torch.manual_seed(1234)
    model = create_deep_pose_model(98)
    model.eval()
    with torch.inference_mode():
        x = torch.randn(1, 3, 224, 224)
        res = model(x)
        print(res.shape)


================================================
FILE: pytorch_keypoint/DeepPose/predict.py
================================================
import os

import torch
import numpy as np
from PIL import Image

import transforms
from model import create_deep_pose_model
from utils import draw_keypoints


def main():
    """Predict the keypoints of ``./test_img.jpg`` and save the plot as ``predict.jpg``.

    The whole image is used as the face box. The image is warped to the
    network input size, the network predicts relative coordinates, and the
    inverse affine matrix maps them back onto the original image.
    """
    img_hw = [256, 256]
    num_keypoints = 98
    img_path = "./test_img.jpg"
    weights_path = "./weights/model_weights_209.pth"
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    transform = transforms.Compose([
        transforms.AffineTransform(scale_prob=0., rotate_prob=0., shift_prob=0., fixed_size=img_hw),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # load image
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    # force 3 channels so grayscale / RGBA files do not break the shape
    # unpacking below or the 3-channel normalization
    img = np.array(Image.open(img_path).convert("RGB"))
    h, w, c = img.shape
    target = {"box": [0, 0, w, h]}
    img_tensor, target = transform(img, target=target)
    # expand batch dimension
    img_tensor = img_tensor.unsqueeze(0)

    # create model
    model = create_deep_pose_model(num_keypoints=num_keypoints)

    # load model weights
    assert os.path.exists(weights_path), "file: '{}' dose not exist.".format(weights_path)
    model.load_state_dict(torch.load(weights_path, map_location="cpu")["model"])
    model.to(device)

    # prediction
    model.eval()
    with torch.inference_mode():
        with torch.autocast(device_type=device.type):
            pred = model(img_tensor.to(device))

        # autocast can return a reduced-precision tensor (bfloat16 on CPU),
        # which NumPy cannot represent; cast to float32 before converting.
        pred = torch.squeeze(pred).reshape([-1, 2]).float().cpu().numpy()

        wh_tensor = np.array(img_hw[::-1], dtype=np.float32).reshape([1, 2])
        pred = pred * wh_tensor  # rel coord to abs coord
        # map the points from the network input back to the original image
        pred = transforms.affine_points_np(pred, target["m_inv"].numpy())
        draw_keypoints(img, coordinate=pred, save_path="predict.jpg", radius=2)


if __name__ == '__main__':
    # Script entry point: run the single-image prediction.
    main()


================================================
FILE: pytorch_keypoint/DeepPose/requirements.txt
================================================
torch>=2.0.1
torchvision>=0.15.2
opencv-python
tqdm
tensorboard

================================================
FILE: pytorch_keypoint/DeepPose/train.py
================================================
import os

import torch
import torch.amp
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

import transforms
from model import create_deep_pose_model
from datasets import WFLWDataset
from train_utils.train_eval_utils import train_one_epoch, evaluate


def get_args_parser(add_help=True):
    """Create the command-line parser for single-device DeepPose training.

    Args:
        add_help: forwarded to ``argparse.ArgumentParser``; controls whether
            the ``-h/--help`` option is added.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    import argparse

    # (flag, add_argument keyword options), kept in the original declaration order
    option_table = [
        ("--dataset_dir", dict(type=str, default="/home/wz/datasets/WFLW", help="WFLW dataset directory")),
        ("--device", dict(type=str, default="cuda:0", help="training device, e.g. cpu, cuda:0")),
        ("--save_weights_dir", dict(type=str, default="./weights", help="save dir for model weights")),
        ("--save_freq", dict(type=int, default=10, help="save frequency for weights and generated imgs")),
        ("--eval_freq", dict(type=int, default=5, help="evaluate frequency")),
        ("--img_hw", dict(default=[256, 256], nargs="+", type=int, help="training image size[h, w]")),
        ("--epochs", dict(type=int, default=210, help="number of epochs of training")),
        ("--batch_size", dict(type=int, default=32, help="size of the batches")),
        ("--num_workers", dict(type=int, default=8, help="number of workers, default: 8")),
        ("--num_keypoints", dict(type=int, default=98, help="number of keypoints")),
        ("--lr", dict(type=float, default=5e-4, help="Adam: learning rate")),
        ("--lr_steps", dict(default=[170, 200], nargs="+", type=int,
                            help="decrease lr every step-size epochs")),
        ("--warmup_epoch", dict(type=int, default=10, help="number of warmup epoch for training")),
        ("--resume", dict(default="", type=str, help="resume from checkpoint")),
        ("--test_only", dict(action="store_true", help="Only test the model")),
    ]

    parser = argparse.ArgumentParser(description="PyTorch DeepPose Training", add_help=add_help)
    for flag, options in option_table:
        parser.add_argument(flag, **options)

    return parser


def main(args):
    """Train DeepPose on WFLW on a single device, or only evaluate it.

    When ``args.resume`` is set, model, optimizer and scheduler state are
    restored and training continues from the epoch after the checkpoint.
    When ``args.test_only`` is set, one evaluation pass is run and the
    function returns without training.

    Args:
        args: parsed options as produced by ``get_args_parser()``.
    """
    torch.manual_seed(1234)
    dataset_dir = args.dataset_dir
    save_weights_dir = args.save_weights_dir
    save_freq = args.save_freq
    eval_freq = args.eval_freq
    num_keypoints = args.num_keypoints
    num_workers = args.num_workers
    epochs = args.epochs
    bs = args.batch_size
    start_epoch = 0
    img_hw = args.img_hw
    os.makedirs(save_weights_dir, exist_ok=True)

    # fall back to the CPU when a CUDA device is requested but unavailable
    if "cuda" in args.device and not torch.cuda.is_available():
        device = torch.device("cpu")
    else:
        device = torch.device(args.device)
    print(f"using device: {device} for training.")

    # tensorboard writer
    tb_writer = SummaryWriter()

    # create model
    model = create_deep_pose_model(num_keypoints)
    model.to(device)

    # config dataset and dataloader
    data_transform = {
        "train": transforms.Compose([
            transforms.AffineTransform(scale_factor=(0.65, 1.35), rotate=45, shift_factor=0.15, fixed_size=img_hw),
            transforms.RandomHorizontalFlip(0.5),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ]),
        "val": transforms.Compose([
            transforms.AffineTransform(scale_prob=0., rotate_prob=0., shift_prob=0., fixed_size=img_hw),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
    }
    train_dataset = WFLWDataset(root=dataset_dir,
                                train=True,
                                transforms=data_transform["train"])
    val_dataset = WFLWDataset(root=dataset_dir,
                              train=False,
                              transforms=data_transform["val"])

    # DataLoader rejects persistent_workers=True when num_workers == 0, so
    # only keep workers alive when worker processes are actually used.
    persistent_workers = num_workers > 0

    train_loader = DataLoader(train_dataset,
                              batch_size=bs,
                              shuffle=True,
                              pin_memory=True,
                              num_workers=num_workers,
                              collate_fn=WFLWDataset.collate_fn,
                              persistent_workers=persistent_workers)

    val_loader = DataLoader(val_dataset,
                            batch_size=bs,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=num_workers,
                            collate_fn=WFLWDataset.collate_fn,
                            persistent_workers=persistent_workers)

    # define optimizers
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # define learning rate scheduler
    # (epoch counts are multiplied by the number of batches per epoch, i.e.
    # both schedulers are expressed in iterations)
    warmup_scheduler = torch.optim.lr_scheduler.LinearLR(
        optimizer=optimizer,
        start_factor=0.01,
        end_factor=1.0,
        total_iters=len(train_loader) * args.warmup_epoch
    )
    multi_step_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer=optimizer,
        milestones=[len(train_loader) * i for i in args.lr_steps],
        gamma=0.1
    )

    lr_scheduler = torch.optim.lr_scheduler.ChainedScheduler([warmup_scheduler, multi_step_scheduler])

    if args.resume:
        assert os.path.exists(args.resume)
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        start_epoch = checkpoint['epoch'] + 1
        print("the training process from epoch{}...".format(start_epoch))

    if args.test_only:
        evaluate(model=model,
                 epoch=start_epoch,
                 val_loader=val_loader,
                 device=device,
                 tb_writer=tb_writer,
                 affine_points_torch_func=transforms.affine_points_torch,
                 num_keypoints=num_keypoints,
                 img_hw=img_hw)
        # flush pending events before leaving
        tb_writer.close()
        return

    for epoch in range(start_epoch, epochs):
        # train
        train_one_epoch(model=model,
                        epoch=epoch,
                        train_loader=train_loader,
                        device=device,
                        optimizer=optimizer,
                        lr_scheduler=lr_scheduler,
                        tb_writer=tb_writer,
                        num_keypoints=num_keypoints,
                        img_hw=img_hw)

        # eval
        if epoch % eval_freq == 0 or epoch == args.epochs - 1:
            evaluate(model=model,
                     epoch=epoch,
                     val_loader=val_loader,
                     device=device,
                     tb_writer=tb_writer,
                     affine_points_torch_func=transforms.affine_points_torch,
                     num_keypoints=num_keypoints,
                     img_hw=img_hw)

        # save weights
        if epoch % save_freq == 0 or epoch == args.epochs - 1:
            save_files = {
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'epoch': epoch
            }
            torch.save(save_files, os.path.join(save_weights_dir, f"model_weights_{epoch}.pth"))

    # flush and close the event file so the last scalars are not lost
    tb_writer.close()


if __name__ == '__main__':
    # Script entry point: parse the command line and run training/evaluation.
    args = get_args_parser().parse_args()
    main(args)


================================================
FILE: pytorch_keypoint/DeepPose/train_multi_GPU.py
================================================
import os

import torch
import torch.amp
from torch.utils.data import DataLoader, DistributedSampler, BatchSampler
from torch.utils.tensorboard import SummaryWriter

import transforms
from model import create_deep_pose_model
from datasets import WFLWDataset
from train_utils.train_eval_utils import train_one_epoch, evaluate
from train_utils.distributed_utils import init_distributed_mode, is_main_process


def get_args_parser(add_help=True):
    """Create the command-line parser for multi-GPU DeepPose training.

    Args:
        add_help: forwarded to ``argparse.ArgumentParser``; controls whether
            the ``-h/--help`` option is added.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    import argparse

    parser = argparse.ArgumentParser(description="PyTorch DeepPose Training", add_help=add_help)
    add = parser.add_argument

    # data / runtime
    add("--dataset_dir", default="/home/wz/datasets/WFLW", type=str, help="WFLW dataset directory")
    add("--device", default="cuda", type=str, help="training device, e.g. cpu, cuda")
    add("--save_weights_dir", default="./weights", type=str, help="save dir for model weights")
    add("--save_freq", default=5, type=int, help="save frequency for weights and generated imgs")
    add("--eval_freq", default=5, type=int, help="evaluate frequency")

    # model input / training schedule
    add("--img_hw", default=[256, 256], type=int, nargs="+", help="training image size[h, w]")
    add("--epochs", default=210, type=int, help="number of epochs of training")
    add("--batch_size", default=32, type=int, help="size of the batches")
    add("--num_workers", default=8, type=int, help="number of workers, default: 8")
    add("--num_keypoints", default=98, type=int, help="number of keypoints")
    add("--lr", default=5e-4, type=float, help="Adam: learning rate")
    add("--lr_steps", default=[170, 200], type=int, nargs="+",
        help="decrease lr every step-size epochs")
    add("--warmup_epoch", default=10, type=int, help="number of warmup epoch for training")

    # checkpointing / distributed / evaluation
    add("--resume", default="", type=str, help="resume from checkpoint")
    add("--dist-url", default="env://", help="url used to set up distributed training")
    add("--test_only", action="store_true", help="Only test the model")

    return parser


def main(args):
    """Train DeepPose on WFLW with DistributedDataParallel, or only evaluate it.

    The process group is initialised from ``args``; a non-distributed launch
    is rejected. The learning rate is multiplied by the world size. Only the
    main process writes TensorBoard events and checkpoints.

    Args:
        args: parsed options as produced by ``get_args_parser()``.

    Raises:
        EnvironmentError: if distributed mode could not be initialised.
    """
    torch.manual_seed(1234)
    init_distributed_mode(args)
    if not args.distributed:
        raise EnvironmentError("not support distributed training.")

    dataset_dir = args.dataset_dir
    save_weights_dir = args.save_weights_dir
    save_freq = args.save_freq
    eval_freq = args.eval_freq
    num_keypoints = args.num_keypoints
    num_workers = args.num_workers
    epochs = args.epochs
    bs = args.batch_size
    start_epoch = 0
    img_hw = args.img_hw
    device = torch.device(args.device)
    os.makedirs(save_weights_dir, exist_ok=True)

    # adjust learning rate
    args.lr = args.lr * args.world_size

    tb_writer = None
    if is_main_process():
        # tensorboard writer
        tb_writer = SummaryWriter()

    # create model
    model = create_deep_pose_model(num_keypoints)
    model.to(device)
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])

    # config dataset and dataloader
    data_transform = {
        "train": transforms.Compose([
            transforms.AffineTransform(scale_factor=(0.65, 1.35), rotate=45, shift_factor=0.15, fixed_size=img_hw),
            transforms.RandomHorizontalFlip(0.5),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ]),
        "val": transforms.Compose([
            transforms.AffineTransform(scale_prob=0., rotate_prob=0., shift_prob=0., fixed_size=img_hw),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
    }
    train_dataset = WFLWDataset(root=dataset_dir,
                                train=True,
                                transforms=data_transform["train"])
    val_dataset = WFLWDataset(root=dataset_dir,
                              train=False,
                              transforms=data_transform["val"])

    train_sampler = DistributedSampler(train_dataset)
    val_sampler = DistributedSampler(val_dataset)
    train_batch_sampler = BatchSampler(train_sampler, args.batch_size, drop_last=True)

    # DataLoader rejects persistent_workers=True when num_workers == 0, so
    # only keep workers alive when worker processes are actually used.
    persistent_workers = num_workers > 0

    train_loader = DataLoader(train_dataset,
                              batch_sampler=train_batch_sampler,
                              pin_memory=True,
                              num_workers=num_workers,
                              collate_fn=WFLWDataset.collate_fn,
                              persistent_workers=persistent_workers)

    val_loader = DataLoader(val_dataset,
                            batch_size=bs,
                            sampler=val_sampler,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=num_workers,
                            collate_fn=WFLWDataset.collate_fn,
                            persistent_workers=persistent_workers)

    # define optimizers
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # define learning rate scheduler
    # (epoch counts are multiplied by the number of batches per epoch, i.e.
    # both schedulers are expressed in iterations)
    warmup_scheduler = torch.optim.lr_scheduler.LinearLR(
        optimizer=optimizer,
        start_factor=0.01,
        end_factor=1.0,
        total_iters=len(train_loader) * args.warmup_epoch
    )
    multi_step_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer=optimizer,
        milestones=[len(train_loader) * i for i in args.lr_steps],
        gamma=0.1
    )

    lr_scheduler = torch.optim.lr_scheduler.ChainedScheduler([warmup_scheduler, multi_step_scheduler])

    if args.resume:
        assert os.path.exists(args.resume)
        checkpoint = torch.load(args.resume, map_location='cpu')
        # checkpoints store the unwrapped model, hence model.module
        model.module.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        start_epoch = checkpoint['epoch'] + 1
        print("the training process from epoch{}...".format(start_epoch))

    if args.test_only:
        evaluate(model=model,
                 epoch=start_epoch,
                 val_loader=val_loader,
                 device=device,
                 tb_writer=tb_writer,
                 affine_points_torch_func=transforms.affine_points_torch,
                 num_keypoints=num_keypoints,
                 img_hw=img_hw)
        # flush pending events before leaving (writer exists on the main process only)
        if tb_writer is not None:
            tb_writer.close()
        return

    for epoch in range(start_epoch, epochs):
        # train
        train_sampler.set_epoch(epoch)  # shuffle training data
        train_one_epoch(model=model,
                        epoch=epoch,
                        train_loader=train_loader,
                        device=device,
                        optimizer=optimizer,
                        lr_scheduler=lr_scheduler,
                        tb_writer=tb_writer,
                        num_keypoints=num_keypoints,
                        img_hw=img_hw)

        # eval
        if epoch % eval_freq == 0 or epoch == args.epochs - 1:
            evaluate(model=model,
                     epoch=epoch,
                     val_loader=val_loader,
                     device=device,
                     tb_writer=tb_writer,
                     affine_points_torch_func=transforms.affine_points_torch,
                     num_keypoints=num_keypoints,
                     img_hw=img_hw)

        # save weights
        if is_main_process() and (epoch % save_freq == 0 or epoch == args.epochs - 1):
            save_files = {
                'model': model.module.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'epoch': epoch
            }
            torch.save(save_files, os.path.join(save_weights_dir, f"model_weights_{epoch}.pth"))

    # flush and close the event file so the last scalars are not lost
    if tb_writer is not None:
        tb_writer.close()


if __name__ == '__main__':
    # Script entry point: parse the command line and run distributed training/evaluation.
    args = get_args_parser().parse_args()
    main(args)


================================================
FILE: pytorch_keypoint/DeepPose/train_utils/distributed_utils.py
================================================
import os

import torch
import torch.distributed as dist


def reduce_value(input_value: torch.Tensor, average=True) -> torch.Tensor:
    """Reduce a tensor over all processes so each one holds the combined result.

    The reduction is performed in place on ``input_value``; the same tensor
    object is returned.

    Args:
        input_value (Tensor): the value to reduce.
        average (bool): divide the reduced value by the world size when True,
            otherwise leave it as produced by ``all_reduce``.

    Returns:
        ``input_value`` itself (untouched when fewer than two processes run).
    """
    num_procs = get_world_size()
    if num_procs >= 2:  # multi-process case; nothing to do for a single process
        with torch.inference_mode():
            dist.all_reduce(input_value)
            if average:
                input_value /= num_procs

    return input_value


def setup_for_distributed(is_master):
    """Replace the built-in ``print`` so that only the master process prints.

    The replacement accepts an extra ``force`` keyword; ``force=True`` prints
    regardless of ``is_master``.
    """
    import builtins

    original_print = builtins.print

    def _gated_print(*args, **kwargs):
        forced = kwargs.pop('force', False)
        if not (is_master or forced):
            return
        original_print(*args, **kwargs)

    builtins.print = _gated_print


def is_dist_avail_and_initialized():
    """Return True only if torch.distributed is available and its process group is initialized."""
    return dist.is_available() and dist.is_initialized()


def get_world_size():
    """Return the number of processes in the group, or 1 when not running distributed."""
    return dist.get_world_size() if is_dist_avail_and_initialized() else 1


def get_rank():
    """Return the rank of the current process, or 0 when not running distributed."""
    return dist.get_rank() if is_dist_avail_and_initialized() else 0


def is_main_process():
    """Return True when the current process has rank 0."""
    rank = get_rank()
    return rank == 0


def init_distributed_mode(args):
    """Initialise ``torch.distributed`` from the launcher's environment variables.

    Sets ``args.distributed`` in every case. When distributed mode is
    enabled it also sets ``args.rank``, ``args.world_size``, ``args.gpu`` and
    ``args.dist_backend``, selects the CUDA device, creates the NCCL process
    group from ``args.dist_url`` and silences ``print`` on non-zero ranks.

    Args:
        args: namespace that provides ``dist_url`` and receives the
            attributes listed above.
    """
    if not torch.cuda.is_available():
        print('No available device')
        args.distributed = False
        return

    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
        args.rank = int(os.environ["RANK"])
        args.world_size = int(os.environ['WORLD_SIZE'])
        args.gpu = int(os.environ['LOCAL_RANK'])
    elif 'SLURM_PROCID' in os.environ:
        args.rank = int(os.environ['SLURM_PROCID'])
        # The process-group setup below reads args.world_size, which this
        # branch previously never assigned; take it from SLURM when available
        # and otherwise keep whatever the caller already stored on args.
        slurm_ntasks = os.environ.get('SLURM_NTASKS')
        if slurm_ntasks is not None:
            args.world_size = int(slurm_ntasks)
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print('Not using distributed mode')
        args.distributed = False
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print(f'| distributed init (rank {args.rank}): {args.dist_url}', flush=True)
    torch.distributed.init_process_group(backend=args.dist_backend,
                                         init_method=args.dist_url,
                                         world_size=args.world_size,
                                         rank=args.rank)
    # wait until every process has joined before continuing
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)


================================================
FILE: pytorch_keypoint/DeepPose/train_utils/losses.py
================================================
import math

import torch
import torch.nn as nn
import torch.nn.functional as F


class L1Loss(nn.Module):
    """L1 keypoint loss: summed over each sample's coordinates, averaged over the batch."""

    def forward(self, pred: torch.Tensor, label: torch.Tensor, mask: torch.Tensor = None) -> torch.Tensor:
        """
        Args:
            pred [N, K, 2]: predicted keypoints
            label [N, K, 2]: target keypoints
            mask [N, K]: optional per-keypoint weight; it multiplies the
                loss of both coordinates, so a zero entry drops that keypoint

        Returns:
            scalar tensor: mean over the batch of the per-sample loss sums
        """
        losses = F.l1_loss(pred, label, reduction="none")
        if mask is not None:
            # filter invalid keypoints(e.g. out of range)
            losses = losses * mask.unsqueeze(2)

        return torch.mean(torch.sum(losses, dim=(1, 2)), dim=0)


class SmoothL1Loss(nn.Module):
    """Smooth-L1 keypoint loss: summed over each sample's coordinates, averaged over the batch."""

    def forward(self, pred: torch.Tensor, label: torch.Tensor, mask: torch.Tensor = None) -> torch.Tensor:
        """
        Args:
            pred [N, K, 2]: predicted keypoints
            label [N, K, 2]: target keypoints
            mask [N, K]: optional per-keypoint weight; it multiplies the
                loss of both coordinates, so a zero entry drops that keypoint

        Returns:
            scalar tensor: mean over the batch of the per-sample loss sums
        """
        losses = F.smooth_l1_loss(pred, label, reduction="none")
        if mask is not None:
            # filter invalid keypoints(e.g. out of range)
            losses = losses * mask.unsqueeze(2)

        return torch.mean(torch.sum(losses, dim=(1, 2)), dim=0)


class L2Loss(nn.Module):
    """Squared-error keypoint loss: summed over each sample's coordinates, averaged over the batch."""

    def forward(self, pred: torch.Tensor, label: torch.Tensor, mask: torch.Tensor = None) -> torch.Tensor:
        """
        Args:
            pred [N, K, 2]: predicted keypoints
            label [N, K, 2]: target keypoints
            mask [N, K]: optional per-keypoint weight; it multiplies the
                loss of both coordinates, so a zero entry drops that keypoint

        Returns:
            scalar tensor: mean over the batch of the per-sample loss sums
        """
        losses = F.mse_loss(pred, label, reduction="none")
        if mask is not None:
            # filter invalid keypoints(e.g. out of range)
            losses = losses * mask.unsqueeze(2)

        return torch.mean(torch.sum(losses, dim=(1, 2)), dim=0)


class WingLoss(nn.Module):
    """refer https://github.com/TropComplique/wing-loss/blob/master/loss.py

    For an absolute error ``d``: ``w * ln(1 + d / epsilon)`` when ``d < w``
    and ``d - C`` otherwise, with ``C = w * (1 - ln(1 + w / epsilon))``.
    """
    def __init__(self, w: float = 10.0, epsilon: float = 2.0) -> None:
        super().__init__()
        self.w = w
        self.epsilon = epsilon
        # offset used by the linear branch
        self.C = w * (1.0 - math.log(1.0 + w / epsilon))

    def forward(self,
                pred: torch.Tensor,
                label: torch.Tensor,
                wh_tensor: torch.Tensor,
                mask: torch.Tensor = None) -> torch.Tensor:
        """
        Args:
            pred [N, K, 2]: predicted keypoints
            label [N, K, 2]: target keypoints
            wh_tensor [1, 1, 2]: factor the absolute error is multiplied by
                (relative to absolute coordinates)
            mask [N, K]: optional per-keypoint weight; it multiplies the
                loss of both coordinates, so a zero entry drops that keypoint

        Returns:
            scalar tensor: mean over the batch of the per-sample loss sums
        """
        delta = (pred - label).abs() * wh_tensor  # rel to abs
        log_branch = self.w * torch.log(1.0 + delta / self.epsilon)
        linear_branch = delta - self.C
        losses = torch.where(delta < self.w, log_branch, linear_branch)
        if mask is not None:
            # filter invalid keypoints(e.g. out of range)
            losses = losses * mask.unsqueeze(2)

        return torch.mean(torch.sum(losses, dim=(1, 2)), dim=0)


class SoftWingLoss(nn.Module):
    """refer mmpose/models/losses/regression_loss.py

    For an absolute error ``d``: ``d`` when ``d < omega1`` and
    ``omega2 * ln(1 + d / epsilon) + B`` otherwise, with
    ``B = omega1 - omega2 * ln(1 + omega1 / epsilon)``.
    """
    def __init__(self, omega1: float = 2.0, omega2: float = 20.0, epsilon: float = 0.5) -> None:
        super().__init__()
        self.omega1 = omega1
        self.omega2 = omega2
        self.epsilon = epsilon
        # offset used by the logarithmic branch
        self.B = omega1 - omega2 * math.log(1.0 + omega1 / epsilon)

    def forward(self,
                pred: torch.Tensor,
                label: torch.Tensor,
                wh_tensor: torch.Tensor,
                mask: torch.Tensor = None) -> torch.Tensor:
        """
        Args:
            pred [N, K, 2]: predicted keypoints
            label [N, K, 2]: target keypoints
            wh_tensor [1, 1, 2]: factor the absolute error is multiplied by
                (relative to absolute coordinates)
            mask [N, K]: optional per-keypoint weight; it multiplies the
                loss of both coordinates, so a zero entry drops that keypoint

        Returns:
            scalar tensor: mean over the batch of the per-sample loss sums
        """
        delta = (pred - label).abs() * wh_tensor  # rel to abs
        log_branch = self.omega2 * torch.log(1.0 + delta / self.epsilon) + self.B
        losses = torch.where(delta < self.omega1, delta, log_branch)
        if mask is not None:
            # filter invalid keypoints(e.g. out of range)
            losses = losses * mask.unsqueeze(2)

        return torch.mean(torch.sum(losses, dim=(1, 2)), dim=0)


================================================
FILE: pytorch_keypoint/DeepPose/train_utils/metrics.py
================================================
import torch

from .distributed_utils import reduce_value, is_dist_avail_and_initialized


class NMEMetric:
    """Accumulates the Normalized Mean Error (NME) of keypoint predictions.

    Each sample's mean point-to-point distance is divided by the distance
    between ground-truth keypoints 60 and 72 (inter-ocular distance).
    """

    def __init__(self, device: torch.device) -> None:
        # keypoint indices of the outer corners of the two eyes
        self.keypoint_idxs = [60, 72]
        self.nme_accumulator: float = 0.
        self.counter: float = 0.
        # device used for the tensors exchanged in synchronize_results
        self.device = device

    def update(self, pred: torch.Tensor, gt: torch.Tensor, mask: torch.Tensor = None):
        """Add one batch to the running sums.

        Args:
            pred (shape [N, K, 2]): pred keypoints
            gt (shape [N, K, 2]): gt keypoints
            mask (shape [N, K]): valid keypoints mask. It only decides which
                samples are counted (a sample is kept when at least one of
                its keypoints is marked valid); the distance of a kept sample
                is still averaged over all K keypoints.
        """
        # ion: inter-ocular distance normalized error
        ion = torch.linalg.norm(gt[:, self.keypoint_idxs[0]] - gt[:, self.keypoint_idxs[1]], dim=1)

        # samples with a zero inter-ocular distance cannot be normalized
        valid_ion_mask = ion > 0
        if mask is None:
            mask = valid_ion_mask
        else:
            mask = torch.logical_and(mask, valid_ion_mask.unsqueeze(dim=1)).sum(dim=1) > 0
        num_valid = mask.sum().item()

        # equal: (pred - gt).pow(2).sum(dim=2).pow(0.5).mean(dim=1)
        l2_dis = torch.linalg.norm(pred - gt, dim=2)[mask].mean(dim=1)  # [N]

        # avoid divide by zero
        ion = ion[mask]  # [N]

        self.nme_accumulator += l2_dis.div(ion).sum().item()
        self.counter += num_valid

    def evaluate(self):
        """Return the accumulated NME (raises ZeroDivisionError when no sample was counted)."""
        return self.nme_accumulator / self.counter

    def synchronize_results(self):
        """Sum the accumulator and the counter over all processes when running distributed."""
        if is_dist_avail_and_initialized():
            self.nme_accumulator = reduce_value(
                torch.as_tensor(self.nme_accumulator, device=self.device),
                average=False
            ).item()

            # convert back to a Python number so the attribute keeps its type
            self.counter = reduce_value(
                torch.as_tensor(self.counter, device=self.device),
                average=False
            ).item()


if __name__ == '__main__':
    # Smoke test on random data. NMEMetric requires a device, and the mask
    # is expected to mark valid keypoints, so a boolean mask is passed.
    metric = NMEMetric(device=torch.device("cpu"))
    metric.update(pred=torch.randn(32, 98, 2),
                  gt=torch.randn(32, 98, 2),
                  mask=torch.randn(32, 98) > 0)
    print(metric.evaluate())


================================================
FILE: pytorch_keypoint/DeepPose/train_utils/train_eval_utils.py
================================================
import sys
import math
from typing import Callable, List

from tqdm import tqdm
import torch
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

from .losses import WingLoss
from .metrics import NMEMetric
from .distributed_utils import is_main_process, reduce_value


def train_one_epoch(model: torch.nn.Module,
                    epoch: int,
                    train_loader: DataLoader,
                    device: torch.device,
                    optimizer: torch.optim.Optimizer,
                    lr_scheduler: torch.optim.lr_scheduler.LRScheduler,
                    tb_writer: SummaryWriter,
                    num_keypoints: int,
                    img_hw: List[int]) -> None:
    """Run one training epoch with the Wing loss under autocast.

    The learning-rate scheduler is stepped after every optimizer step. The
    main process shows a progress bar and logs loss and learning rate to
    TensorBoard. Training is aborted with exit code 1 when the loss is not
    finite.

    Args:
        model: network to train.
        epoch: index of the current epoch (used for logging).
        train_loader: yields ``(imgs, targets)`` with ``targets["keypoints"]``.
        device: device the batches are moved to.
        optimizer: optimizer updating ``model``.
        lr_scheduler: scheduler stepped once per iteration.
        tb_writer: TensorBoard writer; only used on the main process.
        num_keypoints: number of keypoints predicted per image.
        img_hw: network input size as ``[h, w]``.
    """
    # define loss function
    loss_func = WingLoss()
    # [w, h] factor that turns relative errors into absolute ones inside the loss
    wh_tensor = torch.as_tensor(img_hw[::-1], dtype=torch.float32, device=device).reshape([1, 1, 2])

    model.train()
    train_bar = train_loader
    if is_main_process():
        train_bar = tqdm(train_loader, file=sys.stdout)

    for step, (imgs, targets) in enumerate(train_bar):
        imgs = imgs.to(device)
        labels = targets["keypoints"].to(device)

        optimizer.zero_grad()
        # use mixed precision to speed up training
        with torch.autocast(device_type=device.type):
            pred: torch.Tensor = model(imgs)
            loss: torch.Tensor = loss_func(pred.reshape((-1, num_keypoints, 2)), labels, wh_tensor)

        # reduce_value works in place, so hand it a detached copy: the tensor
        # that is about to be back-propagated is left untouched.
        loss_value = reduce_value(loss.detach().clone()).item()
        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            sys.exit(1)

        loss.backward()
        optimizer.step()
        lr_scheduler.step()

        if is_main_process():
            train_bar.desc = f"train epoch[{epoch}] loss:{loss_value:.3f}"

            global_step = epoch * len(train_loader) + step
            # reuse the already fetched value instead of a second .item() call
            tb_writer.add_scalar("train loss", loss_value, global_step=global_step)
            tb_writer.add_scalar("learning rate", optimizer.param_groups[0]["lr"], global_step=global_step)


@torch.inference_mode()
def evaluate(model: torch.nn.Module,
             epoch: int,
             val_loader: DataLoader,
             device: torch.device,
             tb_writer: SummaryWriter,
             affine_points_torch_func: Callable,
             num_keypoints: int,
             img_hw: List[int]) -> None:
    """Compute the NME over ``val_loader``; the main process logs and prints it.

    Predictions are scaled by the input width/height, mapped back with the
    per-sample inverse affine matrices and compared against the original
    keypoints.
    """
    model.eval()
    nme_metric = NMEMetric(device=device)
    # [w, h] laid out as [1, 1, 2] so it broadcasts over [N, K, 2]
    input_wh = torch.as_tensor(img_hw[::-1], dtype=torch.float32, device=device).reshape([1, 1, 2])

    if is_main_process():
        batches = tqdm(val_loader, file=sys.stdout, desc="evaluation")
    else:
        batches = val_loader

    for imgs, targets in batches:
        imgs = imgs.to(device)
        inverse_mats = targets["m_invs"].to(device)
        gt_points = targets["ori_keypoints"].to(device)

        output = model(imgs).reshape((-1, num_keypoints, 2))  # [N, K, 2]
        abs_points = output * input_wh  # rel coord to abs coord
        restored = affine_points_torch_func(abs_points, inverse_mats)

        nme_metric.update(restored, gt_points)

    nme_metric.synchronize_results()
    if not is_main_process():
        return

    nme = nme_metric.evaluate()
    tb_writer.add_scalar("evaluation nme", nme, global_step=epoch)
    print(f"evaluation NME[{epoch}]: {nme:.3f}")


================================================
FILE: pytorch_keypoint/DeepPose/transforms.py
================================================
import math
import random
from typing import Tuple

import cv2
import torch
import numpy as np

from wflw_horizontal_flip_indices import wflw_flip_indices_dict


def adjust_box(xmin: int, ymin: int, xmax: int, ymax: int, fixed_size: Tuple[int, int]):
    """Grow a box symmetrically in w or h so its aspect ratio matches ``fixed_size``.

    Args:
        xmin, ymin, xmax, ymax: box corners.
        fixed_size: target size as ``(h, w)``.

    Returns:
        ``(xmin, ymin, xmax, ymax)`` of the padded box; the padded pair of
        coordinates may be fractional.
    """
    w = xmax - xmin
    h = ymax - ymin

    hw_ratio = fixed_size[0] / fixed_size[1]
    # Same test as ``h / w > hw_ratio`` but cross-multiplied, so a box of
    # zero width no longer raises ZeroDivisionError.
    if h * fixed_size[1] > w * fixed_size[0]:
        # box is too tall: pad in the w direction
        wi = h / hw_ratio
        pad_w = (wi - w) / 2
        xmin = xmin - pad_w
        xmax = xmax + pad_w
    else:
        # box is too wide (or already matches): pad in the h direction
        hi = w * hw_ratio
        pad_h = (hi - h) / 2
        ymin = ymin - pad_h
        ymax = ymax + pad_h

    return xmin, ymin, xmax, ymax


def affine_points_np(keypoint: np.ndarray, m: np.ndarray) -> np.ndarray:
    """Apply a 2x3 affine matrix to a set of 2-D points.

    Args:
        keypoint [k, 2]
        m [2, 3]

    Returns:
        the transformed points, [k, 2]
    """
    num_points = keypoint.shape[0]
    # append a column of ones -> homogeneous coordinates [k, 3]
    homogeneous = np.hstack((keypoint, np.ones((num_points, 1), dtype=np.float32)))
    return homogeneous @ m.T


def affine_points_torch(keypoint: torch.Tensor, m: torch.Tensor) -> torch.Tensor:
    """Apply one 2x3 affine matrix per sample to a batch of 2-D points.

    Args:
        keypoint: tensor of shape [n, k, 2].
        m: tensor of shape [n, 2, 3], one affine matrix per sample.

    Returns:
        Tensor of shape [n, k, 2] with the transformed points.
    """
    batch, num_points, _ = keypoint.shape
    # Homogeneous coordinate, created with the dtype/device of the input points.
    pad = torch.ones(size=(batch, num_points, 1), dtype=keypoint.dtype, device=keypoint.device)
    homogeneous = torch.cat((keypoint, pad), dim=2)  # [n, k, 3]
    return torch.matmul(homogeneous, m.transpose(1, 2))


class Compose(object):
    """Chain several transforms and apply them one after another.

    Every transform is called as ``transform(image, target)`` and must return
    an ``(image, target)`` pair, which is fed to the next transform.
    """
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        sample = image, target
        for step in self.transforms:
            image, target = step(*sample)
            sample = image, target
        return sample


class Resize(object):
    """Resize the image to a fixed (h, w) with bilinear interpolation.

    NOTE: ``target`` is returned untouched, so keypoint coordinates stored in
    it are not rescaled by this transform.
    """
    def __init__(self, h: int, w: int):
        self.h = h
        self.w = w

    def __call__(self, image: np.ndarray, target):
        out_size = (self.w, self.h)  # OpenCV takes dsize as (width, height)
        resized = cv2.resize(image, dsize=out_size, fx=0, fy=0,
                             interpolation=cv2.INTER_LINEAR)
        return resized, target


class ToTensor(object):
    """Convert an OpenCV image to a float tensor and target arrays to tensors.

    The image is turned from HWC into CHW layout and scaled to 0~1 by dividing
    by 255. Each of the ``ori_keypoint``, ``keypoint`` and ``m_inv`` entries of
    ``target`` is converted with ``torch.from_numpy`` when it is present and is
    a numpy array; missing entries are simply skipped, so the transform also
    works on targets that carry no keypoints or no inverse affine matrix.
    """
    # Target entries that hold numpy arrays and should become tensors.
    _array_keys = ("ori_keypoint", "keypoint", "m_inv")

    def __call__(self, image, target):
        image = torch.from_numpy(image).permute((2, 0, 1))
        image = image.to(torch.float32) / 255.

        for key in self._array_keys:
            value = target.get(key)
            # Convert each entry on its own instead of requiring all of them.
            if isinstance(value, np.ndarray):
                target[key] = torch.from_numpy(value)
        return image, target


class Normalize(object):
    """Standardise a CHW image tensor in place and make keypoints relative.

    ``mean`` and ``std`` are per-channel values for a 3-channel image. When the
    target holds a ``keypoint`` entry, its x column is divided by the image
    width and its y column by the image height (also in place).
    """
    def __init__(self, mean=None, std=None):
        channel_shape = (3, 1, 1)  # broadcast over H and W
        self.mean = torch.as_tensor(mean, dtype=torch.float32).reshape(channel_shape)
        self.std = torch.as_tensor(std, dtype=torch.float32).reshape(channel_shape)

    def __call__(self, image: torch.Tensor, target: dict):
        # In-place ops: the caller's tensor is modified and returned.
        image.sub_(self.mean)
        image.div_(self.std)

        if "keypoint" not in target:
            return image, target

        _, height, width = image.shape
        points = target["keypoint"]
        points[:, 0] /= width
        points[:, 1] /= height
        target["keypoint"] = points
        return image, target


class RandomHorizontalFlip(object):
    """Randomly mirror the input image (and its keypoints) left-to-right.

    Args:
        p: probability of applying the flip.

    When the flip is applied and the target holds a ``keypoint`` array of
    shape [k, 2], the rows are re-ordered so that every landmark slot receives
    its mirrored counterpart, and x is mapped to ``(w - 1) - x``. That is the
    same mapping ``np.flip`` applies to pixel columns, so a keypoint keeps
    pointing at the same pixel after the flip.
    """
    def __init__(self, p: float = 0.5):
        self.p = p
        # Entry i is the landmark whose mirrored position lands in slot i.
        # Build it by key so it does not depend on the dict's literal order.
        self.wflw_flip_ids = [wflw_flip_indices_dict[k] for k in sorted(wflw_flip_indices_dict)]

    def __call__(self, image: np.ndarray, target: dict):
        if random.random() < self.p:
            # [h, w, c]
            image = np.ascontiguousarray(np.flip(image, axis=1))

            # [k, 2]
            if "keypoint" in target:
                _, w, _ = image.shape
                keypoint: np.ndarray = target["keypoint"]
                # Fancy indexing copies, so the caller's array is left untouched.
                keypoint = keypoint[self.wflw_flip_ids]
                # Column x moves to column w - 1 - x (0-based pixel indices).
                keypoint[:, 0] = (w - 1) - keypoint[:, 0]
                target["keypoint"] = keypoint

        return image, target


class AffineTransform(object):
    """Random shift + scale + rotation of the target box, warped to ``fixed_size``.

    The box in ``target["box"]`` (xmin, ymin, xmax, ymax) is first padded to the
    aspect ratio of ``fixed_size``. Three reference points of the (possibly
    shifted, scaled and rotated) box - centre, top-middle and right-middle -
    are then mapped onto the corresponding points of the output image.

    Args:
        scale_factor: (min, max) range the box size is multiplied by.
        scale_prob: probability of applying the random scale.
        rotate: maximum absolute rotation in whole degrees.
        rotate_prob: probability of applying the random rotation.
        shift_factor: maximum shift as a fraction of the box width / height.
        shift_prob: probability of applying the random shift.
        fixed_size: output size as (h, w).

    The call stores the forward matrix in ``target["m"]`` and its inverse in
    ``target["m_inv"]``; ``target["keypoint"]``, if present, is transformed
    with the forward matrix.
    """
    def __init__(self,
                 scale_factor: Tuple[float, float] = (0.65, 1.35),
                 scale_prob: float = 1.,
                 rotate: int = 45,
                 rotate_prob: float = 0.6,
                 shift_factor: float = 0.15,
                 shift_prob: float = 0.3,
                 fixed_size: Tuple[int, int] = (256, 256)):
        self.scale_factor = scale_factor
        self.scale_prob = scale_prob
        self.rotate = rotate
        self.rotate_prob = rotate_prob
        self.shift_factor = shift_factor
        self.shift_prob = shift_prob
        self.fixed_size = fixed_size  # (h, w)

    def __call__(self, img: np.ndarray, target: dict):
        x0, y0, x1, y1 = adjust_box(*target["box"], fixed_size=self.fixed_size)
        box_w = x1 - x0
        box_h = y1 - y0

        # Random shift: move the whole box by a whole number of pixels.
        if random.random() < self.shift_prob:
            shift_w_factor = random.uniform(-self.shift_factor, self.shift_factor)
            shift_h_factor = random.uniform(-self.shift_factor, self.shift_factor)
            dx = int(box_w * shift_w_factor)
            dy = int(box_h * shift_h_factor)
            x0 -= dx
            x1 -= dx
            y0 -= dy
            y1 -= dy

        src_center = np.array([(x0 + x1) / 2, (y0 + y1) / 2], dtype=np.float32)

        # Random scale: grow / shrink the box around its centre.
        if random.random() < self.scale_prob:
            scale = random.uniform(*self.scale_factor)
            box_w = box_w * scale
            box_h = box_h * scale

        # Random rotation: rotate the two reference offsets around the centre.
        if random.random() < self.rotate_prob:
            angle = random.randint(-self.rotate, self.rotate)  # degrees
            angle = angle / 180 * math.pi  # radians
            top_offset = [box_h / 2 * math.sin(angle), -box_h / 2 * math.cos(angle)]
            right_offset = [box_w / 2 * math.cos(angle), box_w / 2 * math.sin(angle)]
        else:
            top_offset = [0, -box_h / 2]
            right_offset = [box_w / 2, 0]

        src_p2 = src_center + np.array(top_offset, dtype=np.float32)    # top middle
        src_p3 = src_center + np.array(right_offset, dtype=np.float32)  # right middle

        # Matching reference points in the output image (0-based pixel indices).
        last_x = self.fixed_size[1] - 1
        last_y = self.fixed_size[0] - 1
        dst_center = np.array([last_x / 2, last_y / 2], dtype=np.float32)
        dst_p2 = np.array([last_x / 2, 0], dtype=np.float32)   # top middle
        dst_p3 = np.array([last_x, last_y / 2], dtype=np.float32)  # right middle

        src = np.stack([src_center, src_p2, src_p3])
        dst = np.stack([dst_center, dst_p2, dst_p3])

        # Forward matrix, plus the inverse so predictions can be mapped back later.
        m = cv2.getAffineTransform(src, dst).astype(np.float32)
        m_inv = cv2.getAffineTransform(dst, src).astype(np.float32)

        # Warp the image; pixels outside the source are filled with black.
        warp_img = cv2.warpAffine(src=img,
                                  M=m,
                                  dsize=tuple(self.fixed_size[::-1]),  # [w, h]
                                  borderMode=cv2.BORDER_CONSTANT,
                                  borderValue=(0, 0, 0),
                                  flags=cv2.INTER_LINEAR)

        if "keypoint" in target:
            target["keypoint"] = affine_points_np(target["keypoint"], m)

        target["m"] = m
        target["m_inv"] = m_inv
        return warp_img, target


================================================
FILE: pytorch_keypoint/DeepPose/utils.py
================================================
import cv2
import numpy as np


def draw_keypoints(img: np.ndarray, coordinate: np.ndarray, save_path: str, radius: int = 3, is_rel: bool = False):
    """Draw keypoints as filled circles on a copy of ``img`` and save it.

    Args:
        img: image array of shape [h, w, c]; it is converted with
            ``cv2.COLOR_RGB2BGR`` before drawing.
        coordinate: [k, 2] array of (x, y) points; it is copied, not modified.
        save_path: file path handed to ``cv2.imwrite``.
        radius: circle radius in pixels.
        is_rel: when True, ``coordinate`` holds coordinates relative to the
            image size and is scaled by the image width / height first.
    """
    points = coordinate.copy()
    if is_rel:
        height, width, _ = img.shape
        points[:, 0] *= width
        points[:, 1] *= height
    pixel_points = points.astype(np.int64).tolist()

    canvas = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    for px, py in pixel_points:
        cv2.circle(canvas, center=(px, py), radius=radius, color=(255, 0, 0), thickness=-1)

    cv2.imwrite(save_path, canvas)


================================================
FILE: pytorch_keypoint/DeepPose/wflw_horizontal_flip_indices.py
================================================
# Horizontal-flip lookup table for the 98 landmark indices (0-97) used here:
# each key maps to the index of its mirrored counterpart, and the mapping is
# its own inverse (if a -> b then b -> a). Indices that map to themselves
# (16, 51-54, 57, 79, 85, 90, 94) are presumably landmarks on the face midline.
# Keep the keys listed in ascending order: consumers may read the values
# positionally.
wflw_flip_indices_dict = {
    0: 32,
    1: 31,
    2: 30,
    3: 29,
    4: 28,
    5: 27,
    6: 26,
    7: 25,
    8: 24,
    9: 23,
    10: 22,
    11: 21,
    12: 20,
    13: 19,
    14: 18,
    15: 17,
    16: 16,
    17: 15,
    18: 14,
    19: 13,
    20: 12,
    21: 11,
    22: 10,
    23: 9,
    24: 8,
    25: 7,
    26: 6,
    27: 5,
    28: 4,
    29: 3,
    30: 2,
    31: 1,
    32: 0,
    33: 46,
    34: 45,
    35: 44,
    36: 43,
    37: 42,
    38: 50,
    39: 49,
    40: 48,
    41: 47,
    42: 37,
    43: 36,
    44: 35,
    45: 34,
    46: 33,
    47: 41,
    48: 40,
    49: 39,
    50: 38,
    51: 51,
    52: 52,
    53: 53,
    54: 54,
    55: 59,
    56: 58,
    57: 57,
    58: 56,
    59: 55,
    60: 72,
    61: 71,
    62: 70,
    63: 69,
    64: 68,
    65: 75,
    66: 74,
    67: 73,
    68: 64,
    69: 63,
    70: 62,
    71: 61,
    72: 60,
    73: 67,
    74: 66,
    75: 65,
    76: 82,
    77: 81,
    78: 80,
    79: 79,
    80: 78,
    81: 77,
    82: 76,
    83: 87,
    84: 86,
    85: 85,
    86: 84,
    87: 83,
    88: 92,
    89: 91,
    90: 90,
    91: 89,
    92: 88,
    93: 95,
    94: 94,
    95: 93,
    96: 97,
    97: 96,
}


================================================
FILE: pytorch_keypoint/HRNet/README.md
================================================
# HRNet

## 该项目主要参考以下仓库
* https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
* https://github.com/stefanopini/simple-HRNet

## 环境配置：
* Python3.6/3.7/3.8
* Pytorch1.10或以上
* pycocotools(Linux:`pip install pycocotools`; Windows:`pip install pycocotools-windows`(不需要额外安装vs))
* Ubuntu或Centos(不建议Windows)
* 最好使用GPU训练
* 详细环境配置见`requirements.txt`

## 文件结构：
```
  ├── model: 搭建HRNet相关代码
  ├── train_utils: 训练验证相关模块（包括coco验证相关）
  ├── my_dataset_coco.py: 自定义dataset用于读取COCO2017数据集
  ├── person_keypoints.json: COCO数据集中人体关键点相关信息
  ├── train.py: 单GPU/CPU训练脚本
  ├── train_multi_GPU.py: 针对使用多GPU的用户使用
  ├── predict.py: 简易的预测脚本，使用训练好的权重进行预测
  ├── validation.py: 利用训练好的权重验证/测试数据的COCO指标，并生成record_mAP.txt文件
  └── transforms.py: 数据增强相关
```

## 预训练权重下载地址（下载后放入当前文件夹中）：
由于原作者提供的预训练权重(ImageNet和COCO)是放在GoogleDrive和OneDrive上的，国内无法正常访问。所以我提前将权重文件全部下载并放在百度网盘中，
需要的可以自行下载，链接:https://pan.baidu.com/s/1Lu6mMAWfm_8GGykttFMpVw 提取码:f43o

下载后的目录结构如下：
```
├── pytorch
      ├── pose_mpii
      ├── pose_coco
      │     ├── pose_resnet_50_384x288.pth
      │     ├── pose_resnet_50_256x192.pth
      │     ├── pose_resnet_101_384x288.pth
      │     ├── pose_resnet_101_256x192.pth
      │     ├── pose_hrnet_w32_384x288.pth
      │     └── pose_hrnet_w32_256x192.pth
      └── imagenet
            ├── resnet50-19c8e357.pth
            ├── resnet152-b121ed2d.pth
            ├── resnet101-5d3b4d8f.pth
            └── hrnet_w32-36af842e.pth
```
如果要直接使用在COCO数据集上预训练好的权重进行预测，下载pose_coco下的`pose_hrnet_w32_256x192.pth`使用即可。
如果要从头训练网络，下载imagenet下的`hrnet_w32-36af842e.pth`文件，并重命名为`hrnet_w32.pth`即可。

除此之外，还有一个`person_detection_results`文件，存储的是论文中提到的人体检测器的检测结果，如果需要使用可以下载，但个人建议直接使用COCO val中GT信息即可。
链接: https://pan.baidu.com/s/19Z4mmNHUD934GQ9QYcF5iw  密码: i08q
 
## 数据集，本例程使用的是COCO2017数据集
* COCO官网地址：https://cocodataset.org/
* 对数据集不了解的可以看下我写的博文：https://blog.csdn.net/qq_37541097/article/details/113247318
* 这里以下载coco2017数据集为例，主要下载三个文件：
    * `2017 Train images [118K/18GB]`：训练过程中使用到的所有图像文件
    * `2017 Val images [5K/1GB]`：验证过程中使用到的所有图像文件
    * `2017 Train/Val annotations [241MB]`：对应训练集和验证集的标注json文件
* 都解压到`coco2017`文件夹下，可得到如下文件夹结构：
```
├── coco2017: 数据集根目录
     ├── train2017: 所有训练图像文件夹(118287张)
     ├── val2017: 所有验证图像文件夹(5000张)
     └── annotations: 对应标注文件夹
              ├── instances_train2017.json: 对应目标检测、分割任务的训练集标注文件
              ├── instances_val2017.json: 对应目标检测、分割任务的验证集标注文件
              ├── captions_train2017.json: 对应图像描述的训练集标注文件
              ├── captions_val2017.json: 对应图像描述的验证集标注文件
              ├── person_keypoints_train2017.json: 对应人体关键点检测的训练集标注文件
              └── person_keypoints_val2017.json: 对应人体关键点检测的验证集标注文件
```

## 训练方法
* 注：该项目从头训练HRNet在MS COCO2017的val上的mAP[@0.50:0.95]为76.1，利用原作者提供的权重在val上的mAP[@0.50:0.95]为76.6，相差0.5个点，
暂时没有找到原因。由于训练该网络需要迭代210个epoch(按照论文中的数据)，训练时间很长，建议直接使用原作者提供训练好的权重。另外，在训练过程中发现GPU的利用率
并不高(在20%~60%之间浮动)，暂时猜测是网络结构的原因。
* 确保提前准备好数据集
* 确保提前下载好对应预训练模型权重
* 确保设置好`--num-joints`(对于人体检测的关键点个数，COCO是17个点)、`--fixed-size`(输入目标图像的高宽，默认[256, 192])和`--data-path`(指向`coco2017`目录)
* 若要使用单GPU训练直接使用train.py训练脚本
* 若要使用多GPU训练，使用`torchrun --nproc_per_node=8 train_multi_GPU.py`指令,`nproc_per_node`参数为使用GPU数量
* 如果想指定使用哪些GPU设备可在指令前加上`CUDA_VISIBLE_DEVICES=0,3`(例如我只要使用设备中的第1块和第4块GPU设备)
* `CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py`

## 注意事项
1. 在使用训练脚本时，注意要将`--data-path`设置为自己存放数据集的**根目录**：
假设要使用COCO数据集，启用自定义数据集读取CocoKeypoint并将数据集解压到/data/coco2017目录下
```
python train.py --data-path /data/coco2017
```
2. 训练过程中保存的`results.txt`是每个epoch在验证集上的COCO指标，前10个值是COCO指标，后面两个值是训练平均损失以及学习率
3. 在使用预测脚本时，如果要读取自己训练好的权重要将`weights_path`设置为你自己生成的权重路径。


## 如果对HRNet网络不是很理解可参考我的bilibili
https://www.bilibili.com/video/BV1bB4y1y7qP

## 进一步了解该项目，以及对HRNet代码的分析可参考我的bilibili
https://www.bilibili.com/video/BV1ar4y157JM

## HRNet网络结构图
![HRNet.png](HRNet.png)


================================================
FILE: pytorch_keypoint/HRNet/draw_utils.py
================================================
import numpy as np
from numpy import ndarray
import PIL
from PIL import ImageDraw, ImageFont
from PIL.Image import Image

# COCO 17 points
# Keypoint labels; draw_keypoints looks a label up by the keypoint's position
# in the input array when draw_text is enabled.
point_name = ["nose", "left_eye", "right_eye",
              "left_ear", "right_ear",
              "left_shoulder", "right_shoulder",
              "left_elbow", "right_elbow",
              "left_wrist", "right_wrist",
              "left_hip", "right_hip",
              "left_knee", "right_knee",
              "left_ankle", "right_ankle"]

# Fill colours (3-tuples) for the keypoint markers, indexed the same way as
# point_name. NOTE: this list has 23 entries, more than the 17 names above;
# draw_keypoints only indexes it with the keypoint position, so the entries
# past the number of keypoints drawn are never read.
point_color = [(240, 2, 127), (240, 2, 127), (240, 2, 127),
               (240, 2, 127), (240, 2, 127),
               (255, 255, 51), (255, 255, 51),
               (254, 153, 41), (44, 127, 184),
               (217, 95, 14), (0, 0, 255),
               (255, 255, 51), (255, 255, 51), (228, 26, 28),
               (49, 163, 84), (252, 176, 243), (0, 176, 240),
               (255, 255, 0), (169, 209, 142),
               (255, 255, 0), (169, 209, 142),
               (255, 255, 0), (169, 209, 142)]


def draw_keypoints(img: Image,
                   keypoints: ndarray,
                   scores: ndarray = None,
                   thresh: float = 0.2,
                   r: int = 2,
                   draw_text: bool = False,
                   font: str = 'arial.ttf',
                   font_size: int = 10):
    """Draw keypoints as small filled circles on an image.

    Args:
        img: PIL image, or a numpy array that is converted with
            ``PIL.Image.fromarray``.
        keypoints: per-keypoint (x, y) coordinates, one row per keypoint.
        scores: per-keypoint scores; every keypoint gets score 1 when omitted.
        thresh: a keypoint is drawn only if its score is above this value.
        r: circle radius in pixels.
        draw_text: also write the keypoint name next to each circle.
        font: TrueType font file used for the text; PIL's default font is
            used when it cannot be loaded.
        font_size: font size for the text.

    Returns:
        The PIL image that was drawn on.
    """
    if isinstance(img, ndarray):
        img = PIL.Image.fromarray(img)

    if scores is None:
        scores = np.ones(keypoints.shape[0])

    if draw_text:
        try:
            font = ImageFont.truetype(font, font_size)
        except IOError:
            font = ImageFont.load_default()

    canvas = ImageDraw.Draw(img)
    for idx, (pt, conf) in enumerate(zip(keypoints, scores)):
        # Skip low-score points and points whose coordinates are all <= 0.
        if not (conf > thresh and np.max(pt) > 0):
            continue

        px, py = pt[0], pt[1]
        canvas.ellipse([px - r, py - r, px + r, py + r],
                       fill=point_color[idx],
                       outline=(255, 255, 255))
        if draw_text:
            canvas.text((px + r, py + r), text=point_name[idx], font=font)

    return img


================================================
FILE: pytorch_keypoint/HRNet/model/__init__.py
================================================
from .hrnet import HighResolutionNet


================================================
FILE: pytorch_keypoint/HRNet/model/hrnet.py
================================================
import torch.nn as nn

# Momentum value passed to every nn.BatchNorm2d layer built in this module.
BN_MOMENTUM = 0.1


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 conv + BN layers.

    Args:
        inplanes: number of input channels.
        planes: number of output channels.
        stride: stride of the first convolution (spatial downsampling factor
            of the block).
        downsample: optional module applied to the input to build the residual
            when the main path changes the shape (channels and/or stride).

    Only the first convolution is strided. The second one always uses stride 1,
    so the main path is downsampled exactly once and its output matches a
    ``downsample`` branch of the same stride.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        # stride=1 here: striding both convs would downsample by stride**2 and
        # break the residual addition for any stride > 1.
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 reduce -> 3x3 -> 1x1 expand.

    Args:
        inplanes: number of input channels.
        planes: width of the two inner convolutions; the block outputs
            ``planes * expansion`` channels.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input to build the residual
            when the main path changes the shape.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        out_planes = planes * self.expansion
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Main path: the last BN is not followed by ReLU until after the add.
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)


class StageModule(nn.Module):
    def __init__(self, input_branches, output_branches, c):
        """
        Build one stage, i.e. the part that fuses features of different scales.
        :param input_branches: number of input branches, one per scale
        :param output_branches: number of output branches
        :param c: channel count of the first input branch
        """
        super().__init__()
        self.input_branches = input_branches
        self.output_branches = output_branches

        # Every branch first goes through 4 BasicBlocks; branch i has c * 2**i channels.
        branch_list = []
        for branch_idx in range(self.input_branches):
            width = c * (2 ** branch_idx)
            branch_list.append(nn.Sequential(*[BasicBlock(width, width) for _ in range(4)]))
        self.branches = nn.ModuleList(branch_list)

        # fuse_layers[i][j] adapts the output of input branch j to output branch i.
        self.fuse_layers = nn.ModuleList()
        for out_idx in range(self.output_branches):
            fuse_row = nn.ModuleList()
            for in_idx in range(self.input_branches):
                fuse_row.append(self._make_fuse_layer(c, in_idx, out_idx))
            self.fuse_layers.append(fuse_row)

        self.relu = nn.ReLU(inplace=True)

    @staticmethod
    def _make_fuse_layer(c, src, dst):
        """Build the layer that converts input branch ``src`` to output branch ``dst``."""
        src_ch = c * (2 ** src)
        dst_ch = c * (2 ** dst)

        if src == dst:
            # Same branch on both sides: pass through unchanged.
            return nn.Identity()

        if dst < src:
            # The input branch is more downsampled than the output branch:
            # adjust the channels with a 1x1 conv, then upsample.
            return nn.Sequential(
                nn.Conv2d(src_ch, dst_ch, kernel_size=1, stride=1, bias=False),
                nn.BatchNorm2d(dst_ch, momentum=BN_MOMENTUM),
                nn.Upsample(scale_factor=2.0 ** (src - dst), mode='nearest')
            )

        # dst > src: the input branch is less downsampled than the output branch.
        # Each 2x downsampling is one stride-2 3x3 conv, dst - src of them in total.
        # The first dst - src - 1 convs keep the channel count and only downsample.
        steps = [
            nn.Sequential(
                nn.Conv2d(src_ch, src_ch, kernel_size=3, stride=2, padding=1, bias=False),
                nn.BatchNorm2d(src_ch, momentum=BN_MOMENTUM),
                nn.ReLU(inplace=True)
            )
            for _ in range(dst - src - 1)
        ]
        # The last conv downsamples and also changes the channel count (no ReLU).
        steps.append(
            nn.Sequential(
                nn.Conv2d(src_ch, dst_ch, kernel_size=3, stride=2, padding=1, bias=False),
                nn.BatchNorm2d(dst_ch, momentum=BN_MOMENTUM)
            )
        )
        return nn.Sequential(*steps)

    def forward(self, x):
        # Run every input through the blocks of its own branch.
        branch_outs = [branch(feat) for branch, feat in zip(self.branches, x)]

        # Each output branch is the ReLU of the sum of all adapted branch outputs.
        num_inputs = len(self.branches)
        x_fused = []
        for fuse_row in self.fuse_layers:
            total = sum([fuse_row[j](branch_outs[j]) for j in range(num_inputs)])
            x_fused.append(self.relu(total))

        return x_fused


class HighResolutionNet(nn.Module):
    """HRNet backbone with a 1x1 conv head that outputs one map per joint.

    Args:
        base_channel: channel count of the first branch; branch i of a stage
            uses ``base_channel * 2**i`` channels.
        num_joints: number of output channels of the final layer.

    Layout: a stem of two stride-2 3x3 convs, a Bottleneck stage (layer1),
    then stages 2-4 built from StageModule with 2, 3 and 4 branches. Each
    transition adds one new branch via a stride-2 conv. Only the first branch
    of the last stage is fed to the final 1x1 conv.

    NOTE: attribute names and the nesting of Sequential / ModuleList containers
    determine the parameter names in ``state_dict``; keep them unchanged to
    stay compatible with existing weights.
    """
    def __init__(self, base_channel: int = 32, num_joints: int = 17):
        super().__init__()
        # Stem
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)

        # Stage1
        # Projects the 64-channel stem output to 256 channels for the first residual.
        downsample = nn.Sequential(
            nn.Conv2d(64, 256, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(256, momentum=BN_MOMENTUM)
        )
        self.layer1 = nn.Sequential(
            Bottleneck(64, 64, downsample=downsample),
            Bottleneck(256, 64),
            Bottleneck(256, 64),
            Bottleneck(256, 64)
        )

        self.transition1 = nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(256, base_channel, kernel_size=3, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(base_channel, momentum=BN_MOMENTUM),
                nn.ReLU(inplace=True)
            ),
            nn.Sequential(
                nn.Sequential(  # the extra Sequential is there to match the weights provided by the original project
                    nn.Conv2d(256, base_channel * 2, kernel_size=3, stride=2, padding=1, bias=False),
                    nn.BatchNorm2d(base_channel * 2, momentum=BN_MOMENTUM),
                    nn.ReLU(inplace=True)
                )
            )
        ])

        # Stage2
        self.stage2 = nn.Sequential(
            StageModule(input_branches=2, output_branches=2, c=base_channel)
        )

        # transition2
        self.transition2 = nn.ModuleList([
            nn.Identity(),  # None,  - Used in place of "None" because it is callable
            nn.Identity(),  # None,  - Used in place of "None" because it is callable
            nn.Sequential(
                nn.Sequential(
                    nn.Conv2d(base_channel * 2, base_channel * 4, kernel_size=3, stride=2, padding=1, bias=False),
                    nn.BatchNorm2d(base_channel * 4, momentum=BN_MOMENTUM),
                    nn.ReLU(inplace=True)
                )
            )
        ])

        # Stage3
        self.stage3 = nn.Sequential(
            StageModule(input_branches=3, output_branches=3, c=base_channel),
            StageModule(input_branches=3, output_branches=3, c=base_channel),
            StageModule(input_branches=3, output_branches=3, c=base_channel),
            StageModule(input_branches=3, output_branches=3, c=base_channel)
        )

        # transition3
        self.transition3 = nn.ModuleList([
            nn.Identity(),  # None,  - Used in place of "None" because it is callable
            nn.Identity(),  # None,  - Used in place of "None" because it is callable
            nn.Identity(),  # None,  - Used in place of "None" because it is callable
            nn.Sequential(
                nn.Sequential(
                    nn.Conv2d(base_channel * 4, base_channel * 8, kernel_size=3, stride=2, padding=1, bias=False),
                    nn.BatchNorm2d(base_channel * 8, momentum=BN_MOMENTUM),
                    nn.ReLU(inplace=True)
                )
            )
        ])

        # Stage4
        # Note: the last StageModule only outputs the first branch (output_branches=1)
        self.stage4 = nn.Sequential(
            StageModule(input_branches=4, output_branches=4, c=base_channel),
            StageModule(input_branches=4, output_branches=4, c=base_channel),
            StageModule(input_branches=4, output_branches=1, c=base_channel)
        )

        # Final layer
        self.final_layer = nn.Conv2d(base_channel, num_joints, kernel_size=1, stride=1)

    def forward(self, x):
        # Stem: two stride-2 convs.
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)

        x = self.layer1(x)
        x = [trans(x) for trans in self.transition1]  # Since now, x is a list

        x = self.stage2(x)
        x = [
            self.transition2[0](x[0]),
            self.transition2[1](x[1]),
            self.transition2[2](x[-1])
        ]  # New branch derives from the "upper" branch only

        x = self.stage3(x)
        x = [
            self.transition3[0](x[0]),
            self.transition3[1](x[1]),
            self.transition3[2](x[2]),
            self.transition3[3](x[-1]),
        ]  # New branch derives from the "upper" branch only

        x = self.stage4(x)

        # stage4 returns a single-element list; its only entry feeds the head.
        x = self.final_layer(x[0])

        return x


================================================
FILE: pytorch_keypoint/HRNet/my_dataset_coco.py
================================================
import os
import copy

import torch
import numpy as np
import cv2
import torch.utils.data as data
from pycocotools.coco import COCO


class CocoKeypoint(data.Dataset):
    """Per-person keypoint dataset built from COCO ``person_keypoints`` annotations.

    Every item is one person instance (not one image). ``__getitem__`` returns
    ``(image, target)`` where ``target`` is a deep copy of the stored person
    record after it has been passed through ``transforms``.

    Args:
        root: dataset root containing ``{dataset}{years}`` (images) and
            ``annotations``.
        dataset: ``"train"`` or ``"val"``.
        years: year suffix used in the folder and annotation file names.
        transforms: optional callable ``(image, target) -> (image, target)``.
        det_json_path: optional detection-result json; when given, boxes are
            taken from it (loaded via ``COCO.loadRes``) and no keypoint
            information is read.
        fixed_size: stored on the instance as ``self.fixed_size``.
    """
    def __init__(self,
                 root,
                 dataset="train",
                 years="2017",
                 transforms=None,
                 det_json_path=None,
                 fixed_size=(256, 192)):
        super().__init__()
        assert dataset in ["train", "val"], 'dataset must be in ["train", "val"]'
        anno_file = f"person_keypoints_{dataset}{years}.json"
        assert os.path.exists(root), "file '{}' does not exist.".format(root)
        self.img_root = os.path.join(root, f"{dataset}{years}")
        assert os.path.exists(self.img_root), "path '{}' does not exist.".format(self.img_root)
        self.anno_path = os.path.join(root, "annotations", anno_file)
        assert os.path.exists(self.anno_path), "file '{}' does not exist.".format(self.anno_path)

        self.fixed_size = fixed_size
        self.mode = dataset
        self.transforms = transforms
        self.coco = COCO(self.anno_path)
        img_ids = sorted(self.coco.imgs.keys())

        # Boxes come either from the detection results or from the GT annotations.
        if det_json_path is not None:
            det = self.coco.loadRes(det_json_path)
        else:
            det = self.coco

        self.valid_person_list = []
        obj_idx = 0
        for img_id in img_ids:
            img_info = self.coco.loadImgs(img_id)[0]
            ann_ids = det.getAnnIds(imgIds=img_id)
            anns = det.loadAnns(ann_ids)
            for ann in anns:
                # only save person class
                if ann["category_id"] != 1:
                    print(f'warning: find not support id: {ann["category_id"]}, only support id: 1 (person)')
                    continue

                # Detection-result files only carry boxes, no keypoints, so the
                # keypoint checks are applied to GT annotations only.
                if det_json_path is None:
                    # skip objs without keypoints annotation
                    if "keypoints" not in ann:
                        continue
                    if max(ann["keypoints"]) == 0:
                        continue

                xmin, ymin, w, h = ann['bbox']
                # Use only valid bounding boxes
                if w > 0 and h > 0:
                    info = {
                        "box": [xmin, ymin, w, h],
                        "image_path": os.path.join(self.img_root, img_info["file_name"]),
                        "image_id": img_id,
                        "image_width": img_info['width'],
                        "image_height": img_info['height'],
                        "obj_origin_hw": [h, w],
                        "obj_index": obj_idx,
                        "score": ann.get("score", 1.)
                    }

                    # Keypoints are only available in the GT annotations.
                    if det_json_path is None:
                        # Flat [x, y, v, ...] list -> [k, 3], then split off the visibility flag.
                        keypoints = np.array(ann["keypoints"]).reshape([-1, 3])
                        visible = keypoints[:, 2]
                        keypoints = keypoints[:, :2]
                        info["keypoints"] = keypoints
                        info["visible"] = visible

                    self.valid_person_list.append(info)
                    obj_idx += 1

    def __getitem__(self, idx):
        # Deep copy so transforms can modify the record without touching the cache.
        target = copy.deepcopy(self.valid_person_list[idx])

        image = cv2.imread(target["image_path"])
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f"failed to read image: {target['image_path']}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transforms is not None:
            # Return the target produced by the transforms, not the input one.
            image, target = self.transforms(image, target)

        return image, target

    def __len__(self):
        return len(self.valid_person_list)

    @staticmethod
    def collate_fn(batch):
        """Stack the images into one tensor and keep the targets as a tuple."""
        imgs_tuple, targets_tuple = tuple(zip(*batch))
        imgs_tensor = torch.stack(imgs_tuple)
        return imgs_tensor, targets_tuple


if __name__ == '__main__':
    # Manual smoke test: load the val split and print its size and first sample.
    val_dataset = CocoKeypoint("/data/coco2017/", dataset="val")
    print(len(val_dataset))
    first_sample = val_dataset[0]
    print(first_sample)


================================================
FILE: pytorch_keypoint/HRNet/person_keypoints.json
================================================
{
  "keypoints": ["nose","left_eye","right_eye","left_ear","right_ear","left_shoulder","right_shoulder","left_elbow","right_elbow","left_wrist","right_wrist","left_hip","right_hip","left_knee","right_knee","left_ankle","right_ankle"],
  "skeleton": [[16,14],[14,12],[17,15],[15,13],[12,13],[6,12],[7,13],[6,7],[6,8],[7,9],[8,10],[9,11],[2,3],[1,2],[1,3],[2,4],[3,5],[4,6],[5,7]],
  "flip_pairs": [[1,2], [3,4], [5,6], [7,8], [9,10], [11,12], [13,14], [15,16]],
  "kps_weights": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.2, 1.2, 1.5, 1.5, 1.0, 1.0, 1.2, 1.2, 1.5, 1.5],
  "upper_body_ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
  "lower_body_ids": [11, 12, 13, 14, 15, 16]
}

================================================
FILE: pytorch_keypoint/HRNet/plot_curve.py
================================================
import datetime
import matplotlib.pyplot as plt


def plot_loss_and_lr(train_loss, learning_rate):
    """Plot training loss and learning rate on twin y-axes and save the figure.

    The figure is written to ``./loss_and_lr<timestamp>.png``. Any exception is
    caught and printed, so a plotting failure never propagates to the caller.

    Args:
        train_loss: sequence of loss values, one per step.
        learning_rate: sequence of learning rates, same length as train_loss.
    """
    try:
        steps = list(range(len(train_loss)))
        fig, loss_ax = plt.subplots(1, 1)
        loss_ax.plot(steps, train_loss, 'r', label='loss')
        loss_ax.set_xlabel("step")
        loss_ax.set_ylabel("loss")
        loss_ax.set_title("Train Loss and lr")
        plt.legend(loc='best')

        # Second y-axis sharing the same x-axis for the learning rate.
        lr_ax = loss_ax.twinx()
        lr_ax.plot(steps, learning_rate, label='lr')
        lr_ax.set_ylabel("learning rate")
        lr_ax.set_xlim(0, len(train_loss))  # x range covers all steps
        plt.legend(loc='best')

        # One combined legend holding the entries of both axes.
        loss_handles, loss_labels = loss_ax.get_legend_handles_labels()
        lr_handles, lr_labels = lr_ax.get_legend_handles_labels()
        plt.legend(loss_handles + lr_handles, loss_labels + lr_labels, loc='upper right')

        fig.subplots_adjust(right=0.8)  # keeps the saved image from being cut off
        timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        fig.savefig('./loss_and_lr{}.png'.format(timestamp))
        plt.close()
        print("successful save loss curve! ")
    except Exception as e:
        print(e)


def plot_map(mAP):
    """Plot the per-epoch mAP curve and save it to ``./mAP.png``.

    Any exception is caught and printed, so a plotting failure never
    propagates to the caller.

    Args:
        mAP: sequence of mAP values, one per epoch.
    """
    try:
        num_epochs = len(mAP)
        epochs = list(range(num_epochs))
        plt.plot(epochs, mAP, label='mAp')
        plt.xlabel('epoch')
        plt.ylabel('mAP')
        plt.title('Eval mAP')
        plt.xlim(0, num_epochs)
        plt.legend(loc='best')
        plt.savefig('./mAP.png')
        plt.close()
        print("successful save mAP curve!")
    except Exception as e:
        print(e)


================================================
FILE: pytorch_keypoint/HRNet/predict.py
================================================
import os
import json

import torch
import numpy as np
import cv2
import matplotlib.pyplot as plt

from model import HighResolutionNet
from draw_utils import draw_keypoints
import transforms


def predict_all_person():
    """Placeholder for multi-person inference; not implemented yet."""
    # TODO: implement multi-person prediction.
    return None


def predict_single_person():
    """Run HRNet-W32 on one single-person image and visualise the keypoints.

    All inputs are hard-coded relative paths (``./person.png``,
    ``./pose_hrnet_w32_256x192.pth``, ``person_keypoints.json``). The whole
    image is used as the person box. With ``flip_test`` enabled the heatmaps
    of the horizontally flipped image are averaged with the normal ones.
    The annotated image is shown with matplotlib and then saved as
    ``test_result.jpg``.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"using device: {device}")

    flip_test = True
    resize_hw = (256, 192)
    img_path = "./person.png"
    weights_path = "./pose_hrnet_w32_256x192.pth"
    keypoint_json_path = "person_keypoints.json"
    for required_path in (img_path, weights_path, keypoint_json_path):
        assert os.path.exists(required_path), f"file: {required_path} does not exist."

    affine = transforms.AffineTransform(scale=(1.25, 1.25), fixed_size=resize_hw)
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    data_transform = transforms.Compose([affine, transforms.ToTensor(), normalize])

    # Keypoint metadata (only "flip_pairs" is read below).
    with open(keypoint_json_path, "r") as f:
        person_info = json.load(f)

    # Load the image as RGB; the box handed to the transform spans the full image.
    img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
    full_image_box = [0, 0, img.shape[1] - 1, img.shape[0] - 1]
    img_tensor, target = data_transform(img, {"box": full_image_box})
    img_tensor = torch.unsqueeze(img_tensor, dim=0)

    # Build the network.
    # HRNet-W32: base_channel=32
    # HRNet-W48: base_channel=48
    model = HighResolutionNet(base_channel=32)
    weights = torch.load(weights_path, map_location=device)
    if "model" in weights:
        # Training checkpoints keep the state dict under the "model" key.
        weights = weights["model"]
    model.load_state_dict(weights)
    model.to(device)
    model.eval()

    with torch.inference_mode():
        outputs = model(img_tensor.to(device))

        if flip_test:
            flipped_input = transforms.flip_images(img_tensor)
            flipped_raw = model(flipped_input.to(device))
            flip_outputs = torch.squeeze(
                transforms.flip_back(flipped_raw, person_info["flip_pairs"])
            )
            # feature is not aligned, shift flipped heatmap for higher accuracy
            # https://github.com/leoxiaobin/deep-high-resolution-net.pytorch/issues/22
            unshifted = flip_outputs.clone()
            flip_outputs[..., 1:] = unshifted[..., 0: -1]
            outputs = (outputs + flip_outputs) * 0.5

        keypoints, scores = transforms.get_final_preds(outputs, [target["reverse_trans"]], True)
        keypoints = np.squeeze(keypoints)
        scores = np.squeeze(scores)

        plot_img = draw_keypoints(img, keypoints, scores, thresh=0.2, r=3)
        plt.imshow(plot_img)
        plt.show()
        plot_img.save("test_result.jpg")


if __name__ == '__main__':
    # Script entry point: run the single-person demo with its hard-coded paths.
    predict_single_person()


================================================
FILE: pytorch_keypoint/HRNet/requirements.txt
================================================
numpy
opencv_python==4.5.4.60
lxml
torch==1.10.1
torchvision==0.11.1
pycocotools
matplotlib
tqdm

================================================
FILE: pytorch_keypoint/HRNet/train.py
================================================
import json
import os
import datetime

import torch
from torch.utils import data
import numpy as np

import transforms
from model import HighResolutionNet
from my_dataset_coco import CocoKeypoint
from train_utils import train_eval_utils as utils


def create_model(num_joints, load_pretrain_weights=True):
    """Build an HRNet-W32 keypoint model, optionally loading ``./hrnet_w32.pth``.

    Args:
        num_joints: Number of keypoints passed to ``HighResolutionNet``.
        load_pretrain_weights: When true, load ``./hrnet_w32.pth`` non-strictly
            after dropping weights that cannot be reused (see below).

    Returns:
        The constructed model.
    """
    model = HighResolutionNet(base_channel=32, num_joints=num_joints)

    if load_pretrain_weights:
        # Pretrained weights (Baidu Pan):
        # https://pan.baidu.com/s/1Lu6mMAWfm_8GGykttFMpVw  extraction code: f43o
        weights_dict = torch.load("./hrnet_w32.pth", map_location='cpu')

        for k in list(weights_dict.keys()):
            # ImageNet checkpoint: drop the weights that are of no use here.
            if ("head" in k) or ("fc" in k):
                del weights_dict[k]
                # The key no longer exists, so it must not be looked up again below.
                continue

            # COCO checkpoint: drop the final layer when its joint count differs.
            if "final_layer" in k and weights_dict[k].shape[0] != num_joints:
                del weights_dict[k]

        missing_keys, _unexpected_keys = model.load_state_dict(weights_dict, strict=False)
        if len(missing_keys) != 0:
            print("missing_keys: ", missing_keys)

    return model


def main(args):
    """Train HRNet on COCO keypoints on a single device, evaluating every epoch.

    Per epoch this runs one training pass, steps the MultiStepLR scheduler,
    evaluates with flip testing, appends the metrics to a timestamped
    ``results*.txt`` file and saves a checkpoint. Loss/lr and mAP curves are
    plotted once training ends.

    Args:
        args: Parsed command-line namespace (see the argparse setup of this
            script). Checkpoints are written to ``args.output_dir``; when that
            attribute is missing or empty, ``./save_weights`` is used.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    # File that accumulates the per-epoch COCO metrics, loss and learning rate.
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    # Honour --output-dir instead of a hard-coded folder; the fallback keeps the
    # historical location for callers whose namespace lacks the attribute.
    output_dir = getattr(args, "output_dir", None) or "./save_weights"
    os.makedirs(output_dir, exist_ok=True)

    with open(args.keypoints_path, "r") as f:
        person_kps_info = json.load(f)

    fixed_size = args.fixed_size
    # Heatmaps are generated at 1/4 of the input resolution.
    heatmap_hw = (args.fixed_size[0] // 4, args.fixed_size[1] // 4)
    kps_weights = np.array(person_kps_info["kps_weights"],
                           dtype=np.float32).reshape((args.num_joints,))
    data_transform = {
        "train": transforms.Compose([
            transforms.HalfBody(0.3, person_kps_info["upper_body_ids"], person_kps_info["lower_body_ids"]),
            transforms.AffineTransform(scale=(0.65, 1.35), rotation=(-45, 45), fixed_size=fixed_size),
            transforms.RandomHorizontalFlip(0.5, person_kps_info["flip_pairs"]),
            transforms.KeypointToHeatMap(heatmap_hw=heatmap_hw, gaussian_sigma=2, keypoints_weights=kps_weights),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ]),
        "val": transforms.Compose([
            transforms.AffineTransform(scale=(1.25, 1.25), fixed_size=fixed_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
    }

    data_root = args.data_path

    # load train data set
    # coco2017 -> annotations -> person_keypoints_train2017.json
    train_dataset = CocoKeypoint(data_root, "train", transforms=data_transform["train"], fixed_size=args.fixed_size)

    # A custom collate_fn is required: each sample is an (image, target) pair,
    # which the default collation cannot batch.
    batch_size = args.batch_size
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using %g dataloader workers' % nw)

    train_data_loader = data.DataLoader(train_dataset,
                                        batch_size=batch_size,
                                        shuffle=True,
                                        pin_memory=True,
                                        num_workers=nw,
                                        collate_fn=train_dataset.collate_fn)

    # load validation data set
    # coco2017 -> annotations -> person_keypoints_val2017.json
    val_dataset = CocoKeypoint(data_root, "val", transforms=data_transform["val"], fixed_size=args.fixed_size,
                               det_json_path=args.person_det)
    val_data_loader = data.DataLoader(val_dataset,
                                      batch_size=batch_size,
                                      shuffle=False,
                                      pin_memory=True,
                                      num_workers=nw,
                                      collate_fn=val_dataset.collate_fn)

    # create model
    model = create_model(num_joints=args.num_joints)
    # print(model)

    model.to(device)

    # define optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.AdamW(params,
                                  lr=args.lr,
                                  weight_decay=args.weight_decay)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)

    # Resume from a previously saved checkpoint when one is given.
    if args.resume != "":
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp and "scaler" in checkpoint:
            scaler.load_state_dict(checkpoint["scaler"])
        print("the training process from epoch{}...".format(args.start_epoch))

    train_loss = []
    learning_rate = []
    val_map = []

    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch, printing every 50 iterations
        mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader,
                                              device=device, epoch=epoch,
                                              print_freq=50, warmup=True,
                                              scaler=scaler)
        train_loss.append(mean_loss.item())
        learning_rate.append(lr)

        # update the learning rate
        lr_scheduler.step()

        # evaluate on the test dataset
        coco_info = utils.evaluate(model, val_data_loader, device=device,
                                   flip=True, flip_pairs=person_kps_info["flip_pairs"])

        # write into txt
        with open(results_file, "a") as f:
            # Each line holds the COCO metrics followed by loss and learning rate.
            result_info = [f"{i:.4f}" for i in coco_info + [mean_loss.item()]] + [f"{lr:.6f}"]
            txt = "epoch:{} {}".format(epoch, '  '.join(result_info))
            f.write(txt + "\n")

        val_map.append(coco_info[1])  # @0.5 mAP

        # save weights
        save_files = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler': lr_scheduler.state_dict(),
            'epoch': epoch}
        if args.amp:
            save_files["scaler"] = scaler.state_dict()
        torch.save(save_files, os.path.join(output_dir, "model-{}.pth".format(epoch)))

    # plot loss and lr curve
    if len(train_loss) != 0 and len(learning_rate) != 0:
        from plot_curve import plot_loss_and_lr
        plot_loss_and_lr(train_loss, learning_rate)

    # plot mAP curve
    if len(val_map) != 0:
        from plot_curve import plot_map
        plot_map(val_map)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description=__doc__)

    # Training device
    parser.add_argument('--device', default='cuda:0', help='device')
    # Root directory of the training dataset (coco2017)
    parser.add_argument('--data-path', default='/data/coco2017', help='dataset')
    # COCO person keypoint metadata
    parser.add_argument('--keypoints-path', default="./person_keypoints.json", type=str,
                        help='person_keypoints.json path')
    # Person detection results for the validation set provided by the original
    # project; leave as None (recommended) to use the ground-truth boxes.
    parser.add_argument('--person-det', type=str, default=None)
    parser.add_argument('--fixed-size', default=[256, 192], nargs='+', type=int, help='input size')
    # Number of keypoints
    parser.add_argument('--num-joints', default=17, type=int, help='num_joints')
    # Directory where checkpoints are saved
    parser.add_argument('--output-dir', default='./save_weights', help='path where to save')
    # Checkpoint to resume training from
    parser.add_argument('--resume', default='', type=str, help='resume from checkpoint')
    # Epoch index to start training from
    parser.add_argument('--start-epoch', default=0, type=int, help='start epoch')
    # Total number of training epochs
    parser.add_argument('--epochs', default=210, type=int, metavar='N',
                        help='number of total epochs to run')
    # Milestones for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-steps', default=[170, 200], nargs='+', type=int, help='decrease lr every step-size epochs')
    # Decay factor for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')
    # Learning rate. The previous help text described 0.02 on 8 GPUs, which
    # contradicted the actual default below.
    parser.add_argument('--lr', default=0.001, type=float,
                        help='initial learning rate (default: 0.001)')
    # weight_decay for AdamW
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # Training batch size
    parser.add_argument('--batch-size', default=32, type=int, metavar='N',
                        help='batch size when training.')
    # Mixed-precision training (requires a GPU that supports it)
    parser.add_argument("--amp", action="store_true", help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()
    print(args)

    # Create the checkpoint directory if it does not exist yet.
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    main(args)


================================================
FILE: pytorch_keypoint/HRNet/train_multi_GPU.py
================================================
import json
import time
import os
import datetime

import torch
from torch.utils import data
import numpy as np

import transforms
from model import HighResolutionNet
from my_dataset_coco import CocoKeypoint
import train_utils.train_eval_utils as utils
from train_utils import init_distributed_mode, save_on_master, mkdir


def create_model(num_joints, load_pretrain_weights=True):
    """Build an HRNet-W32 keypoint model, optionally loading ``./hrnet_w32.pth``.

    Args:
        num_joints: Number of keypoints passed to ``HighResolutionNet``.
        load_pretrain_weights: When true, load ``./hrnet_w32.pth`` non-strictly
            after dropping weights that cannot be reused (see below).

    Returns:
        The constructed model.
    """
    model = HighResolutionNet(base_channel=32, num_joints=num_joints)

    if load_pretrain_weights:
        # Pretrained weights (Baidu Pan):
        # https://pan.baidu.com/s/1Lu6mMAWfm_8GGykttFMpVw  extraction code: f43o
        weights_dict = torch.load("./hrnet_w32.pth", map_location='cpu')

        for k in list(weights_dict.keys()):
            # ImageNet checkpoint: drop the weights that are of no use here.
            if ("head" in k) or ("fc" in k):
                del weights_dict[k]
                # The key no longer exists, so it must not be looked up again below.
                continue

            # COCO checkpoint: drop the final layer when its joint count differs.
            if "final_layer" in k and weights_dict[k].shape[0] != num_joints:
                del weights_dict[k]

        missing_keys, _unexpected_keys = model.load_state_dict(weights_dict, strict=False)
        if len(missing_keys) != 0:
            print("missing_keys: ", missing_keys)

    return model


def main(args):
    """Train HRNet on COCO keypoints, optionally with distributed data parallelism.

    Initialises the distributed state, builds datasets, samplers and loaders,
    wraps the model in DistributedDataParallel when ``args.distributed`` is
    set, then trains and evaluates once per epoch. Metrics are written and
    curves are plotted only when ``args.rank`` is -1 or 0. With
    ``args.test_only`` a single evaluation is run and the function returns.

    Args:
        args: Parsed command-line namespace (see the argparse setup of this
            script); ``init_distributed_mode`` is called on it first.
    """
    init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)

    # File that accumulates the per-epoch COCO metrics, loss and learning rate.
    now = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    key_results_file = f"results{now}.txt"

    with open(args.keypoints_path, "r") as f:
        person_kps_info = json.load(f)

    fixed_size = args.fixed_size
    # Heatmaps are generated at 1/4 of the input resolution.
    heatmap_hw = (args.fixed_size[0] // 4, args.fixed_size[1] // 4)
    kps_weights = np.array(person_kps_info["kps_weights"],
                           dtype=np.float32).reshape((args.num_joints,))
    data_transform = {
        "train": transforms.Compose([
            transforms.HalfBody(0.3, person_kps_info["upper_body_ids"], person_kps_info["lower_body_ids"]),
            transforms.AffineTransform(scale=(0.65, 1.35), rotation=(-45, 45), fixed_size=fixed_size),
            transforms.RandomHorizontalFlip(0.5, person_kps_info["flip_pairs"]),
            transforms.KeypointToHeatMap(heatmap_hw=heatmap_hw, gaussian_sigma=2, keypoints_weights=kps_weights),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ]),
        "val": transforms.Compose([
            transforms.AffineTransform(scale=(1.25, 1.25), fixed_size=fixed_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
    }

    data_root = args.data_path

    # load train data set
    # coco2017 -> annotations -> person_keypoints_train2017.json
    train_dataset = CocoKeypoint(data_root, "train", transforms=data_transform["train"], fixed_size=args.fixed_size)

    # load validation data set
    # coco2017 -> annotations -> person_keypoints_val2017.json
    val_dataset = CocoKeypoint(data_root, "val", transforms=data_transform["val"], fixed_size=args.fixed_size,
                               det_json_path=args.person_det)

    print("Creating data loaders")
    if args.distributed:
        train_sampler = data.distributed.DistributedSampler(train_dataset)
        test_sampler = data.distributed.DistributedSampler(val_dataset)
    else:
        train_sampler = data.RandomSampler(train_dataset)
        test_sampler = data.SequentialSampler(val_dataset)

    train_batch_sampler = data.BatchSampler(train_sampler, args.batch_size, drop_last=True)

    data_loader = data.DataLoader(train_dataset,
                                  batch_sampler=train_batch_sampler,
                                  num_workers=args.workers,
                                  collate_fn=train_dataset.collate_fn)

    # Use the validation dataset's own collate_fn, matching the single-GPU script.
    data_loader_test = data.DataLoader(val_dataset,
                                       batch_size=args.batch_size,
                                       sampler=test_sampler,
                                       num_workers=args.workers,
                                       collate_fn=val_dataset.collate_fn)

    print("Creating model")
    # The output size of the model is the number of keypoints.
    model = create_model(num_joints=args.num_joints)
    model.to(device)

    if args.distributed and args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    # Keep a handle on the unwrapped module for saving/loading state dicts.
    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.AdamW(params,
                                  lr=args.lr,
                                  weight_decay=args.weight_decay)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)

    # Resume from a previously saved checkpoint when one is given.
    if args.resume:
        # If map_location is missing, torch.load will first load the module to CPU
        # and then copy each parameter to where it was saved,
        # which would result in all processes on the same machine using the same set of devices.
        # The checkpoint also carries the optimizer and lr-scheduler state.
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp and "scaler" in checkpoint:
            scaler.load_state_dict(checkpoint["scaler"])

    if args.test_only:
        utils.evaluate(model, data_loader_test, device=device,
                       flip=True, flip_pairs=person_kps_info["flip_pairs"])
        return

    train_loss = []
    learning_rate = []
    val_map = []

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        mean_loss, lr = utils.train_one_epoch(model, optimizer, data_loader,
                                              device, epoch, args.print_freq,
                                              warmup=True, scaler=scaler)

        # update learning rate
        lr_scheduler.step()

        # evaluate after every epoch
        key_info = utils.evaluate(model, data_loader_test, device=device,
                                  flip=True, flip_pairs=person_kps_info["flip_pairs"])

        # Only the main process records and writes metrics.
        if args.rank in [-1, 0]:
            train_loss.append(mean_loss.item())
            learning_rate.append(lr)
            val_map.append(key_info[1])  # @0.5 mAP

            # write into txt
            with open(key_results_file, "a") as f:
                # Each line holds the COCO metrics followed by loss and learning rate.
                result_info = [f"{i:.4f}" for i in key_info + [mean_loss.item()]] + [f"{lr:.6f}"]
                txt = "epoch:{} {}".format(epoch, '  '.join(result_info))
                f.write(txt + "\n")

        if args.output_dir:
            # save_on_master is expected to write only on the main process.
            save_files = {'model': model_without_ddp.state_dict(),
                          'optimizer': optimizer.state_dict(),
                          'lr_scheduler': lr_scheduler.state_dict(),
                          'args': args,
                          'epoch': epoch}
            if args.amp:
                save_files["scaler"] = scaler.state_dict()
            save_on_master(save_files,
                           os.path.join(args.output_dir, f'model_{epoch}.pth'))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))

    if args.rank in [-1, 0]:
        # plot loss and lr curve
        if len(train_loss) != 0 and len(learning_rate) != 0:
            from plot_curve import plot_loss_and_lr
            plot_loss_and_lr(train_loss, learning_rate)

        # plot mAP curve
        if len(val_map) != 0:
            from plot_curve import plot_map
            plot_map(val_map)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description=__doc__)

    # Root directory of the training data (coco2017)
    parser.add_argument('--data-path', default='/data/coco2017', help='dataset')
    # Training device
    parser.add_argument('--device', default='cuda', help='device')
    # COCO person keypoint metadata
    parser.add_argument('--keypoints-path', default="./person_keypoints.json", type=str,
                        help='person_keypoints.json path')
    # Person detection results for the validation set provided by the original
    # project; leave as None (recommended) to use the ground-truth boxes.
    parser.add_argument('--person-det', type=str, default=None)
    parser.add_argument('--fixed-size', default=[256, 192], nargs='+', type=int, help='input size')
    # Number of keypoints
    parser.add_argument('--num-joints', default=17, type=int, help='num_joints(num_keypoints)')
    # Batch size on each GPU
    parser.add_argument('-b', '--batch-size', default=32, type=int,
                        help='images per gpu, the total batch size is $NGPU x batch_size')
    # Epoch index to start training from
    parser.add_argument('--start-epoch', default=0, type=int, help='start epoch')
    # Total number of training epochs
    parser.add_argument('--epochs', default=210, type=int, metavar='N',
                        help='number of total epochs to run')
    # Number of data-loading / preprocessing workers
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    # Learning rate
    parser.add_argument('--lr', default=0.001, type=float,
                        help='initial learning rate, 0.001 is the default value for training '
                             'on 4 gpus and 32 images_per_gpu')
    # weight_decay for AdamW
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # Milestones for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-steps', default=[170, 200], nargs='+', type=int,
                        help='decrease lr every step-size epochs')
    # Decay factor for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')
    # How often training progress is printed
    parser.add_argument('--print-freq', default=50, type=int, help='print frequency')
    # Directory where checkpoints are saved
    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')
    # Checkpoint to resume training from
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--test-only', action="store_true", help="test only")

    # Number of processes to launch (processes, not threads)
    parser.add_argument('--world-size', default=4, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
    parser.add_argument("--sync-bn", action="store_true", help="Use sync batch norm")
    # Mixed-precision training (requires a GPU that supports it)
    parser.add_argument("--amp", action="store_true", help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()

    # When an output directory is given, create it if it does not exist yet.
    if args.output_dir:
        mkdir(args.output_dir)

    main(args)


================================================
FILE: pytorch_keypoint/HRNet/train_utils/__init__.py
================================================
from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups
from .distributed_utils import init_distributed_mode, save_on_master, mkdir
from .coco_eval import EvalCOCOMetric
from .coco_utils import coco_remove_images_without_annotations, convert_coco_poly_mask, convert_to_coco_api


================================================
FILE: pytorch_keypoint/HRNet/train_utils/coco_eval.py
================================================
import json
import copy

from PIL import Image, ImageDraw
import numpy as np
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from .distributed_utils import all_gather, is_main_process
from transforms import affine_points


def merge(img_ids, eval_results):
    """Collect ids and results from every process and drop duplicated ids.

    Both inputs are gathered with ``all_gather`` and flattened. For each
    distinct id only the result at the position of its first occurrence is
    kept, and the ids come back sorted (``np.unique``). Duplicates can occur
    in multi-GPU runs, where one sample may be handed to several processes so
    that every process gets the same number of samples.

    Returns:
        A tuple ``(ids, results)``: the unique ids as a list and the matching
        results, one per id.
    """
    gathered_ids = all_gather(img_ids)
    gathered_results = all_gather(eval_results)

    flat_ids = [one_id for chunk in gathered_ids for one_id in chunk]
    flat_results = [one_result for chunk in gathered_results for one_result in chunk]

    # keep only unique (and in sorted order) ids
    unique_ids, first_positions = np.unique(np.array(flat_ids), return_index=True)
    kept_results = [flat_results[pos] for pos in first_positions]

    return list(unique_ids), kept_results


class EvalCOCOMetric:
    """Accumulate keypoint predictions and score them with ``COCOeval``.

    Typical use: call ``update`` for every batch, ``synchronize_results``
    once all batches are processed, then ``evaluate``.
    """

    def __init__(self,
                 coco: "COCO" = None,
                 iou_type: str = "keypoints",
                 results_file_name: str = "predict_results.json",
                 classes_mapping: dict = None,
                 threshold: float = 0.2):
        """Store a private copy of the ground truth and the evaluation settings.

        Args:
            coco: Ground-truth COCO object; it is deep-copied.
            iou_type: Only ``"keypoints"`` is accepted.
            results_file_name: JSON file the merged predictions are written to.
            classes_mapping: Stored but not used by this class.
            threshold: Keypoints whose score is not above this value are left
                out of the mean keypoint score of a person.
        """
        self.coco = copy.deepcopy(coco)
        self.obj_ids = []  # ids of the objects (persons) handled by this process
        self.results = []
        self.aggregation_results = None
        self.classes_mapping = classes_mapping
        self.coco_evaluator = None
        assert iou_type in ["keypoints"]
        self.iou_type = iou_type
        self.results_file_name = results_file_name
        self.threshold = threshold

    def plot_img(self, img_path, keypoints, r=3):
        """Debug helper: open ``img_path`` and draw each keypoint as a red dot of radius ``r``."""
        img = Image.open(img_path)
        draw = ImageDraw.Draw(img)
        for point in keypoints:
            draw.ellipse([point[0] - r, point[1] - r, point[0] + r, point[1] + r],
                         fill=(255, 0, 0))
        img.show()

    def prepare_for_coco_keypoints(self, targets, outputs):
        """Convert per-person predictions into COCO result dicts.

        Args:
            targets: One dict per person with the keys ``"obj_index"``,
                ``"image_id"`` and ``"score"``.
            outputs: Pair ``(keypoints, scores)`` with one entry per person.
                Presumably each entry has shape (num_joints, 2) and
                (num_joints, 1) respectively -- TODO confirm against the
                caller; they are concatenated along axis 1.
        """
        # Iterate over persons, not images: one image may contain several persons.
        for target, keypoints, scores in zip(targets, outputs[0], outputs[1]):
            if len(keypoints) == 0:
                continue

            obj_idx = int(target["obj_index"])
            if obj_idx in self.obj_ids:
                # Guard against the same person being recorded twice.
                continue

            self.obj_ids.append(obj_idx)
            # self.plot_img(target["image_path"], keypoints)

            # Mean score over the keypoints above the configured threshold
            # (previously a hard-coded 0.2 that ignored ``self.threshold``).
            mask = np.greater(scores, self.threshold)
            if mask.sum() == 0:
                k_score = 0
            else:
                k_score = np.mean(scores[mask])

            # Flatten to [x1, y1, s1, x2, y2, s2, ...].
            keypoints = np.concatenate([keypoints, scores], axis=1)
            keypoints = np.reshape(keypoints, -1)

            # Round to two decimals to keep the resulting JSON file small.
            keypoints = [round(k, 2) for k in keypoints.tolist()]

            res = {"image_id": target["image_id"],
                   "category_id": 1,  # person
                   "keypoints": keypoints,
                   # Plain float so the value is always JSON-serialisable.
                   "score": float(target["score"] * k_score)}

            self.results.append(res)

    def update(self, targets, outputs):
        """Record the predictions of one batch.

        Raises:
            KeyError: If ``self.iou_type`` is not ``"keypoints"``.
        """
        if self.iou_type == "keypoints":
            self.prepare_for_coco_keypoints(targets, outputs)
        else:
            raise KeyError(f"not support iou_type: {self.iou_type}")

    def synchronize_results(self):
        """Merge the results of all processes and dump them to the results file."""
        # Gather the data held by every process.
        eval_ids, eval_results = merge(self.obj_ids, self.results)
        self.aggregation_results = {"obj_ids": eval_ids, "results": eval_results}

        # Writing on the main process is sufficient.
        if is_main_process():
            # write predict results into json file
            json_str = json.dumps(eval_results, indent=4)
            with open(self.results_file_name, 'w') as json_file:
                json_file.write(json_str)

    def evaluate(self):
        """Run ``COCOeval`` on the saved results file.

        Returns:
            The ``COCOeval`` stats as a list on the main process, ``None`` on
            every other process.
        """
        # Evaluating on the main process is sufficient.
        if is_main_process():
            # accumulate predictions from all images
            coco_true = self.coco
            coco_pre = coco_true.loadRes(self.results_file_name)

            self.coco_evaluator = COCOeval(cocoGt=coco_true, cocoDt=coco_pre, iouType=self.iou_type)

            self.coco_evaluator.evaluate()
            self.coco_evaluator.accumulate()
            print(f"IoU metric: {self.iou_type}")
            self.coco_evaluator.summarize()

            coco_info = self.coco_evaluator.stats.tolist()  # numpy to list
            return coco_info
        else:
            return None


================================================
FILE: pytorch_keypoint/HRNet/train_utils/coco_utils.py
================================================
import torch
import torch.utils.data
from pycocotools import mask as coco_mask
from pycocotools.coco import COCO


def coco_remove_images_without_annotations(dataset, ids):
    """
    Keep only the image ids that have at least one usable annotation.

    An image is dropped when it has no annotations at all, or when every one
    of its boxes has a width or height that is <= 1.
    refer to:
    https://github.com/pytorch/vision/blob/master/references/detection/coco_utils.py
    :param dataset: object providing ``getAnnIds`` and ``loadAnns``
    :param ids: iterable of image ids to filter
    :return: list of the image ids that passed the check, in input order
    """
    def _is_degenerate(bbox):
        # bbox[2:] holds the trailing entries of the box (width and height
        # in COCO's (x, y, w, h) layout).
        return any(side <= 1 for side in bbox[2:])

    def _is_usable(annotations):
        # No annotation at all: nothing to learn from this image.
        if len(annotations) == 0:
            return False
        # Usable as soon as one box is not close to zero area.
        return not all(_is_degenerate(obj["bbox"]) for obj in annotations)

    kept_ids = []
    for img_id in ids:
        annotations = dataset.loadAnns(dataset.getAnnIds(imgIds=img_id, iscrowd=None))
        if _is_usable(annotations):
            kept_ids.append(img_id)

    return kept_ids


def convert_coco_poly_mask(segmentations, height, width):
    """Turn per-object polygon segmentations into a stack of uint8 masks.

    Each entry of ``segmentations`` is encoded with ``frPyObjects`` and
    decoded; the decoded array is reduced with ``any`` over its last axis so
    every object yields a single 2-D mask.

    Returns:
        The per-object masks stacked along dim 0, or an empty
        ``(0, height, width)`` uint8 tensor when ``segmentations`` is empty.
    """
    def _decode_one(polygons):
        rles = coco_mask.frPyObjects(polygons, height, width)
        decoded = coco_mask.decode(rles)
        if decoded.ndim < 3:
            # Add a trailing axis so the reduction below always has one.
            decoded = decoded[..., None]
        return torch.as_tensor(decoded, dtype=torch.uint8).any(dim=2)

    per_object = [_decode_one(polygons) for polygons in segmentations]
    if not per_object:
        # No segmentations means no objects: return an all-zero, zero-length stack.
        return torch.zeros((0, height, width), dtype=torch.uint8)
    return torch.stack(per_object, dim=0)


def convert_to_coco_api(self):
    """
    Build an in-memory ``COCO`` object from a dataset.

    The dataset (passed as ``self``) must support ``len()`` and
    ``get_annotations(idx)`` returning ``(targets, height, width)`` where
    ``targets`` holds ``image_id``, ``boxes``, ``labels``, ``area``,
    ``iscrowd`` and optionally ``masks``.

    :return: a ``COCO`` instance whose index has been created
    """
    images = []
    annotations = []
    seen_categories = set()
    # annotation IDs need to start at 1, not 0, see torchvision issue #1530
    next_ann_id = 1

    for img_idx in range(len(self)):
        targets, h, w = self.get_annotations(img_idx)
        img_id = targets["image_id"].item()
        images.append({"id": img_id,
                       "height": h,
                       "width": w})

        # convert (xmin, ymin, xmax, ymax) to (xmin, ymin, w, h) on a copy
        xywh = targets["boxes"].clone()
        xywh[:, 2:] -= xywh[:, :2]
        xywh = xywh.tolist()
        labels = targets["labels"].tolist()
        areas = targets["area"].tolist()
        iscrowd = targets["iscrowd"].tolist()

        has_masks = "masks" in targets
        if has_masks:
            # make masks Fortran contiguous for coco_mask
            masks = targets["masks"].permute(0, 2, 1).contiguous().permute(0, 2, 1)

        for obj_idx, box in enumerate(xywh):
            ann = {"image_id": img_id,
                   "bbox": box,
                   "category_id": labels[obj_idx],
                   "area": areas[obj_idx],
                   "iscrowd": iscrowd[obj_idx],
                   "id": next_ann_id}
            seen_categories.add(labels[obj_idx])
            if has_masks:
                ann["segmentation"] = coco_mask.encode(masks[obj_idx].numpy())
            annotations.append(ann)
            next_ann_id += 1

    coco_ds = COCO()
    coco_ds.dataset = {"images": images,
                       "categories": [{"id": cat} for cat in sorted(seen_categories)],
                       "annotations": annotations}
    coco_ds.createIndex()
    return coco_ds


================================================
FILE: pytorch_keypoint/HRNet/train_utils/distributed_utils.py
================================================
from collections import defaultdict, deque
import datetime
import pickle
import time
import errno
import os

import torch
import torch.distributed as dist


class SmoothedValue(object):
    """Track a series of values and expose statistics over a sliding window
    as well as the average over the whole series.
    """
    def __init__(self, window_size=20, fmt=None):
        # bounded deque: only the latest `window_size` values are retained
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0
        self.fmt = "{value:.4f} ({global_avg:.4f})" if fmt is None else fmt

    def update(self, value, n=1):
        """Record `value`, counting it `n` times towards the global average."""
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Sum `count` and `total` over all processes.
        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        packed = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda")
        dist.barrier()
        dist.all_reduce(packed)
        synced_count, synced_total = packed.tolist()
        self.count = int(synced_count)
        self.total = synced_total

    @property
    def median(self):
        """Median of the values currently in the window (read-only)."""
        return torch.tensor(list(self.deque)).median().item()

    @property
    def avg(self):
        """Mean of the values currently in the window."""
        return torch.tensor(list(self.deque), dtype=torch.float32).mean().item()

    @property
    def global_avg(self):
        """Weighted mean over every value ever recorded."""
        return self.total / self.count

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    @property
    def value(self):
        """Most recently recorded value."""
        return self.deque[-1]

    def __str__(self):
        stats = dict(median=self.median,
                     avg=self.avg,
                     global_avg=self.global_avg,
                     max=self.max,
                     value=self.value)
        return self.fmt.format(**stats)


def all_gather(data):
    """
    Collect `data` from every process.
    Run all_gather on arbitrary picklable data (not necessarily tensors)
    Args:
        data: any picklable object
    Returns:
        list[data]: list of data gathered from each rank
    """
    num_procs = get_world_size()
    # a single process has nothing to exchange
    if num_procs == 1:
        return [data]

    gathered = [None for _ in range(num_procs)]
    dist.all_gather_object(gathered, data)
    return gathered


def reduce_dict(input_dict, average=True):
    """
    Reduce the values in the dictionary from all processes so that every
    process ends up with the same result.
    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum
    Returns:
        a dict with the same fields as input_dict, after reduction; with
        fewer than two processes `input_dict` itself is returned untouched.
    """
    num_procs = get_world_size()
    if num_procs < 2:  # single-process case
        return input_dict

    with torch.no_grad():  # multi-process case
        # sort the keys so that they are consistent across processes
        names = sorted(input_dict.keys())
        stacked = torch.stack([input_dict[name] for name in names], dim=0)
        dist.all_reduce(stacked)
        if average:
            stacked /= num_procs

        return dict(zip(names, stacked))


class MetricLogger(object):
    """Collect named SmoothedValue meters and print them periodically.

    Meters are created on first use by ``update`` and can be read back as
    attributes (``logger.loss``).
    """
    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Feed one value per keyword into the meter of the same name.

        Tensors are converted with ``.item()``; every value must end up as a
        float or int.
        """
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        # __getattr__ only runs after normal lookup failed, so instance
        # attributes never reach this point. Read `meters` straight from
        # __dict__: going through `self.meters` would re-enter __getattr__
        # forever on an instance whose __init__ has not run (copy / pickle).
        meters = self.__dict__.get("meters")
        if meters is not None and attr in meters:
            return meters[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        loss_str = [
            "{}: {}".format(name, str(meter))
            for name, meter in self.meters.items()
        ]
        return self.delimiter.join(loss_str)

    def synchronize_between_processes(self):
        """Synchronize every meter across processes."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register (or replace) the meter stored under `name`."""
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield the items of `iterable`, printing progress as it goes.

        A line is printed every `print_freq` items and for the last item; a
        total-time summary is printed once the iterable is exhausted.

        Args:
            iterable: sized iterable (``len()`` is required)
            print_freq (int): print interval, in items
            header (str): optional prefix for every printed line
        """
        i = 0
        if not header:
            header = ""
        num_iters = len(iterable)
        use_cuda = torch.cuda.is_available()
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        space_fmt = ":" + str(len(str(num_iters))) + "d"
        fields = [header,
                  '[{0' + space_fmt + '}/{1}]',
                  'eta: {eta}',
                  '{meters}',
                  'time: {time}',
                  'data: {data}']
        if use_cuda:
            fields.append('max mem: {memory:.0f}')
        log_msg = self.delimiter.join(fields)
        MB = 1024.0 * 1024.0
        for obj in iterable:
            # time spent waiting for the next item
            data_time.update(time.time() - end)
            yield obj
            # time for the whole iteration, including the consumer's work
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == num_iters - 1:
                eta_second = int(iter_time.global_avg * (num_iters - i))
                eta_string = str(datetime.timedelta(seconds=eta_second))
                fmt_kwargs = dict(eta=eta_string,
                                  meters=str(self),
                                  time=str(iter_time),
                                  data=str(data_time))
                if use_cuda:
                    fmt_kwargs["memory"] = torch.cuda.max_memory_allocated() / MB
                print(log_msg.format(i, num_iters, **fmt_kwargs))
            i += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        # max(..., 1) keeps the summary from dividing by zero on an empty iterable
        print('{} Total time: {} ({:.4f} s / it)'.format(header,
                                                         total_time_str,
                                                         total_time / max(num_iters, 1)))


def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
    """Return a LambdaLR scheduler that ramps the learning-rate multiplier
    linearly from `warmup_factor` to 1 over `warmup_iters` steps.
    """

    def lr_multiplier(step):
        """Map the current step count to a learning-rate multiplier."""
        # warm-up finished: use the base learning rate unchanged
        if step >= warmup_iters:
            return 1
        progress = float(step) / warmup_iters
        # linear interpolation warmup_factor -> 1
        return warmup_factor * (1 - progress) + progress

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_multiplier)


def mkdir(path):
    """Create `path` (including parents); an already existing path is not an error."""
    try:
        os.makedirs(path)
    except OSError as exc:
        # only "already exists" is tolerated, everything else propagates
        if exc.errno == errno.EEXIST:
            return
        raise


def setup_for_distributed(is_master):
    """
    Replace the builtin ``print`` so that it stays silent unless this is the
    master process or the call passes ``force=True``.
    """
    import builtins as __builtin__
    original_print = __builtin__.print

    def print(*args, **kwargs):
        # `force` is consumed here and never forwarded to the real print
        forced = kwargs.pop('force', False)
        if not (is_master or forced):
            return
        original_print(*args, **kwargs)

    __builtin__.print = print


def is_dist_avail_and_initialized():
    """Return True only when torch.distributed is available and initialized."""
    # short-circuit: is_initialized() is not queried when the package is unavailable
    return dist.is_available() and dist.is_initialized()


def get_world_size():
    """Return the number of processes, or 1 when not running distributed."""
    return dist.get_world_size() if is_dist_avail_and_initialized() else 1


def get_rank():
    """Return the rank of this process, or 0 when not running distributed."""
    return dist.get_rank() if is_dist_avail_and_initialized() else 0


def is_main_process():
    """Return True when this process has rank 0."""
    rank = get_rank()
    return rank == 0


def save_on_master(*args, **kwargs):
    """Forward the arguments to ``torch.save``, but only on the main process."""
    if not is_main_process():
        return
    torch.save(*args, **kwargs)


def init_distributed_mode(args):
    """Initialise torch.distributed from environment variables.

    Reads RANK / WORLD_SIZE / LOCAL_RANK, or SLURM_PROCID as a fallback, and
    stores the results on `args` (`rank`, `world_size`, `gpu`, `distributed`,
    `dist_backend`). When neither set of variables is present,
    `args.distributed` is set to False and nothing else happens.

    `args.dist_url` must already be set; in the SLURM branch
    `args.world_size` is not assigned here and must also be provided.
    """
    env = os.environ
    if 'RANK' in env and 'WORLD_SIZE' in env:
        args.rank = int(env["RANK"])
        args.world_size = int(env['WORLD_SIZE'])
        args.gpu = int(env['LOCAL_RANK'])
    elif 'SLURM_PROCID' in env:
        args.rank = int(env['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print('Not using distributed mode')
        args.distributed = False
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                            world_size=args.world_size, rank=args.rank)
    dist.barrier()
    # from here on only rank 0 prints (unless force=True is passed)
    setup_for_distributed(args.rank == 0)


================================================
FILE: pytorch_keypoint/HRNet/train_utils/group_by_aspect_ratio.py
================================================
import bisect
from collections import defaultdict
import copy
from itertools import repeat, chain
import math
import numpy as np

import torch
import torch.utils.data
from torch.utils.data.sampler import BatchSampler, Sampler
from torch.utils.model_zoo import tqdm
import torchvision

from PIL import Image


def _repeat_to_at_least(iterable, n):
    repeat_times = math.ceil(n / len(iterable))
    repeated = chain.from_iterable(repeat(iterable, repeat_times))
    return list(repeated)


class GroupedBatchSampler(BatchSampler):
    """
    Batch sampler that wraps another sampler and only ever puts indices of
    the same group into one mini-batch, while following the order of the
    wrapped sampler as closely as possible.
    Arguments:
        sampler (Sampler): Base sampler.
        group_ids (list[int]): If the sampler produces indices in range [0, N),
            `group_ids` must be a list of `N` ints which contains the group id of each sample.
            The group ids must be a continuous set of integers starting from
            0, i.e. they must be in the range [0, num_groups).
        batch_size (int): Size of mini-batch.
    """
    def __init__(self, sampler, group_ids, batch_size):
        if not isinstance(sampler, Sampler):
            raise ValueError(
                "sampler should be an instance of "
                "torch.utils.data.Sampler, but got sampler={}".format(sampler)
            )
        self.sampler = sampler
        self.group_ids = group_ids
        self.batch_size = batch_size

    def __iter__(self):
        pending = defaultdict(list)  # group id -> indices waiting for a full batch
        seen = defaultdict(list)     # group id -> every index seen for that group

        emitted = 0
        for idx in self.sampler:
            gid = self.group_ids[idx]
            pending[gid].append(idx)
            seen[gid].append(idx)
            if len(pending[gid]) == self.batch_size:
                yield pending[gid]
                emitted += 1
                del pending[gid]
            # this lookup also re-creates an empty entry right after a delete
            assert len(pending[gid]) < self.batch_size

        # the base sampler is exhausted; pad the leftover buffers so that the
        # number of batches produced is deterministic (== len(self))
        still_needed = len(self) - emitted
        if still_needed > 0:
            # fullest buffers first
            by_fill = sorted(pending.items(), key=lambda kv: len(kv[1]), reverse=True)
            for gid, _ in by_fill:
                shortfall = self.batch_size - len(pending[gid])
                filler = _repeat_to_at_least(seen[gid], shortfall)
                pending[gid].extend(filler[:shortfall])
                assert len(pending[gid]) == self.batch_size
                yield pending[gid]
                still_needed -= 1
                if still_needed == 0:
                    break
        assert still_needed == 0

    def __len__(self):
        return len(self.sampler) // self.batch_size


def _compute_aspect_ratios_slow(dataset, indices=None):
    """Compute width/height ratios by loading every requested sample.

    Fallback used when no metadata shortcut is available: each sample is
    fetched through a DataLoader and the ratio is taken from the last two
    dimensions of the returned image.

    :param dataset: dataset whose items unpack as ``(img, _)`` with ``img``
        exposing ``.shape``
    :param indices: sample indices to visit; defaults to the whole dataset
    :return: list of aspect ratios, one per visited sample
    """
    print("Your dataset doesn't support the fast path for "
          "computing the aspect ratios, so will iterate over "
          "the full dataset and load every image instead. "
          "This might take some time...")
    if indices is None:
        indices = range(len(dataset))

    # minimal sampler that walks `indices` in the given order
    class SubsetSampler(Sampler):
        def __init__(self, indices):
            self.indices = indices

        def __iter__(self):
            return iter(self.indices)

        def __len__(self):
            return len(self.indices)

    sampler = SubsetSampler(indices)
    # batch_size=1 plus a collate_fn that unwraps the single-item batch
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, sampler=sampler,
        num_workers=14,  # you might want to increase it for faster processing
        collate_fn=lambda x: x[0])
    aspect_ratios = []
    # NOTE(review): the bar total is len(dataset) even when `indices` selects
    # only a subset, so the bar may never reach 100% -- confirm intended
    with tqdm(total=len(dataset)) as pbar:
        for _i, (img, _) in enumerate(data_loader):
            pbar.update(1)
            height, width = img.shape[-2:]
            aspect_ratio = float(width) / float(height)
            aspect_ratios.append(aspect_ratio)
    return aspect_ratios


def _compute_aspect_ratios_custom_dataset(dataset, indices=None):
    if indices is None:
        indices = range(len(dataset))
    aspect_ratios = []
    for i in indices:
        height, width = dataset.get_height_and_width(i)
        aspect_ratio = float(width) / float(height)
        aspect_ratios.append(aspect_ratio)
    return aspect_ratios


def _compute_aspect_ratios_coco_dataset(dataset, indices=None):
    if indices is None:
        indices = range(len(dataset))
    aspect_ratios = []
    for i in indices:
        img_info = dataset.coco.imgs[dataset.ids[i]]
        aspect_ratio = float(img_info["width"]) / float(img_info["height"])
        aspect_ratios.append(aspect_ratio)
    return aspect_ratios


def _compute_aspect_ratios_voc_dataset(dataset, indices=None):
    """Compute width/height ratios by opening each file in ``dataset.images``.

    :param dataset: dataset exposing ``images``, indexable by sample index
    :param indices: sample indices to visit; defaults to the whole dataset
    :return: list of aspect ratios in the order of `indices`
    """
    if indices is None:
        indices = range(len(dataset))
    aspect_ratios = []
    for i in indices:
        # this doesn't load the data into memory, because PIL loads it lazily;
        # the context manager closes the file handle right away instead of
        # leaving it to garbage collection
        with Image.open(dataset.images[i]) as img:
            width, height = img.size
        aspect_ratios.append(float(width) / float(height))
    return aspect_ratios


def _compute_aspect_ratios_subset_dataset(dataset, indices=None):
    """Compute aspect ratios for a subset wrapper by delegating to the
    wrapped dataset.

    :param dataset: wrapper exposing ``indices`` and the wrapped ``dataset``
    :param indices: positions inside the subset; defaults to all of them
    :return: list of aspect ratios in the order of `indices`
    """
    if indices is None:
        indices = range(len(dataset))

    # translate subset positions into indices of the wrapped dataset
    parent_indices = [dataset.indices[pos] for pos in indices]
    return compute_aspect_ratios(dataset.dataset, parent_indices)


def compute_aspect_ratios(dataset, indices=None):
    """Compute width/height ratios, choosing the cheapest strategy available.

    Order of preference: a ``get_height_and_width`` method on the dataset,
    then the COCO / VOC / Subset specific helpers, and finally the slow path
    that loads every sample.

    :param dataset: dataset to inspect
    :param indices: sample indices to visit; defaults to the whole dataset
    :return: list of aspect ratios in the order of `indices`
    """
    if hasattr(dataset, "get_height_and_width"):
        return _compute_aspect_ratios_custom_dataset(dataset, indices)

    # checked in order; the first matching type wins
    typed_handlers = (
        (torchvision.datasets.CocoDetection, _compute_aspect_ratios_coco_dataset),
        (torchvision.datasets.VOCDetection, _compute_aspect_ratios_voc_dataset),
        (torch.utils.data.Subset, _compute_aspect_ratios_subset_dataset),
    )
    for dataset_type, handler in typed_handlers:
        if isinstance(dataset, dataset_type):
            return handler(dataset, indices)

    # slow path
    return _compute_aspect_ratios_slow(dataset, indices)


def _quantize(x, bins):
    bins = copy.deepcopy(bins)
    bins = sorted(bins)
    # bisect_right：寻找y元素按顺序应该排在bins中哪个元素的右边，返回的是索引
    quantized = list(map(lambda y: bisect.bisect_right(bins, y), x))
    return quantized


def create_aspect_ratio_groups(dataset, k=0):
    """Assign every sample of `dataset` to an aspect-ratio bin.

    :param dataset: dataset handed to ``compute_aspect_ratios``
    :param k: with k > 0, ``2 * k + 1`` bin edges are spaced evenly in log2
        between 0.5 and 2; otherwise a single edge at 1.0 is used
    :return: list with one bin index per sample
    """
    # width / height ratio of every image in the dataset
    aspect_ratios = compute_aspect_ratios(dataset)
    if k > 0:
        bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist()
    else:
        bins = [1.0]

    # bin index of every aspect ratio
    groups = _quantize(aspect_ratios, bins)
    # count number of elements per group
    _, counts = np.unique(groups, return_counts=True)
    fbins = [0] + bins + [np.inf]
    print("Using {} as bins for aspect ratio quantization".format(fbins))
    print("Count of instances per bin: {}".format(counts))
    return groups


================================================
FILE: pytorch_keypoint/HRNet/train_utils/loss.py
================================================
import torch


class KpLoss(object):
    """Per-keypoint weighted MSE loss between predicted and target heatmaps."""
    def __init__(self):
        # keep the element-wise loss so it can be weighted per keypoint
        self.criterion = torch.nn.MSELoss(reduction='none')

    def __call__(self, logits, targets):
        """
        :param logits: predicted heatmaps, [B, num_kps, H, W]
        :param targets: one dict per sample holding "heatmap" [num_kps, H, W]
            and "kps_weights" [num_kps]
        :return: scalar loss, summed over keypoints and divided by B
        """
        assert len(logits.shape) == 4, 'logits should be 4-ndim'
        device = logits.device
        batch_size = logits.shape[0]
        # [num_kps, H, W] -> [B, num_kps, H, W]
        heatmaps = torch.stack([t["heatmap"].to(device) for t in targets])
        # [num_kps] -> [B, num_kps]
        kps_weights = torch.stack([t["kps_weights"].to(device) for t in targets])

        # [B, num_kps, H, W] -> [B, num_kps]
        per_keypoint = self.criterion(logits, heatmaps).mean(dim=[2, 3])
        return torch.sum(per_keypoint * kps_weights) / batch_size


================================================
FILE: pytorch_keypoint/HRNet/train_utils/train_eval_utils.py
================================================
import math
import sys
import time

import torch

import transforms
import train_utils.distributed_utils as utils
from .coco_eval import EvalCOCOMetric
from .loss import KpLoss


def train_one_epoch(model, optimizer, data_loader, device, epoch,
                    print_freq=50, warmup=False, scaler=None):
    """Run one training epoch and return ``(mean_loss, last_lr)``.

    :param model: network called as ``model(images)``
    :param optimizer: optimizer stepped once per batch
    :param data_loader: yields ``(images, targets)`` batches
    :param device: device the images are moved to
    :param epoch: epoch index; warm-up is only applied when it is 0
    :param print_freq: logging interval, in batches
    :param warmup: enable the linear learning-rate warm-up in epoch 0
    :param scaler: gradient scaler; mixed precision is used when not None
    :return: running mean of the reduced loss (1-element tensor on `device`)
        and the learning rate of the first param group after the last batch
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if epoch == 0 and warmup is True:  # warm up the learning rate during the first epoch only
        warmup_factor = 1.0 / 1000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    mse = KpLoss()
    mloss = torch.zeros(1).to(device)  # mean losses
    for i, [images, targets] in enumerate(metric_logger.log_every(data_loader, print_freq, header)):
        images = torch.stack([image.to(device) for image in images])

        # mixed-precision context; disabled when no scaler is given
        with torch.cuda.amp.autocast(enabled=scaler is not None):
            results = model(images)

            losses = mse(results, targets)

        # reduce losses over all GPUs for logging purpose
        loss_dict_reduced = utils.reduce_dict({"losses": losses})
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()
        # running mean of the training loss
        mloss = (mloss * i + loss_value) / (i + 1)  # update mean losses

        if not math.isfinite(loss_value):  # abort training when the loss is inf or NaN
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        if scaler is not None:
            scaler.scale(losses).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            losses.backward()
            optimizer.step()

        if lr_scheduler is not None:  # only set while warming up in the first epoch
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced)
        now_lr = optimizer.param_groups[0]["lr"]
        metric_logger.update(lr=now_lr)

    # NOTE(review): now_lr is only bound inside the loop, so an empty
    # data_loader would raise UnboundLocalError here
    return mloss, now_lr


@torch.no_grad()
def evaluate(model, data_loader, device, flip=False, flip_pairs=None):
    """Evaluate keypoint predictions of `model` over `data_loader`.

    :param model: network called as ``model(images)``
    :param data_loader: yields ``(images, targets)``; ``data_loader.dataset.coco``
        is handed to the metric, and every target must hold "reverse_trans"
    :param device: device the images are moved to
    :param flip: also run the horizontally flipped images and average both outputs
    :param flip_pairs: left/right joint index pairs, required when `flip` is set
    :return: result of ``key_metric.evaluate()`` on the main process, None elsewhere
    """
    if flip:
        assert flip_pairs is not None, "enable flip must provide flip_pairs."

    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = "Test: "

    key_metric = EvalCOCOMetric(data_loader.dataset.coco, "keypoints", "key_results.json")
    for image, targets in metric_logger.log_every(data_loader, 100, header):
        images = torch.stack([img.to(device) for img in image])

        # skip the GPU-only call when running on CPU
        if device != torch.device("cpu"):
            torch.cuda.synchronize(device)

        model_time = time.time()
        outputs = model(images)
        if flip:
            flipped_images = transforms.flip_images(images)
            flipped_outputs = model(flipped_images)
            flipped_outputs = transforms.flip_back(flipped_outputs, flip_pairs)
            # feature is not aligned, shift flipped heatmap for higher accuracy
            # https://github.com/leoxiaobin/deep-high-resolution-net.pytorch/issues/22
            flipped_outputs[..., 1:] = flipped_outputs.clone()[..., 0:-1]
            outputs = (outputs + flipped_outputs) * 0.5

        model_time = time.time() - model_time

        # decode keypoint
        reverse_trans = [t["reverse_trans"] for t in targets]
        outputs = transforms.get_final_preds(outputs, reverse_trans, post_processing=True)

        key_metric.update(targets, outputs)
        metric_logger.update(model_time=model_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)

    # synchronize the collected results across all processes
    key_metric.synchronize_results()

    if utils.is_main_process():
        coco_info = key_metric.evaluate()
    else:
        coco_info = None

    return coco_info


================================================
FILE: pytorch_keypoint/HRNet/transforms.py
================================================
import math
import random
from typing import Tuple

import cv2
import numpy as np
import torch
from torchvision.transforms import functional as F
import matplotlib.pyplot as plt


def flip_images(img):
    """Return a copy of a [batch_size, channels, height, width] batch mirrored
    along its last (width) dimension.
    """
    assert len(img.shape) == 4, 'images has to be [batch_size, channels, height, width]'
    return torch.flip(img, dims=(3,))


def flip_back(output_flipped, matched_parts):
    """Undo a horizontal flip on predicted heatmaps.

    The heatmaps are mirrored along the width dimension and the channels of
    every pair in `matched_parts` are exchanged. The input tensor itself is
    left unmodified.

    :param output_flipped: tensor [batch_size, num_joints, height, width]
    :param matched_parts: iterable of index pairs whose channels are swapped
    :return: the restored heatmaps as a new tensor
    """
    assert len(output_flipped.shape) == 4, 'output_flipped has to be [batch_size, num_joints, height, width]'
    restored = torch.flip(output_flipped, dims=[3])

    for pair in matched_parts:
        first, second = pair[0], pair[1]
        # the right-hand side is gathered into a copy before the write-back
        restored[:, [first, second]] = restored[:, [second, first]]

    return restored


def get_max_preds(batch_heatmaps):
    """
    Locate the maximum of every heatmap.
    batch_heatmaps: torch.Tensor([batch_size, num_joints, height, width])
    returns: (preds, maxvals) where preds is [batch_size, num_joints, 2]
        holding (x, y) of each maximum and maxvals is [batch_size, num_joints, 1];
        preds is zeroed for joints whose maximum is not greater than 0
    """
    assert isinstance(batch_heatmaps, torch.Tensor), 'batch_heatmaps should be torch.Tensor'
    assert len(batch_heatmaps.shape) == 4, 'batch_images should be 4-ndim'

    batch_size, num_joints, _, w = batch_heatmaps.shape
    flat = batch_heatmaps.reshape(batch_size, num_joints, -1)
    maxvals, idx = torch.max(flat, dim=2)

    maxvals = maxvals.unsqueeze(dim=-1)
    idx = idx.float()

    # flat index -> (column, row) == (x, y), cast to the heatmap dtype/device
    preds = torch.stack((idx % w, torch.floor(idx / w)), dim=-1).to(batch_heatmaps)

    visible = torch.gt(maxvals, 0.0).repeat(1, 1, 2).float().to(batch_heatmaps.device)

    # in-place on purpose: keeps the dtype of preds
    preds *= visible
    return preds, maxvals


def affine_points(pt, t):
    """Apply the affine matrix `t` to the points `pt`.

    :param pt: array of shape [N, 2]
    :param t: affine matrix applied to the homogeneous points (2x3 at the
        visible call site)
    :return: transformed points, one row per input point
    """
    # append a column of ones: [N, 2] -> [N, 3] homogeneous coordinates
    homogeneous = np.hstack([pt, np.ones((pt.shape[0], 1), dtype=float)])
    return np.dot(t, homogeneous.T).T


def get_final_preds(batch_heatmaps: torch.Tensor,
                    trans: list = None,
                    post_processing: bool = False):
    """Decode heatmaps into keypoint coordinates mapped through `trans`.

    :param batch_heatmaps: tensor [batch_size, num_joints, height, width]
    :param trans: one affine matrix per sample, applied to that sample's
        decoded coordinates; must not be None
    :param post_processing: shift each maximum by a quarter pixel towards the
        higher-valued neighbour along x and y
    :return: ``(preds, maxvals)`` as numpy arrays
    """
    assert trans is not None
    coords, maxvals = get_max_preds(batch_heatmaps)

    heatmap_height = batch_heatmaps.shape[2]
    heatmap_width = batch_heatmaps.shape[3]

    # post-processing
    if post_processing:
        for n in range(coords.shape[0]):
            for p in range(coords.shape[1]):
                hm = batch_heatmaps[n][p]
                px = int(math.floor(coords[n][p][0] + 0.5))
                py = int(math.floor(coords[n][p][1] + 0.5))
                # only refine maxima that are away from the heatmap border
                if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:
                    # differences between the right/left and lower/upper neighbours
                    diff = torch.tensor(
                        [
                            hm[py][px + 1] - hm[py][px - 1],
                            hm[py + 1][px] - hm[py - 1][px]
                        ]
                    ).to(batch_heatmaps.device)
                    # in-place on a view of coords: moves 0.25 px along the sign of diff
                    coords[n][p] += torch.sign(diff) * .25

    preds = coords.clone().cpu().numpy()

    # Transform back
    for i in range(coords.shape[0]):
        preds[i] = affine_points(preds[i], trans[i])

    return preds, maxvals.cpu().numpy()


def decode_keypoints(outputs, origin_hw, num_joints: int = 17):
    """Decode per-joint heatmaps into keypoints scaled to the original image.

    :param outputs: heatmaps indexed as ``outputs[joint, row, col]``
    :param origin_hw: (height, width) of the original image
    :param num_joints: number of leading heatmaps to decode
    :return: ``(keypoints, scores)``; keypoints is [num_joints, 2] in (x, y)
        order, clipped to the image extent, scores holds the heatmap maxima
    """
    heatmap_h, heatmap_w = outputs.shape[-2:]
    xy_list = []
    score_list = []
    for joint in range(num_joints):
        row, col = np.unravel_index(np.argmax(outputs[joint]), (heatmap_h, heatmap_w))
        score_list.append(outputs[joint, row, col])
        xy_list.append((col, row))  # hw -> wh(xy)

    keypoints = np.array(xy_list, dtype=float)
    scores = np.array(score_list, dtype=float)

    # convert to full image scale
    origin_h, origin_w = origin_hw[0], origin_hw[1]
    keypoints[:, 0] = np.clip(keypoints[:, 0] / heatmap_w * origin_w, 0, origin_w)
    keypoints[:, 1] = np.clip(keypoints[:, 1] / heatmap_h * origin_h, 0, origin_h)
    return keypoints, scores


def resize_pad(img: np.ndarray, size: tuple):
    """Warp `img` into a `size` canvas with an affine transform and padding.

    :param img: image array of shape (h, w, c)
    :param size: target (height, width)
    :return: ``(resize_img, reverse_trans)`` where `reverse_trans` maps
        coordinates at 1/4 of the target resolution back to the source image
    """
    h, w, c = img.shape
    src = np.array([[0, 0],       # top-left corner in the source frame
                    [w - 1, 0],   # top-right corner in the source frame
                    [0, h - 1]],  # bottom-left corner in the source frame
                   dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
    if h / w > size[0] / size[1]:
        # image is relatively taller than the target: pad along the width
        wi = size[0] * (w / h)
        pad_w = (size[1] - wi) / 2
        # NOTE(review): the left edge is pad_w - 1 and the right edge is
        # size[1] - pad_w - 1, so the destination span is wi, not wi - 1 --
        # confirm this offset is intended
        dst[0, :] = [pad_w - 1, 0]            # top-left corner in the target frame
        dst[1, :] = [size[1] - pad_w - 1, 0]  # top-right corner in the target frame
        dst[2, :] = [pad_w - 1, size[0] - 1]  # bottom-left corner in the target frame
    else:
        # otherwise pad along the height
        hi = size[1] * (h / w)
        pad_h = (size[0] - hi) / 2
        dst[0, :] = [0, pad_h - 1]            # top-left corner in the target frame
        dst[1, :] = [size[1] - 1, pad_h - 1]  # top-right corner in the target frame
        dst[2, :] = [0, size[0] - pad_h - 1]  # bottom-left corner in the target frame

    trans = cv2.getAffineTransform(src, dst)  # forward affine matrix
    # warp the image with the forward transform
    resize_img = cv2.warpAffine(img,
                                trans,
                                size[::-1],  # w, h
                                flags=cv2.INTER_LINEAR)
    # import matplotlib.pyplot as plt
    # plt.imshow(resize_img)
    # plt.show()

    dst /= 4  # the predicted heatmap is 1/4 the size of the network input
    reverse_trans = cv2.getAffineTransform(dst, src)  # inverse affine matrix, used to map predictions back

    return resize_img, reverse_trans


def adjust_box(xmin: float, ymin: float, w: float, h: float, fixed_size: Tuple[float, float]):
    """Grow a box along w or h so that its h/w ratio matches `fixed_size`.

    :param xmin: left edge of the box
    :param ymin: top edge of the box
    :param w: box width
    :param h: box height
    :param fixed_size: target (height, width) that defines the ratio
    :return: (xmin, ymin, xmax, ymax) of the box, padded symmetrically
    """
    xmax = xmin + w
    ymax = ymin + h
    hw_ratio = fixed_size[0] / fixed_size[1]

    if h / w > hw_ratio:
        # box is too tall: widen it
        pad_w = (h / hw_ratio - w) / 2
        return xmin - pad_w, ymin, xmax + pad_w, ymax

    # box is too wide (or already matches): make it taller
    pad_h = (w * hw_ratio - h) / 2
    return xmin, ymin - pad_h, xmax, ymax + pad_h


def scale_box(xmin: float, ymin: float, w: float, h: float, scale_ratio: Tuple[float, float]):
    """Scale a box about its centre.

    :param scale_ratio: (height factor, width factor)
    :return: (xmin, ymin, w, h) of the scaled box
    """
    s_h = h * scale_ratio[0]
    s_w = w * scale_ratio[1]
    # shift the corner by half of the size change to keep the centre fixed
    return xmin - (s_w - w) / 2., ymin - (s_h - h) / 2., s_w, s_h


def plot_heatmap(image, heatmap, kps, kps_weights):
    """Show, for every keypoint with a positive weight, the image with that
    keypoint marked next to the keypoint's heatmap (one figure per keypoint).
    """
    for kp_id, weight in enumerate(kps_weights):
        if not (weight > 0):
            continue

        # left panel: image with the keypoint drawn as a red dot
        plt.subplot(1, 2, 1)
        plt.imshow(image)
        plt.plot(*kps[kp_id].tolist(), "ro")
        plt.title("image")

        # right panel: heatmap of the same keypoint
        plt.subplot(1, 2, 2)
        plt.imshow(heatmap[kp_id], cmap=plt.cm.Blues)
        plt.colorbar(ticks=[0, 1])
        plt.title(f"kp_id: {kp_id}")
        plt.show()


class Compose(object):
    """Chain several ``(image, target)`` transforms into a single callable."""
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        # feed the output of each transform into the next one, in order
        for transform in self.transforms:
            image, target = transform(image, target)
        return image, target


class ToTensor(object):
    """Convert the image with ``F.to_tensor``; the target is passed through."""
    def __call__(self, image, target):
        return F.to_tensor(image), target


class Normalize(object):
    """Normalize the image with ``F.normalize``; the target is passed through."""
    def __init__(self, mean=None, std=None):
        self.mean, self.std = mean, std

    def __call__(self, image, target):
        normalized = F.normalize(image, mean=self.mean, std=self.std)
        return normalized, target


class HalfBody(object):
    """Randomly replace target["box"] with a box around the visible upper- or lower-body keypoints.

    Args:
        p: probability of attempting the half-body crop.
        upper_body_ids: keypoint indices treated as upper body (required).
        lower_body_ids: keypoint indices of the lower body (required; stored,
            but any visible keypoint not in ``upper_body_ids`` is counted as lower body).
    """

    def __init__(self, p: float = 0.3, upper_body_ids=None, lower_body_ids=None):
        assert upper_body_ids is not None
        assert lower_body_ids is not None
        self.p = p
        self.upper_body_ids = upper_body_ids
        self.lower_body_ids = lower_body_ids

    def __call__(self, image, target):
        if not random.random() < self.p:
            return image, target

        kps = target["keypoints"]
        vis = target["visible"]

        # Sort the visible keypoints into the two body halves.
        upper_kps, lower_kps = [], []
        for idx, v in enumerate(vis):
            if v > 0.5:
                bucket = upper_kps if idx in self.upper_body_ids else lower_kps
                bucket.append(kps[idx])

        # Pick the upper or the lower half with equal probability.
        chosen = upper_kps if random.random() < 0.5 else lower_kps

        # Too few points to define a meaningful box: leave the target alone.
        if len(chosen) <= 2:
            return image, target

        pts = np.array(chosen, dtype=np.float32)
        xmin, ymin = np.min(pts, axis=0).tolist()
        xmax, ymax = np.max(pts, axis=0).tolist()
        w = xmax - xmin
        h = ymax - ymin
        if w > 1 and h > 1:
            # Enlarge the box so the keypoints do not sit right on its border.
            xmin, ymin, w, h = scale_box(xmin, ymin, w, h, (1.5, 1.5))
            target["box"] = [xmin, ymin, w, h]

        return image, target


class AffineTransform(object):
    """Warp the region described by target["box"] to a fixed-size image (optional random scale + rotation).

    The box (xmin, ymin, w, h) is first padded by ``adjust_box`` to the aspect
    ratio of ``fixed_size``. Three reference points (centre, top-middle,
    right-middle) of that region are mapped onto the corresponding points of
    the output image, which defines the affine matrix.

    Args:
        scale: if given, a (low, high) range; a factor drawn uniformly from it
            multiplies the source width and height.
        rotation: if given, an inclusive integer (low, high) range in degrees
            from which the rotation angle is drawn.
        fixed_size: output size as (height, width).

    ``__call__`` returns the warped image and the target, with
    ``target["trans"]`` (source -> output) and ``target["reverse_trans"]``
    (output / 4 -> source) added. If the target has "keypoints", they are
    updated in place.
    """
    def __init__(self,
                 scale: Tuple[float, float] = None,  # e.g. (0.65, 1.35)
                 rotation: Tuple[int, int] = None,   # e.g. (-45, 45)
                 fixed_size: Tuple[int, int] = (256, 192)):
        self.scale = scale
        self.rotation = rotation
        self.fixed_size = fixed_size

    def __call__(self, img, target):
        # Pad the box to the output aspect ratio; result is (xmin, ymin, xmax, ymax).
        src_xmin, src_ymin, src_xmax, src_ymax = adjust_box(*target["box"], fixed_size=self.fixed_size)
        src_w = src_xmax - src_xmin
        src_h = src_ymax - src_ymin
        src_center = np.array([(src_xmin + src_xmax) / 2, (src_ymin + src_ymax) / 2])
        src_p2 = src_center + np.array([0, -src_h / 2])  # top middle
        src_p3 = src_center + np.array([src_w / 2, 0])   # right middle

        # Matching reference points in the output image; fixed_size is (h, w), points are (x, y).
        dst_center = np.array([(self.fixed_size[1] - 1) / 2, (self.fixed_size[0] - 1) / 2])
        dst_p2 = np.array([(self.fixed_size[1] - 1) / 2, 0])  # top middle
        dst_p3 = np.array([self.fixed_size[1] - 1, (self.fixed_size[0] - 1) / 2])  # right middle

        if self.scale is not None:
            # Random isotropic scaling of the source region about its centre.
            scale = random.uniform(*self.scale)
            src_w = src_w * scale
            src_h = src_h * scale
            src_p2 = src_center + np.array([0, -src_h / 2])  # top middle
            src_p3 = src_center + np.array([src_w / 2, 0])   # right middle

        if self.rotation is not None:
            # Rotate the two reference points about the centre (uses the possibly scaled w/h).
            angle = random.randint(*self.rotation)  # degrees
            angle = angle / 180 * math.pi  # radians
            src_p2 = src_center + np.array([src_h / 2 * math.sin(angle), -src_h / 2 * math.cos(angle)])
            src_p3 = src_center + np.array([src_w / 2 * math.cos(angle), src_w / 2 * math.sin(angle)])

        src = np.stack([src_center, src_p2, src_p3]).astype(np.float32)
        dst = np.stack([dst_center, dst_p2, dst_p3]).astype(np.float32)

        trans = cv2.getAffineTransform(src, dst)  # forward affine matrix (source image -> fixed-size image)
        dst /= 4  # the network's predicted heatmap is 1/4 the size of the input image
        reverse_trans = cv2.getAffineTransform(dst, src)  # inverse affine matrix, kept for mapping results back later

        # Apply the affine transform to the image
        resize_img = cv2.warpAffine(img,
                                    trans,
                                    tuple(self.fixed_size[::-1]),  # [w, h]
                                    flags=cv2.INTER_LINEAR)

        if "keypoints" in target:
            kps = target["keypoints"]
            # Only keypoints whose x and y are both non-zero are transformed;
            # presumably (0, 0) marks an unlabeled keypoint - TODO confirm against the dataset.
            mask = np.logical_and(kps[:, 0] != 0, kps[:, 1] != 0)
            kps[mask] = affine_points(kps[mask], trans)
            target["keypoints"] = kps

        # import matplotlib.pyplot as plt
        # from draw_utils import draw_keypoints
        # resize_img = draw_keypoints(resize_img, target["keypoints"])
        # plt.imshow(resize_img)
        # plt.show()

        target["trans"] = trans
        target["reverse_trans"] = reverse_trans
        return resize_img, target


class RandomHorizontalFlip(object):
    """Randomly mirror the image horizontally; must be applied after AffineTransform.

    Args:
        p: probability of flipping.
        matched_parts: index pairs of left/right keypoints that swap places
            when the image is mirrored (required).
    """

    def __init__(self, p: float = 0.5, matched_parts: list = None):
        assert matched_parts is not None
        self.p = p
        self.matched_parts = matched_parts

    def __call__(self, image, target):
        if not random.random() < self.p:
            return image, target

        # image is laid out as [h, w, c]; reverse the width axis.
        image = np.ascontiguousarray(np.flip(image, axis=1))
        keypoints = target["keypoints"]
        visible = target["visible"]
        width = image.shape[1]

        # Mirror the x coordinates.
        keypoints[:, 0] = width - keypoints[:, 0] - 1

        # Exchange every left/right pair, for coordinates and visibility alike.
        for pair in self.matched_parts:
            first, second = pair[0], pair[1]

            saved_kp = keypoints[first, :].copy()
            keypoints[first, :] = keypoints[second, :]
            keypoints[second, :] = saved_kp

            saved_vis = visible[first].copy()
            visible[first] = visible[second]
            visible[second] = saved_vis

        target["keypoints"] = keypoints
        target["visible"] = visible
        return image, target


class KeypointToHeatMap(object):
    """Render keypoints as per-keypoint Gaussian heatmaps.

    Args:
        heatmap_hw: (height, width) of the generated heatmaps.
        gaussian_sigma: standard deviation of the Gaussian; the kernel radius
            is three times this value.
        keypoints_weights: optional per-keypoint weights that are multiplied
            into the final ``kps_weights``.

    ``__call__`` adds ``target["heatmap"]`` (one channel per keypoint) and
    ``target["kps_weights"]`` as float32 tensors and returns the image unchanged.
    Keypoint coordinates are divided by 4 to go from image to heatmap space.

    Note:
        When the target contains "visible", that array is used directly as the
        weight array, so zeroing the weight of an out-of-range keypoint also
        modifies ``target["visible"]`` in place.
    """

    def __init__(self,
                 heatmap_hw: Tuple[int, int] = (256 // 4, 192 // 4),
                 gaussian_sigma: int = 2,
                 keypoints_weights=None):
        self.heatmap_hw = heatmap_hw
        self.sigma = gaussian_sigma
        self.kernel_radius = self.sigma * 3
        self.use_kps_weights = keypoints_weights is not None
        self.kps_weights = keypoints_weights

        # Un-normalised Gaussian kernel (peak value 1 at the centre).
        kernel_size = 2 * self.kernel_radius + 1
        offsets = np.arange(kernel_size) - kernel_size // 2
        sq_dist = offsets[None, :] ** 2 + offsets[:, None] ** 2
        self.kernel = np.exp(-sq_dist / (2 * self.sigma ** 2)).astype(np.float32)

    def __call__(self, image, target):
        kps = target["keypoints"]
        num_kps = kps.shape[0]
        kps_weights = np.ones((num_kps,), dtype=np.float32)
        if "visible" in target:
            kps_weights = target["visible"]

        hm_h, hm_w = self.heatmap_hw[0], self.heatmap_hw[1]
        radius = self.kernel_radius
        heatmap = np.zeros((num_kps, hm_h, hm_w), dtype=np.float32)
        # Round to the nearest heatmap cell. The builtin ``int`` is used because
        # the ``np.int`` alias was removed in NumPy 1.24.
        heatmap_kps = (kps / 4 + 0.5).astype(int)
        for kp_id in range(num_kps):
            if kps_weights[kp_id] < 0.5:
                # Keypoint has low visibility: ignore it.
                continue

            x, y = heatmap_kps[kp_id]
            ul = [x - radius, y - radius]  # upper-left x, y
            br = [x + radius, y + radius]  # bottom-right x, y
            # If the kernel footprint around (x, y) does not overlap the heatmap,
            # drop the keypoint (this rule is not strict).
            if ul[0] > hm_w - 1 or ul[1] > hm_h - 1 or br[0] < 0 or br[1] < 0:
                kps_weights[kp_id] = 0
                continue

            # Usable part of the Gaussian kernel (kernel coordinates).
            g_x = (max(0, -ul[0]), min(br[0], hm_w - 1) - ul[0])
            g_y = (max(0, -ul[1]), min(br[1], hm_h - 1) - ul[1])
            # Matching region of the heatmap (heatmap coordinates).
            img_x = (max(0, ul[0]), min(br[0], hm_w - 1))
            img_y = (max(0, ul[1]), min(br[1], hm_h - 1))

            if kps_weights[kp_id] > 0.5:
                # Copy the usable part of the kernel into the heatmap.
                heatmap[kp_id][img_y[0]:img_y[1] + 1, img_x[0]:img_x[1] + 1] = \
                    self.kernel[g_y[0]:g_y[1] + 1, g_x[0]:g_x[1] + 1]

        if self.use_kps_weights:
            kps_weights = np.multiply(kps_weights, self.kps_weights)

        # plot_heatmap(image, heatmap, kps, kps_weights)

        target["heatmap"] = torch.as_tensor(heatmap, dtype=torch.float32)
        target["kps_weights"] = torch.as_tensor(kps_weights, dtype=torch.float32)

        return image, target


================================================
FILE: pytorch_keypoint/HRNet/validation.py
================================================
"""
该脚本用于调用训练好的模型权重去计算验证集/测试集的COCO指标
"""

import os
import json

import torch
from tqdm import tqdm
import numpy as np

from model import HighResolutionNet
from train_utils import EvalCOCOMetric
from my_dataset_coco import CocoKeypoint
import transforms


def summarize(self, catId=None):
    """Compute the ten COCO keypoint summary metrics from an accumulated evaluator.

    Note this function can *only* be applied on the default parameter setting.

    Args:
        self: an evaluator object exposing ``params`` (with ``iouThrs``,
            ``areaRngLbl`` and ``maxDets``) and an ``eval`` dict holding the
            'precision' and 'recall' arrays.
        catId: if an int, restrict the statistics to that category index;
            otherwise all categories are averaged.

    Returns:
        (stats, print_info): ``stats`` is a list of ten values (five AP followed
        by five AR, -1 where no valid entry exists) and ``print_info`` is the
        matching ten-line report.

    Raises:
        Exception: if ``self.eval`` is empty, i.e. accumulate() has not been run.
    """
    # Validate up front: previously this check ran only after self.eval had
    # already been indexed, so an empty eval surfaced as a KeyError instead.
    if not self.eval:
        raise Exception('Please run accumulate() first')

    def _summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):
        p = self.params
        iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
        titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
        typeStr = '(AP)' if ap == 1 else '(AR)'
        iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
            if iouThr is None else '{:0.2f}'.format(iouThr)

        aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
        mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]

        # precision is [TxRxKxAxM], recall is [TxKxAxM]; T (IoU threshold) leads both.
        s = self.eval['precision'] if ap == 1 else self.eval['recall']
        if iouThr is not None:
            t = np.where(iouThr == p.iouThrs)[0]
            s = s[t]

        if ap == 1:
            if isinstance(catId, int):
                s = s[:, :, catId, aind, mind]
            else:
                s = s[:, :, :, aind, mind]
        else:
            if isinstance(catId, int):
                s = s[:, catId, aind, mind]
            else:
                s = s[:, :, aind, mind]

        # Entries equal to -1 mark "no data" and are excluded from the mean.
        valid = s[s > -1]
        mean_s = -1 if len(valid) == 0 else np.mean(valid)

        print_string = iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)
        return mean_s, print_string

    # The same five settings are reported first for AP (ap=1), then for AR (ap=0).
    settings = (
        {},
        {"iouThr": .5},
        {"iouThr": .75},
        {"areaRng": 'medium'},
        {"areaRng": 'large'},
    )
    stats, print_list = [], []
    for ap in (1, 0):
        for kwargs in settings:
            value, line = _summarize(ap, maxDets=20, **kwargs)
            stats.append(value)
            print_list.append(line)

    print_info = "\n".join(print_list)

    return stats, print_info


def save_info(coco_evaluator,
              save_name: str = "record_mAP.txt"):
    """Summarize the COCO keypoint results and write the report to a text file.

    Args:
        coco_evaluator: evaluator object handed to ``summarize``.
        save_name: path of the text file to (over)write.
    """
    # Only the printable report is needed here; the numeric stats are discarded.
    _, report = summarize(coco_evaluator)
    content = "\n".join(["COCO results:", report])

    with open(save_name, "w") as f:
        f.write(content)


def main(args):
    """Evaluate trained HRNet weights on the COCO "val" split and save the keypoint metrics.

    Reads from ``args``: device, resize_hw, label_json_path, data_path,
    batch_size, weights_path and flip. Results are written to
    "key_results.json" (via EvalCOCOMetric) and "keypoint_record_mAP.txt".

    NOTE(review): ``args.person_det`` is not read in this function; the dataset
    is always created with ``det_json_path=None``.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    data_transform = {
        "val": transforms.Compose([
            transforms.AffineTransform(scale=(1.25, 1.25), fixed_size=args.resize_hw),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
    }

    # read class_indict
    label_json_path = args.label_json_path
    assert os.path.exists(label_json_path), "json file {} dose not exist.".format(label_json_path)
    with open(label_json_path, 'r') as f:
        person_coco_info = json.load(f)

    data_root = args.data_path

    # NOTE: a custom collate_fn is used below because each sample carries both an
    # image and a target, so the default batching cannot be used directly
    batch_size = args.batch_size
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using %g dataloader workers' % nw)

    # load validation data set
    val_dataset = CocoKeypoint(data_root, "val", transforms=data_transform["val"], det_json_path=None)
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt
    # val_dataset = VOCInstances(data_root, year="2012", txt_name="val.txt", transforms=data_transform["val"])
    val_dataset_loader = torch.utils.data.DataLoader(val_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     pin_memory=True,
                                                     num_workers=nw,
                                                     collate_fn=val_dataset.collate_fn)

    # create model
    model = HighResolutionNet()

    # load your own trained model weights
    weights_path = args.weights_path
    assert os.path.exists(weights_path), "not found {} file.".format(weights_path)
    model.load_state_dict(torch.load(weights_path, map_location='cpu'))
    # print(model)
    model.to(device)

    # evaluate on the val dataset
    key_metric = EvalCOCOMetric(val_dataset.coco, "keypoints", "key_results.json")
    model.eval()
    with torch.no_grad():
        for images, targets in tqdm(val_dataset_loader, desc="validation..."):
            # move the images to the selected device
            images = images.to(device)

            # inference
            outputs = model(images)
            if args.flip:
                # flip test: average the prediction with the one from the mirrored input
                flipped_images = transforms.flip_images(images)
                flipped_outputs = model(flipped_images)
                flipped_outputs = transforms.flip_back(flipped_outputs, person_coco_info["flip_pairs"])
                # feature is not aligned, shift flipped heatmap for higher accuracy
                # https://github.com/leoxiaobin/deep-high-resolution-net.pytorch/issues/22
                flipped_outputs[..., 1:] = flipped_outputs.clone()[..., 0:-1]
                outputs = (outputs + flipped_outputs) * 0.5

            # decode keypoint
            reverse_trans = [t["reverse_trans"] for t in targets]
            outputs = transforms.get_final_preds(outputs, reverse_trans, post_processing=True)

            key_metric.update(targets, outputs)

    key_metric.synchronize_results()
    key_metric.evaluate()

    save_info(key_metric.coco_evaluator, "keypoint_record_mAP.txt")


def str2bool(value):
    """Parse a command-line boolean such as "true"/"false", "yes"/"no" or "1"/"0".

    ``type=bool`` must not be used with argparse: ``bool("False")`` is True
    because any non-empty string is truthy.

    Raises:
        ValueError: if the text is not a recognised boolean (argparse reports
            this as a usage error).
    """
    if isinstance(value, bool):
        return value
    lowered = str(value).strip().lower()
    if lowered in ("true", "t", "yes", "y", "1"):
        return True
    if lowered in ("false", "f", "no", "n", "0"):
        return False
    raise ValueError("expected a boolean value, got {!r}".format(value))


def parse_args(argv=None):
    """Build the command-line parser of the validation script and parse ``argv``.

    Args:
        argv: list of argument strings; ``None`` means ``sys.argv[1:]``.

    Returns:
        The parsed ``argparse.Namespace``.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description=__doc__)

    # device type to use
    parser.add_argument('--device', default='cuda:0', help='device')

    # Two integers, e.g. ``--resize-hw 256 192``. ``type=list`` would split the
    # argument string into single characters.
    parser.add_argument('--resize-hw', type=int, nargs=2, default=[256, 192],
                        metavar=('H', 'W'), help="resize for predict")
    # whether to also evaluate on horizontally flipped images
    parser.add_argument('--flip', type=str2bool, default=True, help='whether using flipped images')

    # root directory of the dataset
    parser.add_argument('--data-path', default='/data/coco2017', help='dataset root')

    # trained weights file
    parser.add_argument('--weights-path', default='./pose_hrnet_w32_256x192.pth', type=str, help='training weights')

    # batch size
    parser.add_argument('--batch-size', default=1, type=int, metavar='N',
                        help='batch size when validation.')
    # mapping between class indices and class names
    parser.add_argument('--label-json-path', type=str, default="person_keypoints.json")
    # person detection results for the validation set provided by the original project;
    # set this to None to use the ground-truth boxes instead
    parser.add_argument('--person-det', type=str, default="./COCO_val2017_detections_AP_H_56_person.json")

    return parser.parse_args(argv)


if __name__ == "__main__":
    main(parse_args())


================================================
FILE: pytorch_object_detection/faster_rcnn/README.md
================================================
# Faster R-CNN

## 该项目主要是来自pytorch官方torchvision模块中的源码
* https://github.com/pytorch/vision/tree/master/torchvision/models/detection

## 环境配置：
* Python3.6/3.7/3.8
* Pytorch1.7.1(注意：必须是1.6.0或以上，因为使用官方提供的混合精度训练1.6.0后才支持)
* pycocotools(Linux:`pip install pycocotools`; Windows:`pip install pycocotools-windows`(不需要额外安装vs))
* Ubuntu或Centos(不建议Windows)
* 最好使用GPU训练
* 详细环境配置见`requirements.txt`

## 文件结构：
```
  ├── backbone: 特征提取网络，可以根据自己的要求选择
  ├── network_files: Faster R-CNN网络（包括Fast R-CNN以及RPN等模块）
  ├── train_utils: 训练验证相关模块（包括cocotools）
  ├── my_dataset.py: 自定义dataset用于读取VOC数据集
  ├── train_mobilenet.py: 以MobileNetV2做为backbone进行训练
  ├── train_resnet50_fpn.py: 以resnet50+FPN做为backbone进行训练
  ├── train_multi_GPU.py: 针对使用多GPU的用户使用
  ├── predict.py: 简易的预测脚本，使用训练好的权重进行预测测试
  ├── validation.py: 利用训练好的权重验证/测试数据的COCO指标，并生成record_mAP.txt文件
  └── pascal_voc_classes.json: pascal_voc标签文件
```

## 预训练权重下载地址（下载后放入backbone文件夹中）：
* MobileNetV2 weights(下载后重命名为`mobilenet_v2.pth`，然后放到`backbone`文件夹下): https://download.pytorch.org/models/mobilenet_v2-b0353104.pth
* Resnet50 weights(下载后重命名为`resnet50.pth`，然后放到`backbone`文件夹下): https://download.pytorch.org/models/resnet50-0676ba61.pth
* ResNet50+FPN weights: https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
* 注意，下载的预训练权重记得要重命名，比如在train_resnet50_fpn.py中读取的是`fasterrcnn_resnet50_fpn_coco.pth`文件，
  不是`fasterrcnn_resnet50_fpn_coco-258fb6c6.pth`，然后放到当前项目根目录下即可。
 
 
## 数据集，本例程使用的是PASCAL VOC2012数据集
* Pascal VOC2012 train/val数据集下载地址：http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
* 如果不了解数据集或者想使用自己的数据集进行训练，请参考我的bilibili：https://b23.tv/F1kSCK
* 使用ResNet50+FPN以及迁移学习在VOC2012数据集上得到的权重: 链接:https://pan.baidu.com/s/1ifilndFRtAV5RDZINSHj5w 提取码:dsz8

## 训练方法
* 确保提前准备好数据集
* 确保提前下载好对应预训练模型权重
* 若要训练mobilenetv2+fasterrcnn，直接使用train_mobilenet.py训练脚本
* 若要训练resnet50+fpn+fasterrcnn，直接使用train_resnet50_fpn.py训练脚本
* 若要使用多GPU训练，使用`python -m torch.distributed.launch --nproc_per_node=8 --use_env train_multi_GPU.py`指令,`nproc_per_node`参数为使用GPU数量
* 如果想指定使用哪些GPU设备可在指令前加上`CUDA_VISIBLE_DEVICES=0,3`(例如我只要使用设备中的第1块和第4块GPU设备)
* `CUDA_VISIBLE_DEVICES=0,3 python -m torch.distributed.launch --nproc_per_node=2 --use_env train_multi_GPU.py`

## 注意事项
* 在使用训练脚本时，注意要将`--data-path`(VOC_root)设置为自己存放`VOCdevkit`文件夹所在的**根目录**
* 由于带有FPN结构的Faster RCNN很吃显存，如果GPU的显存不够(如果batch_size小于8的话)建议在create_model函数中使用默认的norm_layer，
  即不传递norm_layer变量，默认去使用FrozenBatchNorm2d(即不会去更新参数的bn层),使用中发现效果也很好。
* 训练过程中保存的`results.txt`是每个epoch在验证集上的COCO指标，前12个值是COCO指标，后面两个值是训练平均损失以及学习率
* 在使用预测脚本时，要将`train_weights`设置为你自己生成的权重路径。
* 使用validation文件时，注意确保你的验证集或者测试集中必须包含每个类别的目标，并且使用时只需要修改`--num-classes`、`--data-path`和`--weights-path`即可，其他代码尽量不要改动

## 如果对Faster RCNN原理不是很理解可参考我的bilibili
* https://b23.tv/sXcBSP

## 进一步了解该项目，以及对Faster RCNN代码的分析可参考我的bilibili
* https://b23.tv/HvMiDy

## Faster RCNN框架图
![Faster R-CNN](fasterRCNN.png) 


================================================
FILE: pytorch_object_detection/faster_rcnn/backbone/__init__.py
================================================
from .resnet50_fpn_model import resnet50_fpn_backbone
from .mobilenetv2_model import MobileNetV2
from .vgg_model import vgg
from .feature_pyramid_network import LastLevelMaxPool, BackboneWithFPN


================================================
FILE: pytorch_object_detection/faster_rcnn/backbone/feature_pyramid_network.py
================================================
from collections import OrderedDict

import torch.nn as nn
import torch
from torch import Tensor
import torch.nn.functional as F

from torch.jit.annotations import Tuple, List, Dict


class IntermediateLayerGetter(nn.ModuleDict):
    """
    Module wrapper that returns the outputs of selected intermediate layers.

    The direct children of ``model`` are copied, in registration order, up to
    and including the last one named in ``return_layers``; ``forward`` then
    runs them sequentially. This assumes the children were registered in the
    order they are used and that no module is reused in the model's forward.
    Only direct children can be selected (``model.feature1`` works,
    ``model.feature1.layer2`` does not).

    Arguments:
        model (nn.Module): model from which the features are extracted
        return_layers (Dict[name, new_name]): maps the name of each child whose
            activation should be returned to the key under which it is
            returned.

    Raises:
        ValueError: if a key of ``return_layers`` is not a direct child of ``model``.
    """
    __annotations__ = {
        "return_layers": Dict[str, str],
    }

    def __init__(self, model, return_layers):
        child_names = [name for name, _ in model.named_children()]
        if not set(return_layers).issubset(child_names):
            raise ValueError("return_layers are not present in model")

        # Work on a stringified copy so the caller's mapping is left untouched.
        pending = {str(k): str(v) for k, v in return_layers.items()}
        kept = OrderedDict()

        # Keep children in order and stop right after the last requested one;
        # everything behind it is not needed.
        for child_name, child in model.named_children():
            kept[child_name] = child
            pending.pop(child_name, None)
            if len(pending) == 0:
                break

        super().__init__(kept)
        self.return_layers = return_layers

    def forward(self, x):
        collected = OrderedDict()
        # Run the kept children one after another and record the requested outputs.
        for layer_name, layer in self.items():
            x = layer(x)
            if layer_name in self.return_layers:
                collected[self.return_layers[layer_name]] = x
        return collected


class FeaturePyramidNetwork(nn.Module):
    """
    Module that adds a FPN from on top of a set of feature maps. This is based on
    `"Feature Pyramid Network for Object Detection" <https://arxiv.org/abs/1612.03144>`_.
    The feature maps are currently supposed to be in increasing depth
    order.
    The input to the model is expected to be an OrderedDict[Tensor], containing
    the feature maps on top of which the FPN will be added.
    Arguments:
        in_channels_list (list[int]): number of channels for each feature map that
            is passed to the module; entries equal to 0 are skipped
        out_channels (int): number of channels of the FPN representation
        extra_blocks (ExtraFPNBlock or None): if provided, extra operations will
            be performed. It is expected to take the fpn features, the original
            features and the names of the original features as input, and returns
            a new list of feature maps and their corresponding names
    """

    def __init__(self, in_channels_list, out_channels, extra_blocks=None):
        super().__init__()
        # 1x1 convs that bring every input feature map to out_channels
        self.inner_blocks = nn.ModuleList()
        # 3x3 convs applied to the merged maps to produce the output feature maps
        self.layer_blocks = nn.ModuleList()
        for in_channels in in_channels_list:
            if in_channels == 0:
                continue
            inner_block_module = nn.Conv2d(in_channels, out_channels, 1)
            layer_block_module = nn.Conv2d(out_channels, out_channels, 3, padding=1)
            self.inner_blocks.append(inner_block_module)
            self.layer_blocks.append(layer_block_module)

        # initialize parameters now to avoid modifying the initialization of top_blocks
        # (extra_blocks is attached only afterwards).
        # modules() is required here: children() yields just the two ModuleList
        # containers, so the isinstance check never matched and the convs were
        # silently left with PyTorch's default initialisation.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_uniform_(m.weight, a=1)
                nn.init.constant_(m.bias, 0)

        self.extra_blocks = extra_blocks

    def get_result_from_inner_blocks(self, x: Tensor, idx: int) -> Tensor:
        """
        This is equivalent to self.inner_blocks[idx](x),
        but torchscript doesn't support this yet
        """
        num_blocks = len(self.inner_blocks)
        if idx < 0:
            idx += num_blocks
        i = 0
        out = x
        for module in self.inner_blocks:
            if i == idx:
                out = module(x)
            i += 1
        return out

    def get_result_from_layer_blocks(self, x: Tensor, idx: int) -> Tensor:
        """
        This is equivalent to self.layer_blocks[idx](x),
        but torchscript doesn't support this yet
        """
        num_blocks = len(self.layer_blocks)
        if idx < 0:
            idx += num_blocks
        i = 0
        out = x
        for module in self.layer_blocks:
            if i == idx:
                out = module(x)
            i += 1
        return out

    def forward(self, x: Dict[str, Tensor]) -> Dict[str, Tensor]:
        """
        Computes the FPN for a set of feature maps.
        Arguments:
            x (OrderedDict[Tensor]): feature maps for each feature level.
        Returns:
            results (OrderedDict[Tensor]): feature maps after FPN layers.
                They are ordered from highest resolution first.
        """
        # unpack OrderedDict into two lists for easier handling
        names = list(x.keys())
        x = list(x.values())

        # project the deepest feature map to out_channels
        # last_inner = self.inner_blocks[-1](x[-1])
        last_inner = self.get_result_from_inner_blocks(x[-1], -1)
        # results collects one output feature map per level
        results = []
        # the deepest output is the 3x3 conv of the projected deepest map
        # results.append(self.layer_blocks[-1](last_inner))
        results.append(self.get_result_from_layer_blocks(last_inner, -1))

        # top-down pathway: upsample the running map to each shallower level's
        # size and add that level's lateral projection
        for idx in range(len(x) - 2, -1, -1):
            inner_lateral = self.get_result_from_inner_blocks(x[idx], idx)
            feat_shape = inner_lateral.shape[-2:]
            inner_top_down = F.interpolate(last_inner, size=feat_shape, mode="nearest")
            last_inner = inner_lateral + inner_top_down
            results.insert(0, self.get_result_from_layer_blocks(last_inner, idx))

        # optionally derive additional levels from the outputs
        if self.extra_blocks is not None:
            results, names = self.extra_blocks(results, x, names)

        # make it back an OrderedDict
        out = OrderedDict([(k, v) for k, v in zip(names, results)])

        return out


class LastLevelMaxPool(torch.nn.Module):
    """
    Appends a stride-2 subsampling (max_pool2d with kernel size 1) of the last
    feature map, named "pool", to the given lists.

    Both ``x`` and ``names`` are extended in place and returned; ``y`` is unused.
    """

    def forward(self, x: List[Tensor], y: List[Tensor], names: List[str]) -> Tuple[List[Tensor], List[str]]:
        names.append("pool")
        pooled = F.max_pool2d(x[-1], kernel_size=1, stride=2, padding=0)
        x.append(pooled)
        return x, names


class BackboneWithFPN(nn.Module):
    """
    Wraps a backbone and puts a FeaturePyramidNetwork on top of its feature maps.

    When ``re_getter`` is True the backbone is wrapped in an
    IntermediateLayerGetter that returns the layers named in ``return_layers``
    (the limitations of IntermediateLayerGetter apply); otherwise the backbone
    is used as-is and is expected to return the feature maps itself.

    Arguments:
        backbone (nn.Module)
        return_layers (Dict[name, new_name]): maps the names of the modules whose
            activations are returned to the names of the returned activations.
            Required when ``re_getter`` is True.
        in_channels_list (List[int]): number of channels for each feature map
            that is returned, in the order they are present in the OrderedDict
        out_channels (int): number of channels in the FPN.
        extra_blocks: ExtraFPNBlock; defaults to LastLevelMaxPool()
        re_getter (bool): whether to wrap the backbone in an IntermediateLayerGetter
    Attributes:
        out_channels (int): the number of channels in the FPN
    """

    def __init__(self,
                 backbone: nn.Module,
                 return_layers=None,
                 in_channels_list=None,
                 out_channels=256,
                 extra_blocks=None,
                 re_getter=True):
        super().__init__()

        top_blocks = LastLevelMaxPool() if extra_blocks is None else extra_blocks

        if re_getter is True:
            assert return_layers is not None
            body = IntermediateLayerGetter(backbone, return_layers=return_layers)
        else:
            body = backbone
        self.body = body

        self.fpn = FeaturePyramidNetwork(
            in_channels_list=in_channels_list,
            out_channels=out_channels,
            extra_blocks=top_blocks,
        )

        self.out_channels = out_channels

    def forward(self, x):
        features = self.body(x)
        return self.fpn(features)


================================================
FILE: pytorch_object_detection/faster_rcnn/backbone/mobilenetv2_model.py
================================================
from torch import nn
import torch
from torchvision.ops import misc


def _make_divisible(ch, divisor=8, min_ch=None):
    """
    Round ``ch`` to the nearest multiple of ``divisor`` (at least ``min_ch``).

    Taken from the original TensorFlow repo, where it ensures that all layers
    have a channel number divisible by 8:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py

    ``min_ch`` defaults to ``divisor``. If rounding would shrink the value by
    more than 10%, one more ``divisor`` is added.
    """
    lower_bound = divisor if min_ch is None else min_ch
    rounded = int(ch + divisor / 2) // divisor * divisor
    rounded = max(lower_bound, rounded)
    # Make sure that rounding down does not go down by more than 10%.
    if rounded < 0.9 * ch:
        rounded += divisor
    return rounded


class ConvBNReLU(nn.Sequential):
    """Conv2d (no bias) -> normalisation layer -> ReLU6.

    The padding is ``(kernel_size - 1) // 2``; ``norm_layer`` defaults to
    ``nn.BatchNorm2d``.
    """

    def __init__(self, in_channel, out_channel, kernel_size=3, stride=1, groups=1, norm_layer=None):
        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        pad = (kernel_size - 1) // 2
        conv = nn.Conv2d(in_channel, out_channel, kernel_size, stride, pad, groups=groups, bias=False)
        super().__init__(
            conv,
            norm_layer(out_channel),
            nn.ReLU6(inplace=True),
        )


class InvertedResidual(nn.Module):
    """MobileNetV2 inverted residual block.

    Layout: optional 1x1 expansion (skipped when ``expand_ratio`` is 1),
    3x3 depthwise conv, then a linear 1x1 projection followed by a
    normalisation layer. The input is added to the output when ``stride`` is 1
    and the input and output channel counts match.
    """

    def __init__(self, in_channel, out_channel, stride, expand_ratio, norm_layer=None):
        super().__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d

        hidden_channel = in_channel * expand_ratio
        self.use_shortcut = stride == 1 and in_channel == out_channel

        stages = []
        if expand_ratio != 1:
            # 1x1 pointwise conv (expansion)
            stages.append(ConvBNReLU(in_channel, hidden_channel, kernel_size=1, norm_layer=norm_layer))
        # 3x3 depthwise conv
        stages.append(ConvBNReLU(hidden_channel, hidden_channel, stride=stride,
                                 groups=hidden_channel, norm_layer=norm_layer))
        # 1x1 pointwise conv (linear, no activation)
        stages.append(nn.Conv2d(hidden_channel, out_channel, kernel_size=1, bias=False))
        stages.append(norm_layer(out_channel))

        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        if not self.use_shortcut:
            return self.conv(x)
        return x + self.conv(x)


class MobileNetV2(nn.Module):
    """MobileNetV2 classification network.

    Args:
        num_classes: output size of the final linear layer.
        alpha: multiplier applied to every channel count before rounding.
        round_nearest: divisor handed to ``_make_divisible`` when rounding
            the scaled channel counts.
        weights_path: optional path to a saved state dict. When given, the
            state dict is loaded and the random initialisation is skipped.
        norm_layer: callable building a normalisation layer from a channel
            count. Defaults to ``nn.BatchNorm2d``.
    """

    def __init__(self, num_classes=1000, alpha=1.0, round_nearest=8, weights_path=None, norm_layer=None):
        super(MobileNetV2, self).__init__()
        block = InvertedResidual
        input_channel = _make_divisible(32 * alpha, round_nearest)
        last_channel = _make_divisible(1280 * alpha, round_nearest)

        if norm_layer is None:
            norm_layer = nn.BatchNorm2d

        inverted_residual_setting = [
            # t (expand ratio), c (output channels), n (repeats), s (stride of first repeat)
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1],
        ]

        features = []
        # conv1 layer
        features.append(ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer))
        # building inverted residual blocks
        for t, c, n, s in inverted_residual_setting:
            output_channel = _make_divisible(c * alpha, round_nearest)
            for i in range(n):
                # only the first block of each group applies the group's stride
                stride = s if i == 0 else 1
                features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer))
                input_channel = output_channel
        # building last several layers
        features.append(ConvBNReLU(input_channel, last_channel, 1, norm_layer=norm_layer))
        # combine feature layers
        self.features = nn.Sequential(*features)

        # building classifier
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(last_channel, num_classes)
        )

        if weights_path is None:
            # weight initialization
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    nn.init.kaiming_normal_(m.weight, mode='fan_out')
                    if m.bias is not None:
                        nn.init.zeros_(m.bias)
                elif isinstance(m, nn.BatchNorm2d):
                    nn.init.ones_(m.weight)
                    nn.init.zeros_(m.bias)
                elif isinstance(m, nn.Linear):
                    nn.init.normal_(m.weight, 0, 0.01)
                    nn.init.zeros_(m.bias)
        else:
            # Deserialize onto the CPU so that a checkpoint saved from a GPU
            # can still be loaded on a machine without CUDA; load_state_dict
            # copies the values into this module's own parameters.
            self.load_state_dict(torch.load(weights_path, map_location="cpu"))

    def forward(self, x):
        """Return the classifier output for the input batch ``x``."""
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x


================================================
FILE: pytorch_object_detection/faster_rcnn/backbone/resnet50_fpn_model.py
================================================
import os

import torch
import torch.nn as nn
from torchvision.ops.misc import FrozenBatchNorm2d

from .feature_pyramid_network import BackboneWithFPN, LastLevelMaxPool


class Bottleneck(nn.Module):
    """Residual bottleneck: 1x1 conv -> 3x3 conv -> 1x1 conv, plus a shortcut.

    The last convolution outputs ``out_channel * expansion`` channels.

    Args:
        in_channel: number of input channels.
        out_channel: width of the first two convolutions.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input to produce the
            shortcut branch; the raw input is used when it is None.
        norm_layer: callable building a normalisation layer from a channel
            count. Defaults to ``nn.BatchNorm2d``.
    """

    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, norm_layer=None):
        super().__init__()
        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        expanded = out_channel * self.expansion

        # 1x1 conv: squeeze channels
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=1, bias=False)
        self.bn1 = norm_layer(out_channel)
        # 3x3 conv: the only convolution that applies the stride
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = norm_layer(out_channel)
        # 1x1 conv: unsqueeze channels
        self.conv3 = nn.Conv2d(out_channel, expanded, kernel_size=1, stride=1, bias=False)
        self.bn3 = norm_layer(expanded)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Apply the three conv stages and add the shortcut before the final ReLU."""
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out += shortcut
        return self.relu(out)


class ResNet(nn.Module):
    """ResNet body built from four stages of ``block`` modules.

    Args:
        block: residual block class exposing an ``expansion`` attribute.
        blocks_num: sequence with the number of blocks in each of the four stages.
        num_classes: output size of the final linear layer (only used when
            ``include_top`` is True).
        include_top: whether to create and apply the average-pool + linear head.
        norm_layer: callable building a normalisation layer from a channel
            count. Defaults to ``nn.BatchNorm2d``.
    """

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True, norm_layer=None):
        super().__init__()
        self._norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        self.include_top = include_top
        # Running channel count; _make_layer updates it after every stage.
        self.in_channel = 64

        # Stem: 7x7 stride-2 conv followed by a 3x3 stride-2 max-pool
        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = self._norm_layer(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # (base width, stride) of layer1 .. layer4
        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
        for index, (width, stride) in enumerate(stage_specs):
            stage = self._make_layer(block, width, blocks_num[index], stride=stride)
            setattr(self, f"layer{index + 1}", stage)

        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        for conv in (m for m in self.modules() if isinstance(m, nn.Conv2d)):
            nn.init.kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` blocks and advance ``self.in_channel``."""
        norm_layer = self._norm_layer
        out_width = channel * block.expansion

        # A projection shortcut is needed whenever the first block changes
        # the spatial size or the channel count.
        downsample = None
        if stride != 1 or self.in_channel != out_width:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channel, out_width, kernel_size=1, stride=stride, bias=False),
                norm_layer(out_width))

        first = block(self.in_channel, channel, downsample=downsample,
                      stride=stride, norm_layer=norm_layer)
        self.in_channel = out_width
        rest = [block(self.in_channel, channel, norm_layer=norm_layer)
                for _ in range(1, block_num)]

        return nn.Sequential(first, *rest)

    def forward(self, x):
        """Run the stem and the four stages, then the head if ``include_top`` is set."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        if not self.include_top:
            return x

        x = self.avgpool(x)
        return self.fc(torch.flatten(x, 1))


def overwrite_eps(model, eps):
    """
    Replace the ``eps`` attribute of every FrozenBatchNorm2d layer in ``model``.

    This addresses the BC-breaking change introduced by the bug-fix at
    pytorch/vision#2933: the overwrite is meant to be applied when
    pretrained weights are loaded, to stay compatible with previous versions.

    Args:
        model (nn.Module): The model whose layers are updated in place.
        eps (float): The new value of eps.
    """
    frozen_layers = (layer for layer in model.modules()
                     if isinstance(layer, FrozenBatchNorm2d))
    for frozen_layer in frozen_layers:
        frozen_layer.eps = eps


def resnet50_fpn_backbone(pretrain_path="",
                          norm_layer=FrozenBatchNorm2d,  # behaves like BatchNorm2d, but its parameters are never updated
                          trainable_layers=3,
                          returned_layers=None,
                          extra_blocks=None):
    """
    Build a ResNet-50 backbone wrapped with a feature pyramid network (FPN).

    Args:
        pretrain_path: path to pretrained ResNet-50 weights; leave it empty
            to skip loading.
        norm_layer: normalisation layer class. The default is
            FrozenBatchNorm2d, i.e. a BN layer whose parameters are not
            updated (with a very small batch size a regular BN layer hurts
            more than it helps). With enough GPU memory for a large batch
            size, a regular BatchNorm2d can be passed instead.
            (https://github.com/facebookresearch/maskrcnn-benchmark/issues/267)
        trainable_layers: how many of ['layer4', 'layer3', 'layer2',
            'layer1', 'conv1'] (in that order) stay trainable; must be in
            the range 0..5.
        returned_layers: indices (1..4) of the ResNet stages whose outputs
            are fed to the FPN; defaults to [1, 2, 3, 4].
        extra_blocks: extra block applied on top of the FPN outputs;
            defaults to LastLevelMaxPool().

    Returns:
        The BackboneWithFPN module wrapping the ResNet-50 body.
    """
    resnet_backbone = ResNet(Bottleneck, [3, 4, 6, 3],
                             include_top=False,
                             norm_layer=norm_layer)

    # ``norm_layer`` is a class (or factory), not an instance, so the check
    # has to be issubclass(); isinstance() against the class itself is always
    # False and would silently skip the eps overwrite.
    if isinstance(norm_layer, type) and issubclass(norm_layer, FrozenBatchNorm2d):
        overwrite_eps(resnet_backbone, 0.0)

    if pretrain_path != "":
        assert os.path.exists(pretrain_path), "{} is not exist.".format(pretrain_path)
        # Load the pretrained weights. They are deserialized onto the CPU so
        # that a checkpoint saved from a GPU also loads on a CPU-only machine.
        print(resnet_backbone.load_state_dict(torch.load(pretrain_path, map_location="cpu"), strict=False))

    # select layers that won't be frozen
    assert 0 <= trainable_layers <= 5
    layers_to_train = ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'][:trainable_layers]

    # When every layer is trained, do not forget bn1, which follows conv1
    if trainable_layers == 5:
        layers_to_train.append("bn1")

    # freeze layers: every parameter that does NOT belong to one of the
    # layers in layers_to_train stops receiving gradients
    trainable_prefixes = tuple(layers_to_train)
    for name, parameter in resnet_backbone.named_parameters():
        if not name.startswith(trainable_prefixes):
            parameter.requires_grad_(False)

    if extra_blocks is None:
        extra_blocks = LastLevelMaxPool()

    if returned_layers is None:
        returned_layers = [1, 2, 3, 4]
    # every returned stage index must be greater than 0 and smaller than 5
    assert min(returned_layers) > 0 and max(returned_layers) < 5

    # return_layers = {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'}
    return_layers = {f'layer{k}': str(v) for v, k in enumerate(returned_layers)}

    # in_channel is the channel count of layer4's output feature map (2048)
    in_channels_stage2 = resnet_backbone.in_channel // 8  # 256
    # channel count of each ResNet-50 feature map handed to the FPN
    in_channels_list = [in_channels_stage2 * 2 ** (i - 1) for i in returned_layers]
    # channel count of every feature map produced by the FPN
    out_channels = 256
    return BackboneWithFPN(resnet_backbone, return_layers, in_channels_list, out_channels, extra_blocks=extra_blocks)


================================================
FILE: pytorch_object_detection/faster_rcnn/backbone/vgg_model.py
================================================
import torch.nn as nn
import torch


class VGG(nn.Module):
    """VGG classifier: a convolutional feature extractor plus three linear layers.

    Args:
        features: module producing the convolutional features; the first
            linear layer expects ``512 * 7 * 7`` values per sample after flattening.
        class_num: output size of the final linear layer.
        init_weights: when True and no ``weights_path`` is given, run
            ``_initialize_weights``.
        weights_path: optional path to a saved state dict to load.
    """

    def __init__(self, features, class_num=1000, init_weights=False, weights_path=None):
        super(VGG, self).__init__()
        self.features = features
        self.classifier = nn.Sequential(
            nn.Linear(512*7*7, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, class_num)
        )
        # Random initialisation is pointless when a checkpoint overwrites it.
        if init_weights and weights_path is None:
            self._initialize_weights()

        if weights_path is not None:
            # Deserialize onto the CPU so that a checkpoint saved from a GPU
            # can still be loaded on a machine without CUDA; load_state_dict
            # copies the values into this module's own parameters.
            self.load_state_dict(torch.load(weights_path, map_location="cpu"))

    def forward(self, x):
        """Return the classifier output for the input batch ``x``."""
        # N x 3 x 224 x 224
        x = self.features(x)
        # N x 512 x 7 x 7
        x = torch.flatten(x, start_dim=1)
        # N x 512*7*7
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Xavier-uniform initialise conv/linear weights and zero their biases."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                # nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)


def make_features(cfg: list):
    """Translate a VGG layer configuration into an ``nn.Sequential``.

    Args:
        cfg: list whose entries are either the string "M" (a 2x2 stride-2
            max-pool) or an integer (a 3x3 convolution with that many output
            channels followed by an in-place ReLU). The first convolution
            takes 3 input channels.

    Returns:
        The assembled ``nn.Sequential`` feature extractor.
    """
    modules = []
    prev_channels = 3
    for entry in cfg:
        if entry == "M":
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        modules.append(nn.Conv2d(prev_channels, entry, kernel_size=3, padding=1))
        modules.append(nn.ReLU(True))
        # The next convolution consumes what this one produced.
        prev_channels = entry
    return nn.Sequential(*modules)


# Layer configurations keyed by model name, consumed by make_features():
# an integer is the output channel count of a 3x3 convolution, and 'M'
# marks a max-pooling layer.
cfgs = {
    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


def vgg(model_name="vgg16", weights_path=None):
    """Build a VGG model from one of the named configurations in ``cfgs``.

    Args:
        model_name: key into ``cfgs`` selecting the layer configuration.
        weights_path: optional path to a saved state dict, forwarded to ``VGG``.

    Returns:
        The constructed ``VGG`` instance.
    """
    assert model_name in cfgs, "Warning: model number {} not in cfgs dict!".format(model_name)
    feature_extractor = make_features(cfgs[model_name])
    return VGG(feature_extractor, weights_path=weights_path)


================================================
FILE: pytorch_object_detection/faster_rcnn/change_backbone_with_fpn.py
================================================
import os
import datetime

import torch

import transforms
from network_files import FasterRCNN, AnchorsGenerator
from my_dataset import VOCDataSet
from train_utils import GroupedBatchSampler, create_aspect_ratio_groups
from train_utils import train_eval_utils as utils
from backbone import BackboneWithFPN, LastLevelMaxPool


def create_model(num_classes):
    """Build a Faster R-CNN whose backbone is MobileNetV3-Large wrapped with an FPN.

    Args:
        num_classes: number of classes handed to ``FasterRCNN``.

    Returns:
        The assembled ``FasterRCNN`` model.
    """
    import torchvision
    from torchvision.models.feature_extraction import create_feature_extractor

    # --- mobilenet_v3_large fpn backbone --- #
    mobilenet = torchvision.models.mobilenet_v3_large(pretrained=True)
    # print(mobilenet)
    # Nodes tapped from the backbone, mapped to the names of the returned features
    return_layers = {
        "features.6": "0",   # stride 8
        "features.12": "1",  # stride 16
        "features.16": "2",  # stride 32
    }
    # Channel count of each feature map handed to the FPN
    in_channels_list = [40, 112, 960]
    feature_extractor = create_feature_extractor(mobilenet, return_layers)
    # img = torch.randn(1, 3, 224, 224)
    # outputs = feature_extractor(img)
    # [print(f"{k} shape: {v.shape}") for k, v in outputs.items()]

    # --- efficientnet_b0 fpn backbone --- #
    # efficientnet = torchvision.models.efficientnet_b0(pretrained=True)
    # # print(efficientnet)
    # return_layers = {"features.3": "0",  # stride 8
    #                  "features.4": "1",  # stride 16
    #                  "features.8": "2"}  # stride 32
    # # Channel count of each feature map handed to the FPN
    # in_channels_list = [40, 80, 1280]
    # feature_extractor = create_feature_extractor(efficientnet, return_layers)
    # # img = torch.randn(1, 3, 224, 224)
    # # outputs = feature_extractor(img)
    # # [print(f"{k} shape: {v.shape}") for k, v in outputs.items()]

    fpn_backbone = BackboneWithFPN(feature_extractor,
                                   return_layers=return_layers,
                                   in_channels_list=in_channels_list,
                                   out_channels=256,
                                   extra_blocks=LastLevelMaxPool(),
                                   re_getter=False)

    # One anchor size per feature level, each with the same three aspect ratios
    anchor_sizes = ((64,), (128,), (256,), (512,))
    aspect_ratios = tuple((0.5, 1.0, 2.0) for _ in anchor_sizes)
    anchors = AnchorsGenerator(sizes=anchor_sizes,
                               aspect_ratios=aspect_ratios)

    box_pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=['0', '1', '2'],  # feature maps on which RoIAlign pooling is performed
        output_size=[7, 7],  # size of the feature matrix produced by RoIAlign pooling
        sampling_ratio=2)  # sampling ratio

    return FasterRCNN(backbone=fpn_backbone,
                      num_classes=num_classes,
                      rpn_anchor_generator=anchors,
                      box_roi_pool=box_pooler)


def main(args):
    """Train and evaluate the Faster R-CNN model on Pascal VOC 2012.

    Each epoch trains the model, steps the learning-rate scheduler, runs the
    evaluation, appends the metrics to a timestamped results file and saves a
    checkpoint. After the last epoch the loss/lr and mAP curves are plotted.

    Args:
        args: parsed command-line namespace. Attributes read here: device,
            data_path, aspect_ratio_group_factor, batch_size, num_classes,
            lr, momentum, weight_decay, amp, resume, start_epoch, epochs and
            output_dir (falls back to "./save_weights" when absent).

    Raises:
        FileNotFoundError: if ``VOCdevkit`` is not found under ``args.data_path``.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    # File that collects the COCO metrics, loss and learning rate of every epoch
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    # Checkpoints go to the directory selected with --output-dir; it is
    # created here as well so that main() does not depend on the caller.
    output_dir = getattr(args, "output_dir", "./save_weights")
    os.makedirs(output_dir, exist_ok=True)

    data_transform = {
        "train": transforms.Compose([transforms.ToTensor(),
                                     transforms.RandomHorizontalFlip(0.5)]),
        "val": transforms.Compose([transforms.ToTensor()])
    }

    VOC_root = args.data_path
    # check voc root
    if not os.path.exists(os.path.join(VOC_root, "VOCdevkit")):
        raise FileNotFoundError("VOCdevkit dose not in path:'{}'.".format(VOC_root))

    # load train data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt
    train_dataset = VOCDataSet(VOC_root, "2012", data_transform["train"], "train.txt")
    train_sampler = None
    train_batch_sampler = None

    # Optionally build batches from images with a similar aspect ratio.
    # This reduces the GPU memory needed for training and is on by default.
    if args.aspect_ratio_group_factor >= 0:
        train_sampler = torch.utils.data.RandomSampler(train_dataset)
        # Index of the aspect-ratio bin every image falls into
        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)
        # Every batch is drawn from a single aspect-ratio bin
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)

    # NOTE: collate_fn is custom because every sample holds an image and its
    # targets, which the default collation cannot merge into a batch.
    batch_size = args.batch_size
    # os.cpu_count() may return None, which min() cannot compare with ints
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using %g dataloader workers' % nw)
    # Compare against None explicitly: the truthiness of a sampler is its
    # length, which would wrongly select the other branch for an empty dataset.
    if train_sampler is not None:
        # Sampling by aspect ratio requires passing a batch_sampler to the dataloader
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_sampler=train_batch_sampler,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)
    else:
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_size=batch_size,
                                                        shuffle=True,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)

    # load validation data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt
    val_dataset = VOCDataSet(VOC_root, "2012", data_transform["val"], "val.txt")
    val_data_set_loader = torch.utils.data.DataLoader(val_dataset,
                                                      batch_size=1,
                                                      shuffle=False,
                                                      pin_memory=True,
                                                      num_workers=nw,
                                                      collate_fn=val_dataset.collate_fn)

    # create model; num_classes equals background + the object classes
    model = create_model(num_classes=args.num_classes + 1)
    # print(model)

    model.to(device)

    # define optimizer (only over parameters that still require gradients)
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.33)

    # When a checkpoint from a previous run is given, resume from it
    if args.resume != "":
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp and "scaler" in checkpoint:
            scaler.load_state_dict(checkpoint["scaler"])
        print("the training process from epoch{}...".format(args.start_epoch))

    train_loss = []
    learning_rate = []
    val_map = []

    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch, printing every 50 iterations
        mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader,
                                              device=device, epoch=epoch,
                                              print_freq=50, warmup=True,
                                              scaler=scaler)
        train_loss.append(mean_loss.item())
        learning_rate.append(lr)

        # update the learning rate
        lr_scheduler.step()

        # evaluate on the test dataset
        coco_info = utils.evaluate(model, val_data_set_loader, device=device)

        # write into txt
        with open(results_file, "a") as f:
            # The line holds the COCO metrics followed by the loss and the learning rate
            result_info = [f"{i:.4f}" for i in coco_info + [mean_loss.item()]] + [f"{lr:.6f}"]
            txt = "epoch:{} {}".format(epoch, '  '.join(result_info))
            f.write(txt + "\n")

        val_map.append(coco_info[1])  # pascal mAP

        # save weights
        save_files = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler': lr_scheduler.state_dict(),
            'epoch': epoch}
        if args.amp:
            save_files["scaler"] = scaler.state_dict()
        torch.save(save_files, os.path.join(output_dir, "resNetFpn-model-{}.pth".format(epoch)))

    # plot loss and lr curve
    if train_loss and learning_rate:
        from plot_curve import plot_loss_and_lr
        plot_loss_and_lr(train_loss, learning_rate)

    # plot mAP curve
    if val_map:
        from plot_curve import plot_map
        plot_map(val_map)


if __name__ == "__main__":
    import argparse

    def _str2bool(value):
        """Convert a command-line token such as 'true'/'false' into a bool.

        Without this converter argparse keeps the raw string, and any
        non-empty string (including "False") is truthy.
        """
        text = str(value).strip().lower()
        if text in ("1", "true", "t", "yes", "y"):
            return True
        if text in ("0", "false", "f", "no", "n"):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser(
        description=__doc__)

    # Device used for training
    parser.add_argument('--device', default='cuda:0', help='device')
    # Root directory of the training data set (the folder containing VOCdevkit)
    parser.add_argument('--data-path', default='./', help='dataset')
    # Number of object classes (background excluded)
    parser.add_argument('--num-classes', default=20, type=int, help='num_classes')
    # Directory the checkpoints are written to
    parser.add_argument('--output-dir', default='./save_weights', help='path where to save')
    # Checkpoint of a previous run to resume training from
    parser.add_argument('--resume', default='', type=str, help='resume from checkpoint')
    # Epoch to start training from
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # Total number of training epochs
    parser.add_argument('--epochs', default=15, type=int, metavar='N',
                        help='number of total epochs to run')
    # Learning rate
    parser.add_argument('--lr', default=0.005, type=float,
                        help='initial learning rate, 0.02 is the default value for training '
                             'on 8 gpus and 2 images_per_gpu')
    # momentum parameter of SGD
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # weight_decay parameter of SGD
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # Training batch size
    parser.add_argument('--batch_size', default=4, type=int, metavar='N',
                        help='batch size when training.')
    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
    # Mixed precision training (needs a GPU that supports it).
    # "--amp" alone and "--amp True" enable it; "--amp False" disables it.
    parser.add_argument("--amp", default=False, type=_str2bool, nargs="?", const=True,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()
    print(args)

    # Make sure the checkpoint directory exists
    os.makedirs(args.output_dir, exist_ok=True)

    main(args)


================================================
FILE: pytorch_object_detection/faster_rcnn/change_backbone_without_fpn.py
================================================
import os
import datetime

import torch

import transforms
from network_files import FasterRCNN, AnchorsGenerator
from my_dataset import VOCDataSet
from train_utils import GroupedBatchSampler, create_aspect_ratio_groups
from train_utils import train_eval_utils as utils


def create_model(num_classes):
    """Build a Faster R-CNN on a single-feature-map backbone (VGG16-BN, no FPN).

    Args:
        num_classes: number of classes handed to ``FasterRCNN``.

    Returns:
        The assembled ``FasterRCNN`` model.
    """
    import torchvision
    from torchvision.models.feature_extraction import create_feature_extractor

    # vgg16
    vgg16_bn = torchvision.models.vgg16_bn(pretrained=True)
    # print(vgg16_bn)
    backbone = create_feature_extractor(vgg16_bn, return_nodes={"features.42": "0"})
    # out = backbone(torch.rand(1, 3, 224, 224))
    # print(out["0"].shape)
    # Channel count of the returned feature map, set as an attribute on the backbone
    backbone.out_channels = 512

    # resnet50 backbone
    # resnet50 = torchvision.models.resnet50(pretrained=True)
    # # print(resnet50)
    # backbone = create_feature_extractor(resnet50, return_nodes={"layer3": "0"})
    # # out = backbone(torch.rand(1, 3, 224, 224))
    # # print(out["0"].shape)
    # backbone.out_channels = 1024

    # EfficientNetB0
    # efficientnet = torchvision.models.efficientnet_b0(pretrained=True)
    # # print(efficientnet)
    # backbone = create_feature_extractor(efficientnet, return_nodes={"features.5": "0"})
    # # out = backbone(torch.rand(1, 3, 224, 224))
    # # print(out["0"].shape)
    # backbone.out_channels = 112

    # A single feature map, so one tuple of sizes and one tuple of aspect ratios
    anchors = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),),
                               aspect_ratios=((0.5, 1.0, 2.0),))

    box_pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=['0'],  # feature maps on which RoIAlign pooling is performed
        output_size=[7, 7],  # size of the feature matrix produced by RoIAlign pooling
        sampling_ratio=2)  # sampling ratio

    return FasterRCNN(backbone=backbone,
                      num_classes=num_classes,
                      rpn_anchor_generator=anchors,
                      box_roi_pool=box_pooler)


def main(args):
    """Train and evaluate the Faster R-CNN model on Pascal VOC 2012.

    Each epoch trains the model, steps the learning-rate scheduler, runs the
    evaluation, appends the metrics to a timestamped results file and saves a
    checkpoint. After the last epoch the loss/lr and mAP curves are plotted.

    Args:
        args: parsed command-line namespace. Attributes read here: device,
            data_path, aspect_ratio_group_factor, batch_size, num_classes,
            lr, momentum, weight_decay, amp, resume, start_epoch, epochs and
            output_dir (falls back to "./save_weights" when absent).

    Raises:
        FileNotFoundError: if ``VOCdevkit`` is not found under ``args.data_path``.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    # File that collects the COCO metrics, loss and learning rate of every epoch
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    # Checkpoints go to the directory selected with --output-dir; it is
    # created here as well so that main() does not depend on the caller.
    output_dir = getattr(args, "output_dir", "./save_weights")
    os.makedirs(output_dir, exist_ok=True)

    data_transform = {
        "train": transforms.Compose([transforms.ToTensor(),
                                     transforms.RandomHorizontalFlip(0.5)]),
        "val": transforms.Compose([transforms.ToTensor()])
    }

    VOC_root = args.data_path
    # check voc root
    if not os.path.exists(os.path.join(VOC_root, "VOCdevkit")):
        raise FileNotFoundError("VOCdevkit dose not in path:'{}'.".format(VOC_root))

    # load train data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt
    train_dataset = VOCDataSet(VOC_root, "2012", data_transform["train"], "train.txt")
    train_sampler = None
    train_batch_sampler = None

    # Optionally build batches from images with a similar aspect ratio.
    # This reduces the GPU memory needed for training and is on by default.
    if args.aspect_ratio_group_factor >= 0:
        train_sampler = torch.utils.data.RandomSampler(train_dataset)
        # Index of the aspect-ratio bin every image falls into
        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)
        # Every batch is drawn from a single aspect-ratio bin
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)

    # NOTE: collate_fn is custom because every sample holds an image and its
    # targets, which the default collation cannot merge into a batch.
    batch_size = args.batch_size
    # os.cpu_count() may return None, which min() cannot compare with ints
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using %g dataloader workers' % nw)
    # Compare against None explicitly: the truthiness of a sampler is its
    # length, which would wrongly select the other branch for an empty dataset.
    if train_sampler is not None:
        # Sampling by aspect ratio requires passing a batch_sampler to the dataloader
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_sampler=train_batch_sampler,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)
    else:
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_size=batch_size,
                                                        shuffle=True,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)

    # load validation data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt
    val_dataset = VOCDataSet(VOC_root, "2012", data_transform["val"], "val.txt")
    val_data_set_loader = torch.utils.data.DataLoader(val_dataset,
                                                      batch_size=1,
                                                      shuffle=False,
                                                      pin_memory=True,
                                                      num_workers=nw,
                                                      collate_fn=val_dataset.collate_fn)

    # create model; num_classes equals background + the object classes
    model = create_model(num_classes=args.num_classes + 1)
    # print(model)

    model.to(device)

    # define optimizer (only over parameters that still require gradients)
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.33)

    # When a checkpoint from a previous run is given, resume from it
    if args.resume != "":
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp and "scaler" in checkpoint:
            scaler.load_state_dict(checkpoint["scaler"])
        print("the training process from epoch{}...".format(args.start_epoch))

    train_loss = []
    learning_rate = []
    val_map = []

    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch, printing every 50 iterations
        mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader,
                                              device=device, epoch=epoch,
                                              print_freq=50, warmup=True,
                                              scaler=scaler)
        train_loss.append(mean_loss.item())
        learning_rate.append(lr)

        # update the learning rate
        lr_scheduler.step()

        # evaluate on the test dataset
        coco_info = utils.evaluate(model, val_data_set_loader, device=device)

        # write into txt
        with open(results_file, "a") as f:
            # The line holds the COCO metrics followed by the loss and the learning rate
            result_info = [f"{i:.4f}" for i in coco_info + [mean_loss.item()]] + [f"{lr:.6f}"]
            txt = "epoch:{} {}".format(epoch, '  '.join(result_info))
            f.write(txt + "\n")

        val_map.append(coco_info[1])  # pascal mAP

        # save weights
        save_files = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler': lr_scheduler.state_dict(),
            'epoch': epoch}
        if args.amp:
            save_files["scaler"] = scaler.state_dict()
        torch.save(save_files, os.path.join(output_dir, "resNetFpn-model-{}.pth".format(epoch)))

    # plot loss and lr curve
    if train_loss and learning_rate:
        from plot_curve import plot_loss_and_lr
        plot_loss_and_lr(train_loss, learning_rate)

    # plot mAP curve
    if val_map:
        from plot_curve import plot_map
        plot_map(val_map)


if __name__ == "__main__":
    import argparse

    def _str2bool(value):
        """Convert a command-line token such as 'true'/'false' into a bool.

        Without this converter argparse keeps the raw string, and any
        non-empty string (including "False") is truthy.
        """
        text = str(value).strip().lower()
        if text in ("1", "true", "t", "yes", "y"):
            return True
        if text in ("0", "false", "f", "no", "n"):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser(
        description=__doc__)

    # Device used for training
    parser.add_argument('--device', default='cuda:0', help='device')
    # Root directory of the training data set (the folder containing VOCdevkit)
    parser.add_argument('--data-path', default='./', help='dataset')
    # Number of object classes (background excluded)
    parser.add_argument('--num-classes', default=20, type=int, help='num_classes')
    # Directory the checkpoints are written to
    parser.add_argument('--output-dir', default='./save_weights', help='path where to save')
    # Checkpoint of a previous run to resume training from
    parser.add_argument('--resume', default='', type=str, help='resume from checkpoint')
    # Epoch to start training from
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # Total number of training epochs
    parser.add_argument('--epochs', default=15, type=int, metavar='N',
                        help='number of total epochs to run')
    # Learning rate
    parser.add_argument('--lr', default=0.005, type=float,
                        help='initial learning rate, 0.02 is the default value for training '
                             'on 8 gpus and 2 images_per_gpu')
    # momentum parameter of SGD
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # weight_decay parameter of SGD
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # Training batch size
    parser.add_argument('--batch_size', default=4, type=int, metavar='N',
                        help='batch size when training.')
    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
    # Mixed precision training (needs a GPU that supports it).
    # "--amp" alone and "--amp True" enable it; "--amp False" disables it.
    parser.add_argument("--amp", default=False, type=_str2bool, nargs="?", const=True,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()
    print(args)

    # Make sure the checkpoint directory exists
    os.makedirs(args.output_dir, exist_ok=True)

    main(args)


================================================
FILE: pytorch_object_detection/faster_rcnn/draw_box_utils.py
================================================
from PIL.Image import Image, fromarray
import PIL.ImageDraw as ImageDraw
import PIL.ImageFont as ImageFont
from PIL import ImageColor
import numpy as np

# Palette of colour names used when painting detections. draw_objs picks the
# entry at index ``class_id % len(STANDARD_COLORS)`` and converts it with
# ImageColor.getrgb, so every object of a given class gets the same colour.
STANDARD_COLORS = [
    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',
    'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',
    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',
    'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
    'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',
    'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
    'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
    'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
    'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
    'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
    'WhiteSmoke', 'Yellow', 'YellowGreen'
]


def _text_size(font, text):
    """Return the ``(width, height)`` of ``text`` rendered with ``font``.

    ``font.getsize`` was removed in Pillow 10, so ``font.getbbox`` is preferred
    and ``getsize`` is only used on older Pillow versions that lack ``getbbox``.
    """
    if hasattr(font, "getbbox"):
        # The right/bottom edges of the bounding box correspond to the size the
        # legacy ``getsize`` call reported (it included the glyph offset).
        _, _, right, bottom = font.getbbox(text)
        return right, bottom
    return font.getsize(text)


def draw_text(draw,
              box: list,
              cls: int,
              score: float,
              category_index: dict,
              color: str,
              font: str = 'arial.ttf',
              font_size: int = 24):
    """
    Draw the "<class name>: <score>%" label of one detection next to its box.

    Args:
        draw: drawing handle providing ``rectangle`` and ``text``
            (an ``ImageDraw.Draw`` object in ``draw_objs``)
        box: ``[left, top, right, bottom]`` of the detection
        cls: class id; looked up in ``category_index`` as ``str(cls)``
        score: confidence, shown as an integer percentage
        category_index: mapping from class-id string to class name
        color: fill colour of the label background
        font: font file handed to ``ImageFont.truetype``
        font_size: font size handed to ``ImageFont.truetype``
    """
    try:
        font = ImageFont.truetype(font, font_size)
    except IOError:
        # The requested font file is not available: use Pillow's built-in font.
        font = ImageFont.load_default()

    left, top, right, bottom = box
    display_str = f"{category_index[str(cls)]}: {int(100 * score)}%"
    # The label is drawn one character at a time, so measure every character
    # once and reuse the sizes for both the label height and the drawing loop.
    char_sizes = [_text_size(font, ch) for ch in display_str]
    # The label has a top and bottom margin of 0.05x the tallest character.
    display_str_height = (1 + 2 * 0.05) * max(height for _, height in char_sizes)

    # If the label placed above the bounding box would stick out of the top of
    # the image, put it below the bounding box instead.
    if top > display_str_height:
        text_top = top - display_str_height
        text_bottom = top
    else:
        text_top = bottom
        text_bottom = bottom + display_str_height

    for ch, (text_width, _) in zip(display_str, char_sizes):
        margin = np.ceil(0.05 * text_width)
        draw.rectangle([(left, text_top),
                        (left + text_width + 2 * margin, text_bottom)], fill=color)
        draw.text((left + margin, text_top),
                  ch,
                  fill='black',
                  font=font)
        # Advance the pen so the next character starts right after this one.
        left += text_width


def draw_masks(image, masks, colors, thresh: float = 0.7, alpha: float = 0.5):
    """
    Blend a coloured overlay into the image wherever a mask exceeds ``thresh``.

    Args:
        image: picture to paint on; converted with ``np.array`` before use
        masks: per-object mask scores, iterated in step with ``colors``
        colors: one colour per mask
        thresh: values strictly greater than this count as part of the object
        alpha: weight of the painted layer in the blend (the untouched picture
            gets ``1 - alpha``)

    Returns:
        A new PIL image created from the blended array cast to uint8.
    """
    base = np.array(image)
    binary_masks = np.where(masks > thresh, True, False)

    # Paint each object's pixels with its colour on a copy of the picture.
    # TODO: There might be a way to vectorize this
    painted = base.copy()
    for obj_mask, obj_color in zip(binary_masks, colors):
        painted[obj_mask] = obj_color

    blended = base * (1 - alpha) + painted * alpha
    return fromarray(blended.astype(np.uint8))


def draw_objs(image: Image,
              boxes: np.ndarray = None,
              classes: np.ndarray = None,
              scores: np.ndarray = None,
              masks: np.ndarray = None,
              category_index: dict = None,
              box_thresh: float = 0.1,
              mask_thresh: float = 0.5,
              line_thickness: int = 8,
              font: str = 'arial.ttf',
              font_size: int = 24,
              draw_boxes_on_image: bool = True,
              draw_masks_on_image: bool = False):
    """
    Draw bounding boxes, class labels and (optionally) masks onto an image.

    Args:
        image: picture to draw on
        boxes: bounding boxes of the detections
        classes: class id of every detection
        scores: confidence of every detection
        masks: mask of every detection (optional)
        category_index: mapping from class id to class name
        box_thresh: detections whose score is not greater than this are dropped
        mask_thresh: threshold forwarded to ``draw_masks``
        line_thickness: width of the box outline
        font: font used for the labels
        font_size: size of the label font
        draw_boxes_on_image: whether boxes and labels are drawn
        draw_masks_on_image: whether masks are drawn (requires ``masks``)

    Returns:
        The image with the requested annotations; the input image itself when
        no detection passes ``box_thresh``.
    """
    # Discard low-confidence detections.
    keep = np.greater(scores, box_thresh)
    boxes, classes, scores = boxes[keep], classes[keep], scores[keep]
    if masks is not None:
        masks = masks[keep]
    if len(boxes) == 0:
        return image

    # One colour per detection, chosen by class id.
    palette_size = len(STANDARD_COLORS)
    colors = [ImageColor.getrgb(STANDARD_COLORS[cls % palette_size]) for cls in classes]

    if draw_boxes_on_image:
        canvas = ImageDraw.Draw(image)
        for box, cls, score, color in zip(boxes, classes, scores, colors):
            left, top, right, bottom = box
            # Closed outline of the bounding box.
            outline = [(left, top), (left, bottom), (right, bottom),
                       (right, top), (left, top)]
            canvas.line(outline, width=line_thickness, fill=color)
            # Class name and confidence label.
            draw_text(canvas, box.tolist(), int(cls), float(score), category_index, color, font, font_size)

    if draw_masks_on_image and (masks is not None):
        image = draw_masks(image, masks, colors, mask_thresh)

    return image


================================================
FILE: pytorch_object_detection/faster_rcnn/my_dataset.py
================================================
import numpy as np
from torch.utils.data import Dataset
import os
import torch
import json
from PIL import Image
from lxml import etree


class VOCDataSet(Dataset):
    """Read and parse the PASCAL VOC2007/2012 detection dataset.

    Each sample is described by one XML annotation file. Annotation files that
    are missing or contain no object are dropped when the dataset is created;
    images are only opened in ``__getitem__``.
    """

    def __init__(self, voc_root, year="2012", transforms=None, txt_name: str = "train.txt"):
        """
        Args:
            voc_root: dataset root. If the path contains "VOCdevkit" the
                ``VOC<year>`` folder is looked up directly inside it, otherwise
                inside ``<voc_root>/VOCdevkit``.
            year: "2007" or "2012"
            transforms: optional callable invoked as ``transforms(image, target)``
            txt_name: name of the split file inside ``ImageSets/Main``
        """
        assert year in ["2007", "2012"], "year must be in ['2007', '2012']"
        # Be tolerant about which directory level the caller passes in.
        if "VOCdevkit" in voc_root:
            self.root = os.path.join(voc_root, f"VOC{year}")
        else:
            self.root = os.path.join(voc_root, "VOCdevkit", f"VOC{year}")
        self.img_root = os.path.join(self.root, "JPEGImages")
        self.annotations_root = os.path.join(self.root, "Annotations")

        # read train.txt or val.txt file
        txt_path = os.path.join(self.root, "ImageSets", "Main", txt_name)
        assert os.path.exists(txt_path), "not found {} file.".format(txt_name)

        with open(txt_path) as read:
            xml_list = [os.path.join(self.annotations_root, line.strip() + ".xml")
                        for line in read.readlines() if len(line.strip()) > 0]

        self.xml_list = []
        # Keep only annotation files that exist and contain at least one object.
        for xml_path in xml_list:
            if os.path.exists(xml_path) is False:
                print(f"Warning: not found '{xml_path}', skip this annotation file.")
                continue

            data = self._read_annotation(xml_path)
            if "object" not in data:
                print(f"INFO: no objects in {xml_path}, skip this annotation file.")
                continue

            self.xml_list.append(xml_path)

        assert len(self.xml_list) > 0, "in '{}' file does not find any information.".format(txt_path)

        # read class_indict (class name -> class id); the path is relative to
        # the current working directory.
        json_file = './pascal_voc_classes.json'
        assert os.path.exists(json_file), "{} file not exist.".format(json_file)
        with open(json_file, 'r') as f:
            self.class_dict = json.load(f)

        self.transforms = transforms

    def __len__(self):
        return len(self.xml_list)

    def _read_annotation(self, xml_path):
        """Parse one annotation file and return the content of its "annotation" node as a dict."""
        with open(xml_path) as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        return self.parse_xml_to_dict(xml)["annotation"]

    def _build_target(self, data, idx, xml_path, skip_degenerate):
        """
        Convert the objects of a parsed annotation into a target dict of tensors.

        Args:
            data: parsed annotation dict containing an "object" list
            idx: sample index, stored as ``image_id``
            xml_path: annotation path, only used in the warning message
            skip_degenerate: when True, boxes with non-positive width or height
                are reported and left out

        Returns:
            dict with the keys "boxes", "labels", "image_id", "area", "iscrowd"
        """
        boxes = []
        labels = []
        iscrowd = []
        for obj in data["object"]:
            xmin = float(obj["bndbox"]["xmin"])
            xmax = float(obj["bndbox"]["xmax"])
            ymin = float(obj["bndbox"]["ymin"])
            ymax = float(obj["bndbox"]["ymax"])

            # Some annotations contain boxes whose width or height is 0; such
            # boxes would turn the regression loss into nan.
            if skip_degenerate and (xmax <= xmin or ymax <= ymin):
                print("Warning: in '{}' xml, there are some bbox w/h <=0".format(xml_path))
                continue

            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(self.class_dict[obj["name"]])
            # The "difficult" flag is optional; treat a missing flag as 0.
            iscrowd.append(int(obj["difficult"]) if "difficult" in obj else 0)

        # convert everything into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        iscrowd = torch.as_tensor(iscrowd, dtype=torch.int64)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        return {
            "boxes": boxes,
            "labels": labels,
            "image_id": image_id,
            "area": area,
            "iscrowd": iscrowd,
        }

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample ``idx``, after the optional transforms.

        Raises:
            ValueError: if the image file is not a JPEG.
        """
        xml_path = self.xml_list[idx]
        data = self._read_annotation(xml_path)
        img_path = os.path.join(self.img_root, data["filename"])
        image = Image.open(img_path)
        if image.format != "JPEG":
            raise ValueError("Image '{}' format not JPEG".format(img_path))

        assert "object" in data, "{} lack of object information.".format(xml_path)
        target = self._build_target(data, idx, xml_path, skip_degenerate=True)

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        return image, target

    def get_height_and_width(self, idx):
        """Return ``(height, width)`` of sample ``idx`` as recorded in its annotation file."""
        data = self._read_annotation(self.xml_list[idx])
        data_height = int(data["size"]["height"])
        data_width = int(data["size"]["width"])
        return data_height, data_width

    def parse_xml_to_dict(self, xml):
        """
        Recursively convert an XML tree into nested dictionaries (modelled on
        tensorflow's recursive_parse_xml_to_dict).

        Args:
            xml: xml tree obtained by parsing XML file contents using lxml.etree

        Returns:
            Python dictionary holding XML contents.
        """

        if len(xml) == 0:  # leaf node: return the text stored under its tag
            return {xml.tag: xml.text}

        result = {}
        for child in xml:
            child_result = self.parse_xml_to_dict(child)  # recurse into the child node
            if child.tag != 'object':
                result[child.tag] = child_result[child.tag]
            else:
                # There may be several "object" nodes, so collect them in a list.
                if child.tag not in result:
                    result[child.tag] = []
                result[child.tag].append(child_result[child.tag])
        return {xml.tag: result}

    def coco_index(self, idx):
        """
        Return the image size and the target of sample ``idx`` without opening
        the image. Intended for collecting label statistics for pycocotools;
        skipping the image read makes this much faster, and neither image nor
        labels are transformed.

        Args:
            idx: index of the sample

        Returns:
            ``((height, width), target)``
        """
        xml_path = self.xml_list[idx]
        data = self._read_annotation(xml_path)
        data_height = int(data["size"]["height"])
        data_width = int(data["size"]["width"])
        # All annotated boxes are kept here (no degenerate-box filtering).
        target = self._build_target(data, idx, xml_path, skip_degenerate=False)

        return (data_height, data_width), target

    @staticmethod
    def collate_fn(batch):
        """Regroup a list of ``(image, target)`` samples into ``(images, targets)`` tuples."""
        return tuple(zip(*batch))

# import transforms
# from draw_box_utils import draw_objs
# from PIL import Image
# import json
# import matplotlib.pyplot as plt
# import torchvision.transforms as ts
# import random
#
# # read class_indict
# category_index = {}
# try:
#     json_file = open('./pascal_voc_classes.json', 'r')
#     class_dict = json.load(json_file)
#     category_index = {str(v): str(k) for k, v in class_dict.items()}
# except Exception as e:
#     print(e)
#     exit(-1)
#
# data_transform = {
#     "train": transforms.Compose([transforms.ToTensor(),
#                                  transforms.RandomHorizontalFlip(0.5)]),
#     "val": transforms.Compose([transforms.ToTensor()])
# }
#
# # load train data set
# train_data_set = VOCDataSet(os.getcwd(), "2012", data_transform["train"], "train.txt")
# print(len(train_data_set))
# for index in random.sample(range(0, len(train_data_set)), k=5):
#     img, target = train_data_set[index]
#     img = ts.ToPILImage()(img)
#     plot_img = draw_objs(img,
#                          target["boxes"].numpy(),
#                          target["labels"].numpy(),
#                          np.ones(target["labels"].shape[0]),
#                          category_index=category_index,
#                          box_thresh=0.5,
#                          line_thickness=3,
#                          font='arial.ttf',
#                          font_size=20)
#     plt.imshow(plot_img)
#     plt.show()


================================================
FILE: pytorch_object_detection/faster_rcnn/network_files/__init__.py
================================================
from .faster_rcnn_framework import FasterRCNN, FastRCNNPredictor
from .rpn_function import AnchorsGenerator


================================================
FILE: pytorch_object_detection/faster_rcnn/network_files/boxes.py
================================================
import torch
from typing import Tuple
from torch import Tensor
import torchvision


def nms(boxes, scores, iou_threshold):
    # type: (Tensor, Tensor, float) -> Tensor
    """
    Run non-maximum suppression (NMS) on a set of boxes by delegating to the
    torchvision operator.

    NMS repeatedly drops a lower scoring box when its intersection-over-union
    (IoU) with a higher scoring box is greater than ``iou_threshold``.

    Parameters
    ----------
    boxes : Tensor[N, 4]
        boxes to run NMS on, in (x1, y1, x2, y2) format
    scores : Tensor[N]
        score of every box
    iou_threshold : float
        overlapping boxes with IoU > iou_threshold are discarded

    Returns
    -------
    keep : Tensor
        int64 tensor holding the indices of the boxes that survive NMS,
        sorted by decreasing score
    """
    kept_indices = torch.ops.torchvision.nms(boxes, scores, iou_threshold)
    return kept_indices


def batched_nms(boxes, scores, idxs, iou_threshold):
    # type: (Tensor, Tensor, Tensor, float) -> Tensor
    """
    Run non-maximum suppression separately for every category.

    ``idxs`` assigns each box to a category; boxes of different categories
    never suppress one another.

    Parameters
    ----------
    boxes : Tensor[N, 4]
        boxes to run NMS on, in (x1, y1, x2, y2) format
    scores : Tensor[N]
        score of every box
    idxs : Tensor[N]
        category index of every box
    iou_threshold : float
        overlapping boxes of the same category with IoU > iou_threshold
        are discarded

    Returns
    -------
    keep : Tensor
        int64 tensor holding the indices of the boxes that survive NMS,
        sorted by decreasing score
    """
    # Nothing to suppress: return an empty index tensor on the boxes' device.
    if boxes.numel() == 0:
        return torch.empty((0,), dtype=torch.int64, device=boxes.device)

    # Trick for per-category NMS with a single call: shift the boxes of each
    # category by an offset that depends only on the category index and is
    # larger than any coordinate, so boxes of different categories can no
    # longer overlap.
    largest_coordinate = boxes.max()

    # ``idxs.to(boxes)`` only aligns dtype and device with ``boxes``.
    category_shift = idxs.to(boxes) * (largest_coordinate + 1)
    shifted_boxes = boxes + category_shift.unsqueeze(1)
    return nms(shifted_boxes, scores, iou_threshold)


def remove_small_boxes(boxes, min_size):
    # type: (Tensor, float) -> Tensor
    """
    Find the boxes whose width and height are both at least ``min_size``.

    Arguments:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format
        min_size (float): minimum side length

    Returns:
        keep (Tensor[K]): indices of the boxes that have both sides
            greater than or equal to min_size
    """
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    wide_enough = torch.ge(widths, min_size)
    tall_enough = torch.ge(heights, min_size)
    big_enough = torch.logical_and(wide_enough, tall_enough)
    # torch.where on a boolean mask yields a tuple with the indices of the
    # True entries; take the indices along the only dimension.
    return torch.where(big_enough)[0]


def clip_boxes_to_image(boxes, size):
    # type: (Tensor, Tuple[int, int]) -> Tensor
    """
    Clip boxes so that they lie inside an image of size `size`; coordinates
    that fall outside the image are moved onto its border.

    Arguments:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format
        size (Tuple[height, width]): size of the image

    Returns:
        clipped_boxes (Tensor[N, 4])
    """
    stack_dim = boxes.dim()
    xs = boxes[..., 0::2]  # x1, x2
    ys = boxes[..., 1::2]  # y1, y2
    height, width = size

    if torchvision._is_tracing():
        # While tracing, the bounds are expressed as tensors and applied with
        # torch.max / torch.min instead of clamp.
        lower = torch.tensor(0, dtype=boxes.dtype, device=boxes.device)
        upper_x = torch.tensor(width, dtype=boxes.dtype, device=boxes.device)
        upper_y = torch.tensor(height, dtype=boxes.dtype, device=boxes.device)
        xs = torch.min(torch.max(xs, lower), upper_x)
        ys = torch.min(torch.max(ys, lower), upper_y)
    else:
        xs = xs.clamp(min=0, max=width)   # keep x inside [0, width]
        ys = ys.clamp(min=0, max=height)  # keep y inside [0, height]

    # Interleave the x and y pairs again so the layout is x1, y1, x2, y2.
    interleaved = torch.stack((xs, ys), dim=stack_dim)
    return interleaved.reshape(boxes.shape)


def box_area(boxes):
    """
    Compute the area of every box in a set of bounding boxes given by their
    (x1, y1, x2, y2) coordinates.

    Arguments:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format

    Returns:
        area (Tensor[N]): area of each box
    """
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    return widths * heights


def box_iou(boxes1, boxes2):
    """
    Compute the pairwise intersection-over-union (Jaccard index) of two sets
    of boxes, both given in (x1, y1, x2, y2) format.

    Arguments:
        boxes1 (Tensor[N, 4])
        boxes2 (Tensor[M, 4])

    Returns:
        iou (Tensor[N, M]): matrix whose entry [i, j] is the IoU of
            boxes1[i] and boxes2[j]
    """
    areas_first = box_area(boxes1)
    areas_second = box_area(boxes2)

    # Broadcasting [N,1,2] against [M,2] yields the corners of every pairwise
    # intersection rectangle: [N,M,2].
    inter_top_left = torch.max(boxes1[:, :2].unsqueeze(1), boxes2[:, :2])
    inter_bottom_right = torch.min(boxes1[:, 2:].unsqueeze(1), boxes2[:, 2:])

    # Boxes that do not overlap produce a negative extent; clamp it to zero.
    inter_wh = (inter_bottom_right - inter_top_left).clamp(min=0)  # [N,M,2]
    inter = inter_wh[..., 0] * inter_wh[..., 1]  # [N,M]

    union = areas_first[:, None] + areas_second - inter
    return inter / union


================================================
FILE: pytorch_object_detection/faster_rcnn/network_files/det_utils.py
================================================
import torch
import math
from typing import List, Tuple
from torch import Tensor


class BalancedPositiveNegativeSampler(object):
    """
    Sampler that picks a fixed-size subset of elements per image while aiming
    for a fixed share of positives.
    """

    def __init__(self, batch_size_per_image, positive_fraction):
        # type: (int, float) -> None
        """
        Arguments:
            batch_size_per_image (int): number of elements to be selected per image
            positive_fraction (float): share of positive elements per batch
        """
        self.batch_size_per_image = batch_size_per_image
        self.positive_fraction = positive_fraction

    def __call__(self, matched_idxs):
        # type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
        """
        Arguments:
            matched idxs: list of tensors containing -1, 0 or positive values.
                Each tensor corresponds to a specific image.
                -1 values are ignored, 0 are considered as negatives and > 0 as
                positives.

        Returns:
            pos_idx (list[tensor])
            neg_idx (list[tensor])

        Two lists of uint8 masks, one mask per image: the first marks the
        selected positive elements, the second the selected negative elements.
        """
        pos_masks = []
        neg_masks = []
        # Upper bound on the number of positives per image.
        positive_quota = int(self.batch_size_per_image * self.positive_fraction)

        for labels in matched_idxs:
            # Candidates: values >= 1 are positives, values == 0 are negatives.
            pos_candidates = torch.where(torch.ge(labels, 1))[0]
            neg_candidates = torch.where(torch.eq(labels, 0))[0]

            # Take all positives when there are fewer than the quota, then fill
            # the rest of the batch with negatives (again at most all of them).
            n_pos = min(pos_candidates.numel(), positive_quota)
            n_neg = min(neg_candidates.numel(), self.batch_size_per_image - n_pos)

            # Random choice: a random permutation of the candidate positions,
            # truncated to the requested count.
            pos_pick = torch.randperm(pos_candidates.numel(), device=pos_candidates.device)[:n_pos]
            neg_pick = torch.randperm(neg_candidates.numel(), device=neg_candidates.device)[:n_neg]
            chosen_pos = pos_candidates[pos_pick]
            chosen_neg = neg_candidates[neg_pick]

            # Turn the chosen indices into binary masks.
            pos_mask = torch.zeros_like(labels, dtype=torch.uint8)
            pos_mask[chosen_pos] = 1
            neg_mask = torch.zeros_like(labels, dtype=torch.uint8)
            neg_mask[chosen_neg] = 1

            pos_masks.append(pos_mask)
            neg_masks.append(neg_mask)

        return pos_masks, neg_masks


@torch.jit._script_if_tracing
def encode_boxes(reference_boxes, proposals, weights):
    # type: (torch.Tensor, torch.Tensor, torch.Tensor) -> torch.Tensor
    """
    Encode reference boxes relative to proposals as regression targets.

    Arguments:
        reference_boxes (Tensor): reference boxes (gt), (x1, y1, x2, y2)
        proposals (Tensor): boxes to be encoded (anchors), (x1, y1, x2, y2)
        weights (Tensor): four scale factors applied to dx, dy, dw, dh

    Returns:
        Tensor with one row per box holding (dx, dy, dw, dh)
    """
    # The weights are unpacked one by one to keep the code JIT-fusion friendly.
    weight_x = weights[0]
    weight_y = weights[1]
    weight_w = weights[2]
    weight_h = weights[3]

    # Every coordinate is kept as a column of shape [N, 1].
    anchor_x1 = proposals[:, 0:1]
    anchor_y1 = proposals[:, 1:2]
    anchor_x2 = proposals[:, 2:3]
    anchor_y2 = proposals[:, 3:4]

    gt_x1 = reference_boxes[:, 0:1]
    gt_y1 = reference_boxes[:, 1:2]
    gt_x2 = reference_boxes[:, 2:3]
    gt_y2 = reference_boxes[:, 3:4]

    # Size and centre of the proposals.
    anchor_w = anchor_x2 - anchor_x1
    anchor_h = anchor_y2 - anchor_y1
    anchor_cx = anchor_x1 + 0.5 * anchor_w
    anchor_cy = anchor_y1 + 0.5 * anchor_h

    # Size and centre of the reference boxes.
    gt_w = gt_x2 - gt_x1
    gt_h = gt_y2 - gt_y1
    gt_cx = gt_x1 + 0.5 * gt_w
    gt_cy = gt_y1 + 0.5 * gt_h

    # Centre offsets are normalised by the proposal size; size changes are
    # expressed as log ratios.
    delta_x = weight_x * (gt_cx - anchor_cx) / anchor_w
    delta_y = weight_y * (gt_cy - anchor_cy) / anchor_h
    delta_w = weight_w * torch.log(gt_w / anchor_w)
    delta_h = weight_h * torch.log(gt_h / anchor_h)

    return torch.cat((delta_x, delta_y, delta_w, delta_h), dim=1)


class BoxCoder(object):
    """
    Converts bounding boxes to and from the representation used for training
    the box regressors.
    """

    def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)):
        # type: (Tuple[float, float, float, float], float) -> None
        """
        Arguments:
            weights (4-element tuple): scale factors for (dx, dy, dw, dh)
            bbox_xform_clip (float): upper bound applied to dw/dh when decoding
        """
        self.weights = weights
        self.bbox_xform_clip = bbox_xform_clip

    def encode(self, reference_boxes, proposals):
        # type: (List[Tensor], List[Tensor]) -> List[Tensor]
        """
        Compute the regression parameters from anchors/proposals and the gt
        boxes matched to them.

        Args:
            reference_boxes: List[Tensor], gt boxes matched to each proposal/anchor
            proposals: List[Tensor], anchors/proposals

        Returns: regression parameters, split per image
        """
        # Remember how many boxes each image contributes so that the result of
        # the single concatenated computation can be split up again.
        counts = [len(per_image) for per_image in reference_boxes]
        all_references = torch.cat(reference_boxes, dim=0)
        all_proposals = torch.cat(proposals, dim=0)

        # rows of (targets_dx, targets_dy, targets_dw, targets_dh)
        encoded = self.encode_single(all_references, all_proposals)
        return encoded.split(counts, 0)

    def encode_single(self, reference_boxes, proposals):
        """
        Encode a set of proposals with respect to some reference boxes.

        Arguments:
            reference_boxes (Tensor): reference boxes
            proposals (Tensor): boxes to be encoded
        """
        # The weights follow the dtype and device of the reference boxes.
        weights = torch.as_tensor(self.weights,
                                  dtype=reference_boxes.dtype,
                                  device=reference_boxes.device)
        return encode_boxes(reference_boxes, proposals, weights)

    def decode(self, rel_codes, boxes):
        # type: (Tensor, List[Tensor]) -> Tensor
        """
        Apply predicted regression parameters to the anchors/proposals of a
        whole batch.

        Args:
            rel_codes: bbox regression parameters
            boxes: anchors/proposals, one tensor per image

        Returns:
            decoded boxes, reshaped to (total number of boxes, -1, 4) when
            there is at least one box
        """
        assert isinstance(boxes, (list, tuple))
        assert isinstance(rel_codes, torch.Tensor)

        total_boxes = 0
        for per_image in boxes:
            total_boxes += per_image.size(0)
        concat_boxes = torch.cat(boxes, dim=0)

        # Apply the predicted regression parameters to the matching anchors.
        decoded = self.decode_single(rel_codes, concat_boxes)

        # Skip the reshape when there are no boxes, it would fail on empty input.
        if total_boxes > 0:
            decoded = decoded.reshape(total_boxes, -1, 4)

        return decoded

    def decode_single(self, rel_codes, boxes):
        """
        Turn reference boxes plus encoded relative offsets into decoded boxes.

        Arguments:
            rel_codes (Tensor): encoded boxes (bbox regression parameters)
            boxes (Tensor): reference boxes (anchors/proposals)
        """
        boxes = boxes.to(rel_codes.dtype)

        # Size and centre of the reference boxes (xmin, ymin, xmax, ymax).
        ref_w = boxes[:, 2] - boxes[:, 0]
        ref_h = boxes[:, 3] - boxes[:, 1]
        ref_cx = boxes[:, 0] + 0.5 * ref_w
        ref_cy = boxes[:, 1] + 0.5 * ref_h

        # Undo the weighting; every 4th column belongs to the same parameter.
        weight_x, weight_y, weight_w, weight_h = self.weights
        dx = rel_codes[:, 0::4] / weight_x
        dy = rel_codes[:, 1::4] / weight_y
        dw = rel_codes[:, 2::4] / weight_w
        dh = rel_codes[:, 3::4] / weight_h

        # Bound dw/dh from above so torch.exp() never receives huge values.
        dw = torch.clamp(dw, max=self.bbox_xform_clip)
        dh = torch.clamp(dh, max=self.bbox_xform_clip)

        pred_cx = dx * ref_w[:, None] + ref_cx[:, None]
        pred_cy = dy * ref_h[:, None] + ref_cy[:, None]
        pred_w = torch.exp(dw) * ref_w[:, None]
        pred_h = torch.exp(dh) * ref_h[:, None]

        # Half extents, with 0.5 created as a tensor of matching dtype/device.
        half_w = torch.tensor(0.5, dtype=pred_cx.dtype, device=pred_w.device) * pred_w
        half_h = torch.tensor(0.5, dtype=pred_cy.dtype, device=pred_h.device) * pred_h

        x_min = pred_cx - half_w
        y_min = pred_cy - half_h
        x_max = pred_cx + half_w
        y_max = pred_cy + half_h

        # Interleave the four coordinates so every box is (xmin, ymin, xmax, ymax).
        return torch.stack((x_min, y_min, x_max, y_max), dim=2).flatten(1)


class Matcher(object):
    """
    Assigns every prediction (anchor / proposal) to a ground-truth index, or to
    one of the two negative sentinel values below, based on a pairwise quality
    (IoU) matrix.
    """
    BELOW_LOW_THRESHOLD = -1
    BETWEEN_THRESHOLDS = -2

    __annotations__ = {
        'BELOW_LOW_THRESHOLD': int,
        'BETWEEN_THRESHOLDS': int,
    }

    def __init__(self, high_threshold, low_threshold, allow_low_quality_matches=False):
        # type: (float, float, bool) -> None
        """
        Args:
            high_threshold (float): a prediction whose best quality is greater
                than or equal to this value keeps its ground-truth index.
            low_threshold (float): lower bound that splits the remaining
                predictions into two groups:
                - best quality in [low_threshold, high_threshold) -> BETWEEN_THRESHOLDS
                - best quality in [0, low_threshold)              -> BELOW_LOW_THRESHOLD
            allow_low_quality_matches (bool): when True, predictions that are the
                best candidate of some ground-truth get their match restored even
                if it is below the thresholds (see set_low_quality_matches_).
        """
        assert low_threshold <= high_threshold
        self.BELOW_LOW_THRESHOLD = -1
        self.BETWEEN_THRESHOLDS = -2
        self.high_threshold = high_threshold
        self.low_threshold = low_threshold
        self.allow_low_quality_matches = allow_low_quality_matches

    def __call__(self, match_quality_matrix):
        """
        For every prediction, find the ground-truth with the highest quality and
        record its index; predictions whose best quality is below low_threshold
        get -1, those in [low_threshold, high_threshold) get -2.

        Args:
            match_quality_matrix (Tensor[float]): an MxN tensor holding the
                pairwise quality between M ground-truth elements and N
                predicted elements.

        Returns:
            matches (Tensor[int64]): an N tensor; entry i is either a matched
            ground-truth index in [0, M - 1] or a negative sentinel meaning that
            prediction i could not be matched.

        Raises:
            ValueError: if the matrix is empty (no ground-truth or no predictions).
        """
        if match_quality_matrix.numel() == 0:
            # empty targets or proposals are not supported during training
            if match_quality_matrix.shape[0] == 0:
                raise ValueError(
                    "No ground-truth boxes available for one of the images "
                    "during training")
            raise ValueError(
                "No proposal boxes available for one of the images "
                "during training")

        # Rows are ground-truths, columns are predictions: reducing over dim 0
        # yields, per prediction, its best quality and the row that produced it.
        best_quality, matches = match_quality_matrix.max(dim=0)

        # Keep an untouched copy of the raw argmax so that low-quality matches
        # can be restored after thresholding.
        all_matches = matches.clone() if self.allow_low_quality_matches else None

        # Both masks are computed before any write, so the two assignments
        # below touch disjoint positions.
        is_below_low = best_quality < self.low_threshold
        is_between = (best_quality < self.high_threshold) & (
            best_quality >= self.low_threshold
        )
        matches[is_below_low] = self.BELOW_LOW_THRESHOLD  # -1
        matches[is_between] = self.BETWEEN_THRESHOLDS     # -2

        if self.allow_low_quality_matches:
            assert all_matches is not None
            self.set_low_quality_matches_(matches, all_matches, match_quality_matrix)

        return matches

    def set_low_quality_matches_(self, matches, all_matches, match_quality_matrix):
        """
        Restore matches for predictions that only have low-quality candidates.

        For each ground-truth, collect every prediction that reaches that
        ground-truth's maximum quality (ties included). Each such prediction
        gets back the index stored in all_matches, i.e. the ground-truth it
        overlaps best with, regardless of the thresholds. `matches` is modified
        in place.
        """
        # Highest quality reached by each ground-truth over all predictions.
        best_quality_per_gt = match_quality_matrix.max(dim=1)[0]

        # torch.where on a 2-D boolean mask returns (gt_indices, pred_indices);
        # a ground-truth contributes several entries when predictions tie for
        # its maximum. Only the prediction indices are needed here.
        reaches_gt_best = torch.eq(match_quality_matrix, best_quality_per_gt[:, None])
        pred_inds_to_update = torch.where(reaches_gt_best)[1]

        # Put back the raw argmax for those predictions, even when their
        # quality is under the configured thresholds.
        matches[pred_inds_to_update] = all_matches[pred_inds_to_update]


def smooth_l1_loss(input, target, beta: float = 1. / 9, size_average: bool = True):
    """
    Smooth L1 loss, very similar to the one shipped with pytorch but with an
    explicit ``beta`` parameter.

    Element-wise, with ``n = |input - target|``:
        0.5 * n ** 2 / beta   if n < beta
        n - 0.5 * beta        otherwise

    Arguments:
        input (Tensor): predicted values
        target (Tensor): reference values, broadcastable against ``input``
        beta (float): width of the quadratic zone around zero. Values below
            1e-5 are treated as 0, for which the loss is the plain L1 loss.
        size_average (bool): return the mean over all elements when True,
            otherwise the sum.

    Returns:
        Tensor: scalar loss
    """
    n = torch.abs(input - target)
    if beta < 1e-5:
        # With a vanishing beta the quadratic branch is never selected, but it
        # would still be evaluated by torch.where: dividing by ~0 produces
        # inf there and NaN gradients in backward. Use plain L1 instead.
        loss = n
    else:
        cond = torch.lt(n, beta)
        loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta)
    if size_average:
        return loss.mean()
    return loss.sum()


================================================
FILE: pytorch_object_detection/faster_rcnn/network_files/faster_rcnn_framework.py
================================================
import warnings
from collections import OrderedDict
from typing import Tuple, List, Dict, Optional, Union

import torch
from torch import nn, Tensor
import torch.nn.functional as F
from torchvision.ops import MultiScaleRoIAlign

from .roi_head import RoIHeads
from .transform import GeneralizedRCNNTransform
from .rpn_function import AnchorsGenerator, RPNHead, RegionProposalNetwork


class FasterRCNNBase(nn.Module):
    """
    Main class for Generalized R-CNN.

    Chains four components: transform -> backbone -> rpn -> roi_heads, then
    lets the transform post-process the detections.

    Arguments:
        backbone (nn.Module): called with the batched image tensor; returns
            either a single Tensor or an ordered dict of feature maps.
        rpn (nn.Module): called with (images, features, targets); returns
            (proposals, proposal_losses).
        roi_heads (nn.Module): takes the features + the proposals from the RPN and computes
            detections / masks from it.
        transform (nn.Module): performs the data transformation from the inputs to feed into
            the model
    """

    def __init__(self, backbone, rpn, roi_heads, transform):
        super(FasterRCNNBase, self).__init__()
        self.transform = transform
        self.backbone = backbone
        self.rpn = rpn
        self.roi_heads = roi_heads
        # used only on torchscript mode
        self._has_warned = False

    @torch.jit.unused
    def eager_outputs(self, losses, detections):
        # type: (Dict[str, Tensor], List[Dict[str, Tensor]]) -> Union[Dict[str, Tensor], List[Dict[str, Tensor]]]
        """Return the losses in training mode and the detections otherwise."""
        if self.training:
            return losses

        return detections

    def forward(self, images, targets=None):
        # type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]
        """
        Arguments:
            images (list[Tensor]): images to be processed
            targets (list[Dict[Tensor]]): ground-truth boxes present in the image (optional)

        Returns:
            result (list[BoxList] or dict[Tensor]): the output from the model.
                During training, it returns a dict[Tensor] which contains the losses.
                During testing, it returns list[BoxList] contains additional fields
                like `scores`, `labels` and `mask` (for Mask R-CNN models).
                When scripted, a (losses, detections) tuple is always returned.

        Raises:
            ValueError: in training mode, if targets is None or if a target's
                "boxes" entry is not a Tensor of shape [N, 4].
        """
        if self.training and targets is None:
            raise ValueError("In training mode, targets should be passed")

        if self.training:
            assert targets is not None
            # validate the "boxes" entry of every target before doing any work
            for target in targets:
                boxes = target["boxes"]
                if isinstance(boxes, torch.Tensor):
                    if len(boxes.shape) != 2 or boxes.shape[-1] != 4:
                        raise ValueError("Expected target boxes to be a tensor "
                                         "of shape [N, 4], got {:}.".format(
                                          boxes.shape))
                else:
                    raise ValueError("Expected target boxes to be of type "
                                     "Tensor, got {:}.".format(type(boxes)))

        # Remember each image's (height, width) before the transform resizes it,
        # so that detections can be mapped back to the original scale.
        original_image_sizes = torch.jit.annotate(List[Tuple[int, int]], [])
        for img in images:
            val = img.shape[-2:]
            assert len(val) == 2  # guards against a 1-D input
            original_image_sizes.append((val[0], val[1]))

        # pre-process the images (and targets)
        images, targets = self.transform(images, targets)

        # run the backbone on the batched image tensor to get the feature maps
        features = self.backbone(images.tensors)
        if isinstance(features, torch.Tensor):
            # single feature map: wrap it in an ordered dict under key '0';
            # multi-level backbones already return an ordered dict
            features = OrderedDict([('0', features)])

        # feed the feature maps and the targets to the RPN
        # proposals: List[Tensor], each of shape [num_proposals, 4],
        # in absolute (x1, y1, x2, y2) coordinates
        proposals, proposal_losses = self.rpn(images, features, targets)

        # feed the RPN output and the targets to the Fast R-CNN part
        detections, detector_losses = self.roi_heads(features, proposals, images.image_sizes, targets)

        # post-process the predictions (mainly mapping boxes back to the original image scale)
        detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)

        losses = {}
        losses.update(detector_losses)
        losses.update(proposal_losses)

        if torch.jit.is_scripting():
            if not self._has_warned:
                warnings.warn("RCNN always returns a (Losses, Detections) tuple in scripting")
                self._has_warned = True
            return losses, detections
        else:
            return self.eager_outputs(losses, detections)


class TwoMLPHead(nn.Module):
    """
    Two fully connected layers, each followed by a ReLU, applied to the
    flattened per-RoI features (standard head for FPN-based models).

    Arguments:
        in_channels (int): number of input features after flattening
        representation_size (int): size of the intermediate representation
    """

    def __init__(self, in_channels, representation_size):
        super().__init__()

        self.fc6 = nn.Linear(in_channels, representation_size)
        self.fc7 = nn.Linear(representation_size, representation_size)

    def forward(self, x):
        # collapse everything but the leading (per-RoI) dimension
        flat = torch.flatten(x, start_dim=1)

        hidden = F.relu(self.fc6(flat))
        return F.relu(self.fc7(hidden))


class FastRCNNPredictor(nn.Module):
    """
    Final Fast R-CNN layers: one linear layer producing the class scores and
    one producing four box regression values per class.

    Arguments:
        in_channels (int): number of input channels
        num_classes (int): number of output classes (including background)
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.cls_score = nn.Linear(in_channels, num_classes)
        self.bbox_pred = nn.Linear(in_channels, num_classes * 4)

    def forward(self, x):
        # a 4-D input is only accepted when its two trailing dims are 1x1
        if x.dim() == 4:
            trailing_dims = list(x.shape[2:])
            assert trailing_dims == [1, 1]

        flat = torch.flatten(x, start_dim=1)
        return self.cls_score(flat), self.bbox_pred(flat)


class FasterRCNN(FasterRCNNBase):
    """
    Implements Faster R-CNN.

    The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
    image, and should be in 0-1 range. Different images can have different sizes.

    The behavior of the model changes depending if it is in training or evaluation mode.

    During training, the model expects both the input tensors, as well as a targets (list of dictionary),
    containing:
        - boxes (FloatTensor[N, 4]): the ground-truth boxes in [x1, y1, x2, y2] format, with values
          between 0 and H and 0 and W
        - labels (Int64Tensor[N]): the class label for each ground-truth box

    The model returns a Dict[Tensor] during training, containing the classification and regression
    losses for both the RPN and the R-CNN.

    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as
    follows:
        - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values between
          0 and H and 0 and W
        - labels (Int64Tensor[N]): the predicted labels for each image
        - scores (Tensor[N]): the scores of each prediction

    Arguments:
        backbone (nn.Module): the network used to compute the features for the model.
            It should contain an out_channels attribute, which indicates the number of output
            channels that each feature map has (and it should be the same for all feature maps).
            The backbone should return a single Tensor or an OrderedDict[Tensor].
        num_classes (int): number of output classes of the model (including the background).
            If box_predictor is specified, num_classes should be None.
        min_size (int): minimum size of the image to be rescaled before feeding it to the backbone
        max_size (int): maximum size of the image to be rescaled before feeding it to the backbone
        image_mean (Tuple[float, float, float]): mean values used for input normalization.
            They are generally the mean values of the dataset on which the backbone has been trained
            on. Defaults to [0.485, 0.456, 0.406] when None.
        image_std (Tuple[float, float, float]): std values used for input normalization.
            They are generally the std values of the dataset on which the backbone has been trained on.
            Defaults to [0.229, 0.224, 0.225] when None.
        rpn_anchor_generator (AnchorsGenerator): module that generates the anchors for a set of feature
            maps.
        rpn_head (nn.Module): module that computes the objectness and regression deltas from the RPN
        rpn_pre_nms_top_n_train (int): number of proposals to keep before applying NMS during training
        rpn_pre_nms_top_n_test (int): number of proposals to keep before applying NMS during testing
        rpn_post_nms_top_n_train (int): number of proposals to keep after applying NMS during training
        rpn_post_nms_top_n_test (int): number of proposals to keep after applying NMS during testing
        rpn_nms_thresh (float): NMS threshold used for postprocessing the RPN proposals
        rpn_fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be
            considered as positive during training of the RPN.
        rpn_bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be
            considered as negative during training of the RPN.
        rpn_batch_size_per_image (int): number of anchors that are sampled during training of the RPN
            for computing the loss
        rpn_positive_fraction (float): proportion of positive anchors in a mini-batch during training
            of the RPN
        rpn_score_thresh (float): during inference, only return proposals with a classification score
            greater than rpn_score_thresh
        box_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in
            the locations indicated by the bounding boxes
        box_head (nn.Module): module that takes the cropped feature maps as input
        box_predictor (nn.Module): module that takes the output of box_head and returns the
            classification logits and box regression deltas.
        box_score_thresh (float): during inference, only return proposals with a classification score
            greater than box_score_thresh
        box_nms_thresh (float): NMS threshold for the prediction head. Used during inference
        box_detections_per_img (int): maximum number of detections per image, for all classes.
        box_fg_iou_thresh (float): minimum IoU between the proposals and the GT box so that they can be
            considered as positive during training of the classification head
        box_bg_iou_thresh (float): maximum IoU between the proposals and the GT box so that they can be
            considered as negative during training of the classification head
        box_batch_size_per_image (int): number of proposals that are sampled during training of the
            classification head
        box_positive_fraction (float): proportion of positive proposals in a mini-batch during training
            of the classification head
        bbox_reg_weights (Tuple[float, float, float, float]): weights for the encoding/decoding of the
            bounding boxes

    Raises:
        ValueError: if backbone has no out_channels attribute, or if exactly one of
            num_classes / box_predictor is not provided as required (both given, or both None).
    """

    def __init__(self, backbone, num_classes=None,
                 # transform parameters
                 min_size=800, max_size=1333,      # min / max size enforced when resizing during pre-processing
                 image_mean=None, image_std=None,  # mean / std used when normalizing during pre-processing
                 # RPN parameters
                 rpn_anchor_generator=None, rpn_head=None,
                 rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,    # proposals kept (by score) before NMS in the RPN
                 rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,  # proposals kept after NMS in the RPN
                 rpn_nms_thresh=0.7,  # IoU threshold used by NMS in the RPN
                 rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,  # IoU thresholds for picking positive / negative samples for the RPN loss
                 rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,  # samples drawn for the RPN loss and the share of positives
                 rpn_score_thresh=0.0,
                 # Box parameters
                 box_roi_pool=None, box_head=None, box_predictor=None,
                 # drop low-score detections / NMS IoU threshold in Fast R-CNN / keep the top detections by score
                 box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
                 box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,   # IoU thresholds for picking positive / negative samples for the Fast R-CNN loss
                 box_batch_size_per_image=512, box_positive_fraction=0.25,  # samples drawn for the Fast R-CNN loss and the share of positives
                 bbox_reg_weights=None):
        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)"
            )

        assert isinstance(rpn_anchor_generator, (AnchorsGenerator, type(None)))
        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

        # exactly one of num_classes / box_predictor must be provided
        if num_classes is not None:
            if box_predictor is not None:
                raise ValueError("num_classes should be None when box_predictor "
                                 "is specified")
        else:
            if box_predictor is None:
                raise ValueError("num_classes should not be None when box_predictor "
                                 "is not specified")

        # number of channels of the feature maps used for prediction
        out_channels = backbone.out_channels

        # without a user-supplied anchor generator, build the default one
        # (five levels, one size each, three aspect ratios; meant for resnet50_fpn)
        if rpn_anchor_generator is None:
            anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
            aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
            rpn_anchor_generator = AnchorsGenerator(
                anchor_sizes, aspect_ratios
            )

        # RPN head: the sliding-window prediction network
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels, rpn_anchor_generator.num_anchors_per_location()[0]
            )

        # top-n settings are handed to the RPN as dicts keyed by mode
        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)

        # assemble the complete RPN
        rpn = RegionProposalNetwork(
            rpn_anchor_generator, rpn_head,
            rpn_fg_iou_thresh, rpn_bg_iou_thresh,
            rpn_batch_size_per_image, rpn_positive_fraction,
            rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh,
            score_thresh=rpn_score_thresh)

        # Multi-scale RoIAlign pooling
        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(
                featmap_names=['0', '1', '2', '3'],  # feature maps on which RoI pooling is performed
                output_size=[7, 7],
                sampling_ratio=2)

        # Fast R-CNN part applied after RoI pooling: flatten + two fully connected layers
        if box_head is None:
            resolution = box_roi_pool.output_size[0]  # 7 with the default box_roi_pool
            representation_size = 1024
            box_head = TwoMLPHead(
                out_channels * resolution ** 2,
                representation_size
            )

        # prediction layers applied to the output of box_head
        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(
                representation_size,
                num_classes)

        # combine RoI pooling, box_head and box_predictor
        roi_heads = RoIHeads(
            # box
            box_roi_pool, box_head, box_predictor,
            box_fg_iou_thresh, box_bg_iou_thresh,  # 0.5  0.5
            box_batch_size_per_image, box_positive_fraction,  # 512  0.25
            bbox_reg_weights,
            box_score_thresh, box_nms_thresh, box_detections_per_img)  # 0.05  0.5  100

        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]

        # pre-processing: normalize, resize and pack the images into a batch
        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)

        super(FasterRCNN, self).__init__(backbone, rpn, roi_heads, transform)


================================================
FILE: pytorch_object_detection/faster_rcnn/network_files/image_list.py
================================================
from typing import List, Tuple
from torch import Tensor


class ImageList(object):
    """
    Holds a batch of images of possibly different sizes as one tensor.
    The images are padded to a common size, and the size each image had
    before padding is kept alongside in `image_sizes`.
    """

    def __init__(self, tensors, image_sizes):
        # type: (Tensor, List[Tuple[int, int]]) -> None
        """
        Arguments:
            tensors (tensor): the image data after padding
            image_sizes (list[tuple[int, int]]): the image sizes before padding
        """
        self.image_sizes = image_sizes
        self.tensors = tensors

    def to(self, device):
        # type: (Device) -> ImageList # noqa
        """Return a new ImageList whose tensor was moved with `Tensor.to(device)`;
        the list of image sizes is shared with this instance."""
        return ImageList(self.tensors.to(device), self.image_sizes)


================================================
FILE: pytorch_object_detection/faster_rcnn/network_files/roi_head.py
================================================
from typing import Optional, List, Dict, Tuple

import torch
from torch import Tensor
import torch.nn.functional as F

from . import det_utils
from . import boxes as box_ops


def fastrcnn_loss(class_logits, box_regression, labels, regression_targets):
    # type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]
    """
    Computes the loss for Faster R-CNN.

    Arguments:
        class_logits : predicted class logits, shape=[num_proposals, num_classes]
        box_regression : predicted box regression values; reshaped here to
            [num_proposals, -1, 4]
        labels : per-image ground-truth class labels (concatenated here)
        regression_targets : per-image ground-truth regression targets (concatenated here)

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    """

    # merge the per-image lists into single tensors
    all_labels = torch.cat(labels, dim=0)
    all_regression_targets = torch.cat(regression_targets, dim=0)

    # classification loss over every sampled proposal
    classification_loss = F.cross_entropy(class_logits, all_labels)

    # indices of the proposals whose label is greater than 0,
    # and the labels found at those indices (used for advanced indexing below)
    positive_inds = torch.where(torch.gt(all_labels, 0))[0]
    positive_labels = all_labels[positive_inds]

    # class_logits is [num_proposals, num_classes]
    num_proposals, num_classes = class_logits.shape
    per_class_regression = box_regression.reshape(num_proposals, -1, 4)

    # For each positive proposal pick the 4 values predicted for its own label,
    # sum the smooth L1 loss over them, then divide by the number of all labels.
    summed_box_loss = det_utils.smooth_l1_loss(
        per_class_regression[positive_inds, positive_labels],
        all_regression_targets[positive_inds],
        beta=1 / 9,
        size_average=False,
    )
    box_loss = summed_box_loss / all_labels.numel()

    return classification_loss, box_loss


class RoIHeads(torch.nn.Module):
    """
    Box branch applied to RPN proposals: RoI pooling -> box head -> box predictor.

    In training mode the proposals are matched to ground truth, subsampled and
    used to compute the classification / box-regression losses. In eval mode
    the predictions are decoded and filtered into the final detections.
    """
    __annotations__ = {
        'box_coder': det_utils.BoxCoder,
        'proposal_matcher': det_utils.Matcher,
        'fg_bg_sampler': det_utils.BalancedPositiveNegativeSampler,
    }

    def __init__(self,
                 box_roi_pool,   # Multi-scale RoIAlign pooling
                 box_head,       # TwoMLPHead
                 box_predictor,  # FastRCNNPredictor
                 # Faster R-CNN training
                 fg_iou_thresh, bg_iou_thresh,  # default: 0.5, 0.5
                 batch_size_per_image, positive_fraction,  # default: 512, 0.25
                 bbox_reg_weights,  # None
                 # Faster R-CNN inference
                 score_thresh,        # default: 0.05
                 nms_thresh,          # default: 0.5
                 detection_per_img):  # default: 100
        super().__init__()

        self.box_similarity = box_ops.box_iou

        # Matches every proposal with a ground-truth box based on IoU.
        self.proposal_matcher = det_utils.Matcher(
            fg_iou_thresh,
            bg_iou_thresh,
            allow_low_quality_matches=False)

        # Draws a fixed-size, ratio-controlled set of positives and negatives.
        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image,
            positive_fraction)

        # Fall back to the standard regression weights when none are supplied.
        weights = (10., 10., 5., 5.) if bbox_reg_weights is None else bbox_reg_weights
        self.box_coder = det_utils.BoxCoder(weights)

        # Sub-modules (assignment order defines the module registration order).
        self.box_roi_pool = box_roi_pool
        self.box_head = box_head
        self.box_predictor = box_predictor

        # Inference-time filtering parameters.
        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.detection_per_img = detection_per_img

    def assign_targets_to_proposals(self, proposals, gt_boxes, gt_labels):
        # type: (List[Tensor], List[Tensor], List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
        """
        Match each proposal to a ground-truth box and derive its class label.

        Args:
            proposals: per-image proposal boxes
            gt_boxes: per-image ground-truth boxes
            gt_labels: per-image ground-truth class ids

        Returns:
            matched_idxs: per-image index of the matched gt box (clamped to >= 0)
            labels: per-image label per proposal; 0 marks background and -1
                marks proposals to be ignored by the sampler
        """
        matched_idxs = []
        labels = []
        for props, boxes_gt, classes_gt in zip(proposals, gt_boxes, gt_labels):
            if boxes_gt.numel() != 0:
                # IoU between every gt box and every proposal.
                iou_matrix = box_ops.box_iou(boxes_gt, props)

                # Best gt index per proposal; the matcher encodes "below the low
                # threshold" and "between thresholds" with negative sentinels.
                raw_idxs = self.proposal_matcher(iou_matrix)

                # Clamp so the sentinels can be used for indexing. Those
                # proposals temporarily pick up gt 0's class, fixed just below.
                gt_idxs = raw_idxs.clamp(min=0)
                img_labels = classes_gt[gt_idxs].to(dtype=torch.int64)

                # Proposals below the low threshold become background (0).
                below_low = raw_idxs == self.proposal_matcher.BELOW_LOW_THRESHOLD
                img_labels[below_low] = 0

                # Proposals between the thresholds are discarded (-1 is ignored
                # by the sampler).
                in_between = raw_idxs == self.proposal_matcher.BETWEEN_THRESHOLDS
                img_labels[in_between] = -1
            else:
                # Image without any gt box: every proposal is background.
                num_props = props.shape[0]
                gt_idxs = torch.zeros(
                    (num_props,), dtype=torch.int64, device=props.device
                )
                img_labels = torch.zeros(
                    (num_props,), dtype=torch.int64, device=props.device
                )

            matched_idxs.append(gt_idxs)
            labels.append(img_labels)
        return matched_idxs, labels

    def subsample(self, labels):
        # type: (List[Tensor]) -> List[Tensor]
        """
        Return, per image, the indices of the proposals picked by the
        positive/negative sampler.
        """
        pos_masks, neg_masks = self.fg_bg_sampler(labels)
        sampled_inds = []
        for pos_mask, neg_mask in zip(pos_masks, neg_masks):
            # Union of the positive and negative masks -> index tensor.
            chosen = torch.where(pos_mask | neg_mask)[0]
            sampled_inds.append(chosen)
        return sampled_inds

    def add_gt_proposals(self, proposals, gt_boxes):
        # type: (List[Tensor], List[Tensor]) -> List[Tensor]
        """
        Append each image's ground-truth boxes to that image's proposals.

        Args:
            proposals: boxes predicted by the RPN for every image in the batch
            gt_boxes: ground-truth boxes for every image in the batch

        Returns:
            A new list with one concatenated tensor per image.
        """
        extended = []
        for proposal, gt_box in zip(proposals, gt_boxes):
            extended.append(torch.cat((proposal, gt_box)))
        return extended

    def check_targets(self, targets):
        # type: (Optional[List[Dict[str, Tensor]]]) -> None
        """Assert that targets are given and each one has "boxes" and "labels"."""
        assert targets is not None
        assert all(["boxes" in target for target in targets])
        assert all(["labels" in target for target in targets])

    def select_training_samples(self,
                                proposals,  # type: List[Tensor]
                                targets     # type: Optional[List[Dict[str, Tensor]]]
                                ):
        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
        """
        Build the training set of the box head for one batch.

        Proposals (plus the gt boxes themselves) are matched to ground truth,
        subsampled, and paired with their labels and regression targets.
        Every returned list has one entry per image.

        Args:
            proposals: boxes predicted by the RPN
            targets: per-image dicts holding at least "boxes" and "labels"

        Returns:
            (sampled proposals, their labels, their regression targets)
        """
        self.check_targets(targets)
        # Repeated on purpose: the original notes jit.script fails without it.
        assert targets is not None

        reference = proposals[0]
        dtype = reference.dtype
        device = reference.device

        # Ground-truth annotations, boxes cast to the proposals' dtype.
        gt_boxes = [target["boxes"].to(dtype) for target in targets]
        gt_labels = [target["labels"] for target in targets]

        # Add the gt boxes as extra proposals.
        proposals = self.add_gt_proposals(proposals, gt_boxes)

        # Match proposals to gt boxes, then sample positives / negatives.
        matched_idxs, labels = self.assign_targets_to_proposals(proposals, gt_boxes, gt_labels)
        sampled_inds = self.subsample(labels)

        matched_gt_boxes = []
        num_images = len(proposals)
        for img_id in range(num_images):
            keep = sampled_inds[img_id]

            # Restrict proposals, labels and gt indices to the sampled subset.
            proposals[img_id] = proposals[img_id][keep]
            labels[img_id] = labels[img_id][keep]
            matched_idxs[img_id] = matched_idxs[img_id][keep]

            image_gt = gt_boxes[img_id]
            if image_gt.numel() == 0:
                # No gt in this image: use one dummy box so indexing still works.
                image_gt = torch.zeros((1, 4), dtype=dtype, device=device)
            matched_gt_boxes.append(image_gt[matched_idxs[img_id]])

        # Encode the matched gt boxes relative to their proposals.
        regression_targets = self.box_coder.encode(matched_gt_boxes, proposals)
        return proposals, labels, regression_targets

    def postprocess_detections(self,
                               class_logits,    # type: Tensor
                               box_regression,  # type: Tensor
                               proposals,       # type: List[Tensor]
                               image_shapes     # type: List[Tuple[int, int]]
                               ):
        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
        """
        Turn raw network outputs into final detections. Steps:
          (1) decode the regression parameters against the proposals
          (2) apply softmax to the class logits
          (3) clip boxes to the image boundary
          (4) drop the background class
          (5) drop low-scoring predictions
          (6) drop small boxes
          (7) run per-class NMS
          (8) keep at most ``self.detection_per_img`` predictions

        Args:
            class_logits: predicted class logits
            box_regression: predicted box regression parameters
            proposals: per-image proposals produced by the RPN
            image_shapes: per-image size before batching

        Returns:
            Per-image lists of boxes, scores and labels.
        """
        device = class_logits.device
        num_classes = class_logits.shape[-1]

        # Number of proposals per image, needed to split the batched outputs.
        counts = [image_proposals.shape[0] for image_proposals in proposals]

        decoded_boxes = self.box_coder.decode(box_regression, proposals)
        probabilities = F.softmax(class_logits, -1)

        boxes_by_image = decoded_boxes.split(counts, 0)
        scores_by_image = probabilities.split(counts, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        for boxes, scores, image_shape in zip(boxes_by_image, scores_by_image, image_shapes):
            # Clamp out-of-range coordinates onto the image border.
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # One label per (proposal, class) pair.
            labels = torch.arange(num_classes, device=device)
            labels = labels.view(1, -1).expand_as(scores)

            # Column 0 is the background class; drop it everywhere.
            boxes = boxes[:, 1:]
            scores = scores[:, 1:]
            labels = labels[:, 1:]

            # Treat every per-class prediction as an independent instance.
            boxes = boxes.reshape(-1, 4)
            scores = scores.reshape(-1)
            labels = labels.reshape(-1)

            # Keep predictions whose score is strictly above self.score_thresh.
            confident = torch.where(torch.gt(scores, self.score_thresh))[0]
            boxes, scores, labels = boxes[confident], scores[confident], labels[confident]

            # Remove boxes below the minimum size.
            large_enough = box_ops.remove_small_boxes(boxes, min_size=1.)
            boxes, scores, labels = boxes[large_enough], scores[large_enough], labels[large_enough]

            # Per-class NMS; the original notes the result is sorted by
            # descending score.
            survivors = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)

            # Keep only the first detection_per_img entries.
            survivors = survivors[:self.detection_per_img]
            boxes, scores, labels = boxes[survivors], scores[survivors], labels[survivors]

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)

        return all_boxes, all_scores, all_labels

    def forward(self,
                features,       # type: Dict[str, Tensor]
                proposals,      # type: List[Tensor]
                image_shapes,   # type: List[Tuple[int, int]]
                targets=None    # type: Optional[List[Dict[str, Tensor]]]
                ):
        # type: (...) -> Tuple[List[Dict[str, Tensor]], Dict[str, Tensor]]
        """
        Arguments:
            features (Dict[str, Tensor]): feature maps fed to the RoI pooler
            proposals (List[Tensor[N, 4]])
            image_shapes (List[Tuple[H, W]])
            targets (List[Dict]): required in training mode

        Returns:
            (result, losses): ``result`` holds one dict per image with
            "boxes", "labels" and "scores" in eval mode (empty in training);
            ``losses`` holds "loss_classifier" and "loss_box_reg" in training
            mode (empty in eval).
        """
        # Validate the dtypes of the provided targets.
        floating_point_types = (torch.float, torch.double, torch.half)
        if targets is not None:
            for t in targets:
                assert t["boxes"].dtype in floating_point_types, "target boxes must of float type"
                assert t["labels"].dtype == torch.int64, "target labels must of int64 type"

        if self.training:
            # Sample proposals and collect their labels / regression targets.
            proposals, labels, regression_targets = self.select_training_samples(proposals, targets)
        else:
            labels = None
            regression_targets = None

        # RoI pooling: [num_proposals, channel, height, width]
        box_features = self.box_roi_pool(features, proposals, image_shapes)

        # Box head: [num_proposals, representation_size]
        box_features = self.box_head(box_features)

        # Class logits and box regression parameters for each proposal.
        class_logits, box_regression = self.box_predictor(box_features)

        result = torch.jit.annotate(List[Dict[str, torch.Tensor]], [])
        losses = {}
        if self.training:
            assert labels is not None and regression_targets is not None
            loss_classifier, loss_box_reg = fastrcnn_loss(
                class_logits, box_regression, labels, regression_targets)
            losses = {
                "loss_classifier": loss_classifier,
                "loss_box_reg": loss_box_reg
            }
        else:
            boxes, scores, labels = self.postprocess_detections(class_logits, box_regression, proposals, image_shapes)
            for image_boxes, image_labels, image_scores in zip(boxes, labels, scores):
                result.append(
                    {
                        "boxes": image_boxes,
                        "labels": image_labels,
                        "scores": image_scores,
                    }
                )

        return result, losses


================================================
FILE: pytorch_object_detection/faster_rcnn/network_files/rpn_function.py
================================================
from typing import List, Optional, Dict, Tuple

import torch
from torch import nn, Tensor
from torch.nn import functional as F
import torchvision

from . import det_utils
from . import boxes as box_ops
from .image_list import ImageList


@torch.jit.unused
def _onnx_get_num_anchors_and_pre_nms_top_n(ob, orig_pre_nms_top_n):
    # type: (Tensor, int) -> Tuple[int, int]
    """
    ONNX-export helper: read the anchor count from ``ob`` and cap the pre-NMS
    top-n at that count.

    Both returned values are tensors (the size of dim 1 of ``ob`` as a
    one-element tensor, and the minimum of that and ``orig_pre_nms_top_n``),
    even though the type comment declares ints.
    """
    from torch.onnx import operators

    # Size of dimension 1, kept as a 1-element tensor.
    num_anchors = operators.shape_as_tensor(ob)[1].unsqueeze(0)

    # Requested top-n as a tensor of the same dtype so the two can be concatenated.
    requested = torch.tensor([orig_pre_nms_top_n], dtype=num_anchors.dtype)
    pre_nms_top_n = torch.min(torch.cat((requested, num_anchors), 0))

    return num_anchors, pre_nms_top_n


class AnchorsGenerator(nn.Module):
    """
    Anchor generator.

    Module that generates anchors for a set of feature maps and
    image sizes.

    The module support computing anchors at multiple sizes and aspect ratios
    per feature map.

    sizes and aspect_ratios should have the same number of elements, and it should
    correspond to the number of feature maps.

    sizes[i] and aspect_ratios[i] can have an arbitrary number of elements,
    and AnchorGenerator will output a set of sizes[i] * aspect_ratios[i] anchors
    per spatial location for feature map i.

    Arguments:
        sizes (Tuple[Tuple[int]]):
        aspect_ratios (Tuple[Tuple[float]]):
    """
    # NOTE: the docstring must be the first statement of the class body,
    # otherwise it is a plain string expression and __doc__ stays None.
    __annotations__ = {
        "cell_anchors": Optional[List[torch.Tensor]],
        "_cache": Dict[str, List[torch.Tensor]]
    }

    def __init__(self, sizes=(128, 256, 512), aspect_ratios=(0.5, 1.0, 2.0)):
        super(AnchorsGenerator, self).__init__()

        # A flat tuple of sizes means "one size per feature map".
        if not isinstance(sizes[0], (list, tuple)):
            # TODO change this
            sizes = tuple((s,) for s in sizes)
        # A flat tuple of ratios is shared by every feature map.
        if not isinstance(aspect_ratios[0], (list, tuple)):
            aspect_ratios = (aspect_ratios,) * len(sizes)

        assert len(sizes) == len(aspect_ratios)

        self.sizes = sizes
        self.aspect_ratios = aspect_ratios
        self.cell_anchors = None
        self._cache = {}

    def generate_anchors(self, scales, aspect_ratios, dtype=torch.float32, device=torch.device("cpu")):
        # type: (List[int], List[float], torch.dtype, torch.device) -> Tensor
        """
        Compute the zero-centred anchor templates for one feature map.

        Arguments:
            scales: sqrt(anchor_area)
            aspect_ratios: h/w ratios
            dtype: dtype of the returned tensor
            device: device of the returned tensor

        Returns:
            Tensor of shape [len(aspect_ratios) * len(scales), 4] holding
            rounded (xmin, ymin, xmax, ymax) offsets around (0, 0).
        """
        scales = torch.as_tensor(scales, dtype=dtype, device=device)
        aspect_ratios = torch.as_tensor(aspect_ratios, dtype=dtype, device=device)
        h_ratios = torch.sqrt(aspect_ratios)
        w_ratios = 1.0 / h_ratios

        # [r1, r2, r3]' * [s1, s2, s3]
        # number of elements is len(ratios)*len(scales)
        ws = (w_ratios[:, None] * scales[None, :]).view(-1)
        hs = (h_ratios[:, None] * scales[None, :]).view(-1)

        # left-top, right-bottom coordinate relative to anchor center (0, 0)
        base_anchors = torch.stack([-ws, -hs, ws, hs], dim=1) / 2

        return base_anchors.round()

    def set_cell_anchors(self, dtype, device):
        # type: (torch.dtype, torch.device) -> None
        """
        Build ``self.cell_anchors`` (one template tensor per feature map)
        unless templates already exist on ``device``.
        """
        if self.cell_anchors is not None:
            cell_anchors = self.cell_anchors
            assert cell_anchors is not None
            # suppose that all anchors have the same device
            # which is a valid assumption in the current state of the codebase
            if cell_anchors[0].device == device:
                return

        # Generate the zero-centred templates from sizes and aspect_ratios.
        cell_anchors = [
            self.generate_anchors(sizes, aspect_ratios, dtype, device)
            for sizes, aspect_ratios in zip(self.sizes, self.aspect_ratios)
        ]
        self.cell_anchors = cell_anchors

    def num_anchors_per_location(self):
        """Return, per feature map, the number of anchors at each sliding-window position."""
        return [len(s) * len(a) for s, a in zip(self.sizes, self.aspect_ratios)]

    # For every combination of (a, (g, s), i) in (self.cell_anchors, zip(grid_sizes, strides), 0:2),
    # output g[i] anchors that are s[i] distance apart in direction i, with the same dimensions as a.
    def grid_anchors(self, grid_sizes, strides):
        # type: (List[List[int]], List[List[Tensor]]) -> List[Tensor]
        """
        Map the anchor templates onto every grid position of every feature
        map, expressed in original-image coordinates.

        Args:
            grid_sizes: (height, width) of each feature map
            strides: (stride_height, stride_width) of one feature-map step
                measured on the original image
        """
        anchors = []
        cell_anchors = self.cell_anchors
        assert cell_anchors is not None

        # Walk the feature maps together with their strides and templates.
        for size, stride, base_anchors in zip(grid_sizes, strides, cell_anchors):
            grid_height, grid_width = size
            stride_height, stride_width = stride
            device = base_anchors.device

            # For output anchor, compute [x_center, y_center, x_center, y_center]
            # shape: [grid_width], x coordinates (columns) on the original image
            shifts_x = torch.arange(0, grid_width, dtype=torch.float32, device=device) * stride_width
            # shape: [grid_height], y coordinates (rows) on the original image
            shifts_y = torch.arange(0, grid_height, dtype=torch.float32, device=device) * stride_height

            # Row / column coordinate grids, each of shape [grid_height, grid_width].
            shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x)
            shift_x = shift_x.reshape(-1)
            shift_y = shift_y.reshape(-1)

            # Offsets applied to (xmin, ymin, xmax, ymax),
            # shape: [grid_width*grid_height, 4]
            shifts = torch.stack([shift_x, shift_y, shift_x, shift_y], dim=1)

            # For every (base anchor, output anchor) pair,
            # offset each zero-centered base anchor by the center of the output anchor.
            # Broadcasting combines every offset with every template.
            shifts_anchor = shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)
            anchors.append(shifts_anchor.reshape(-1, 4))

        return anchors  # List[Tensor(all_num_anchors, 4)]

    def cached_grid_anchors(self, grid_sizes, strides):
        # type: (List[List[int]], List[List[Tensor]]) -> List[Tensor]
        """Return the grid anchors for this configuration, memoised in ``self._cache``."""
        key = str(grid_sizes) + str(strides)
        if key in self._cache:
            return self._cache[key]
        anchors = self.grid_anchors(grid_sizes, strides)
        self._cache[key] = anchors
        return anchors

    def forward(self, image_list, feature_maps):
        # type: (ImageList, List[Tensor]) -> List[Tensor]
        """
        Generate the anchors of every image in the batch.

        Args:
            image_list: batched images; ``tensors`` and ``image_sizes`` are read
            feature_maps: the feature maps anchors are generated for

        Returns:
            One tensor per image containing the anchors of all feature maps
            concatenated along dim 0.
        """
        # (height, width) of every feature map.
        grid_sizes = [feature_map.shape[-2:] for feature_map in feature_maps]

        # Height and width of the batched input tensor.
        image_size = image_list.tensors.shape[-2:]

        dtype, device = feature_maps[0].dtype, feature_maps[0].device

        # one step in feature map equate n pixel stride in origin image
        strides = [[torch.tensor(image_size[0] // g[0], dtype=torch.int64, device=device),
                    torch.tensor(image_size[1] // g[1], dtype=torch.int64, device=device)] for g in grid_sizes]

        # Make sure the anchor templates exist.
        self.set_cell_anchors(dtype, device)

        # Anchors mapped onto the original image, one tensor per feature map.
        anchors_over_all_feature_maps = self.cached_grid_anchors(grid_sizes, strides)

        # Every image of the batch shares the same anchors: concatenate the
        # per-feature-map tensors once per image (a separate tensor each time).
        anchors = torch.jit.annotate(List[torch.Tensor], [])
        for _ in range(len(image_list.image_sizes)):
            anchors.append(torch.cat(anchors_over_all_feature_maps))

        # Clear the cache in case that memory leaks.
        self._cache.clear()
        return anchors


class RPNHead(nn.Module):
    """
    RPN head with a classification branch and a box-regression branch,
    applied as a sliding window over each feature map.

    Arguments:
        in_channels: number of channels of the input feature
        num_anchors: number of anchors to be predicted
    """

    def __init__(self, in_channels, num_anchors):
        super().__init__()
        # Shared 3x3 sliding-window convolution.
        self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
        # Objectness score per anchor (foreground vs. background only).
        self.cls_logits = nn.Conv2d(in_channels, num_anchors, kernel_size=1, stride=1)
        # Four box-regression parameters per anchor.
        self.bbox_pred = nn.Conv2d(in_channels, num_anchors * 4, kernel_size=1, stride=1)

        # Re-initialise every conv layer: N(0, 0.01) weights, zero bias.
        for module in self.children():
            if not isinstance(module, nn.Conv2d):
                continue
            torch.nn.init.normal_(module.weight, std=0.01)
            torch.nn.init.constant_(module.bias, 0)

    def forward(self, x):
        # type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
        """Return per-feature-map objectness logits and box-regression outputs."""
        objectness = []
        box_deltas = []
        for feature in x:
            hidden = F.relu(self.conv(feature))
            objectness.append(self.cls_logits(hidden))
            box_deltas.append(self.bbox_pred(hidden))
        return objectness, box_deltas


def permute_and_flatten(layer, N, A, C, H, W):
    # type: (Tensor, int, int, int, int, int) -> Tensor
    """
    Rearrange a per-level prediction map into shape [N, -1, C].

    Args:
        layer: class scores or box-regression parameters predicted on one
            feature map, laid out as [N, A * C, H, W]
        N: batch_size
        A: anchors_num_per_position (not used by the computation itself)
        C: classes_num or 4 (bbox coordinates)
        H: height
        W: width

    Returns:
        The same values reordered to [N, H, W, -1, C] and flattened to [N, -1, C].
    """
    # Split the channel axis into (anchors, C). view() is fine here; after
    # permute() the tensor is no longer contiguous, so reshape() is used.
    split_channels = layer.view(N, -1, C, H, W)
    spatial_first = split_channels.permute(0, 3, 4, 1, 2)  # [N, H, W, -1, C]
    return spatial_first.reshape(N, -1, C)


def concat_box_prediction_layers(box_cls, box_regression):
    # type: (List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]
    """
    Flatten the per-level class and box predictions and merge all levels.

    Every level is first rearranged to [N, -1, C] (C is the class count for
    ``box_cls`` and 4 for ``box_regression``), then the levels are
    concatenated along dim 1 and the leading dims are collapsed.

    Args:
        box_cls: predicted class scores of every feature level
        box_regression: predicted box-regression parameters of every feature level

    Returns:
        (box_cls, box_regression) as 2-D tensors; the regression tensor has
        shape [-1, 4].
    """
    cls_levels = []
    reg_levels = []

    for cls_map, reg_map in zip(box_cls, box_regression):
        # cls_map: [batch_size, anchors_num_per_position * classes_num, height, width]
        # (the original notes classes_num is 1 for RPN proposals)
        N, AxC, H, W = cls_map.shape
        # reg_map: [batch_size, anchors_num_per_position * 4, height, width]
        A = reg_map.shape[1] // 4  # anchors_num_per_position
        C = AxC // A               # classes_num

        # Both become [N, -1, C] / [N, -1, 4].
        cls_levels.append(permute_and_flatten(cls_map, N, A, C, H, W))
        reg_levels.append(permute_and_flatten(reg_map, N, A, 4, H, W))

    merged_cls = torch.cat(cls_levels, dim=1).flatten(0, -2)  # start_dim, end_dim
    merged_reg = torch.cat(reg_levels, dim=1).reshape(-1, 4)
    return merged_cls, merged_reg


class RegionProposalNetwork(torch.nn.Module):
    """
    Implements Region Proposal Network (RPN).

    Arguments:
        anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature
            maps.
        head (nn.Module): module that computes the objectness and regression deltas
        fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be
            considered as positive during training of the RPN.
        bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be
            considered as negative during training of the RPN.
        batch_size_per_image (int): number of anchors that are sampled during training of the RPN
            for computing the loss
        positive_fraction (float): proportion of positive anchors in a mini-batch during training
            of the RPN
        pre_nms_top_n (Dict[str]): number of proposals to keep before applying NMS. It should
            contain two fields: training and testing, to allow for different values depending
            on training or evaluation
        post_nms_top_n (Dict[str]): number of proposals to keep after applying NMS. It should
            contain two fields: training and testing, to allow for different values depending
            on training or evaluation
        nms_thresh (float): NMS threshold used for postprocessing the RPN proposals
        score_thresh (float): minimum objectness probability (after sigmoid) a proposal must
            reach to be kept by filter_proposals. Defaults to 0.0.

    """
    __annotations__ = {
        'box_coder': det_utils.BoxCoder,
        'proposal_matcher': det_utils.Matcher,
        'fg_bg_sampler': det_utils.BalancedPositiveNegativeSampler,
        'pre_nms_top_n': Dict[str, int],
        'post_nms_top_n': Dict[str, int],
    }

    def __init__(self, anchor_generator, head,
                 fg_iou_thresh, bg_iou_thresh,
                 batch_size_per_image, positive_fraction,
                 pre_nms_top_n, post_nms_top_n, nms_thresh, score_thresh=0.0):
        super(RegionProposalNetwork, self).__init__()
        self.anchor_generator = anchor_generator
        self.head = head
        self.box_coder = det_utils.BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))

        # use during training
        # IoU function between anchors and ground-truth boxes
        # (assign_targets_to_anchors currently calls box_ops.box_iou directly)
        self.box_similarity = box_ops.box_iou

        self.proposal_matcher = det_utils.Matcher(
            fg_iou_thresh,  # anchors whose IoU exceeds this are treated as positives (original note cites 0.7)
            bg_iou_thresh,  # anchors whose IoU is below this are treated as negatives (original note cites 0.3)
            allow_low_quality_matches=True
        )

        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image, positive_fraction  # original note cites 256, 0.5 - confirm against caller
        )

        # use during testing
        self._pre_nms_top_n = pre_nms_top_n
        self._post_nms_top_n = post_nms_top_n
        self.nms_thresh = nms_thresh
        self.score_thresh = score_thresh
        self.min_size = 1.

    def pre_nms_top_n(self):
        """Return the pre-NMS proposal budget for the current mode (training or testing)."""
        if self.training:
            return self._pre_nms_top_n['training']
        return self._pre_nms_top_n['testing']

    def post_nms_top_n(self):
        """Return the post-NMS proposal budget for the current mode (training or testing)."""
        if self.training:
            return self._post_nms_top_n['training']
        return self._post_nms_top_n['testing']

    def assign_targets_to_anchors(self, anchors, targets):
        # type: (List[Tensor], List[Dict[str, Tensor]]) -> Tuple[List[Tensor], List[Tensor]]
        """
        Match every anchor to its best ground-truth box and classify it as a
        positive sample, background, or a discarded sample.

        Args:
            anchors: (List[Tensor]) anchors of each image
            targets: (List[Dict[Tensor]) per-image targets; only the "boxes" entry is read

        Returns:
            labels: per-image float32 tensors holding 1.0 (positive), 0.0 (background)
                    or -1.0 (discarded). The RPN only separates foreground from
                    background, so every positive has label 1.
            matched_gt_boxes: per-image tensors with the ground-truth box matched to
                    each anchor (all zeros for an image without ground-truth boxes)
        """
        labels = []
        matched_gt_boxes = []
        # Iterate over the anchors and targets of each image
        for anchors_per_image, targets_per_image in zip(anchors, targets):
            gt_boxes = targets_per_image["boxes"]
            if gt_boxes.numel() == 0:
                # No ground truth in this image: every anchor is background
                # and the matched boxes are zero placeholders.
                device = anchors_per_image.device
                matched_gt_boxes_per_image = torch.zeros(anchors_per_image.shape, dtype=torch.float32, device=device)
                labels_per_image = torch.zeros((anchors_per_image.shape[0],), dtype=torch.float32, device=device)
            else:
                # IoU between every ground-truth box and every anchor
                # set to self.box_similarity when https://github.com/pytorch/pytorch/issues/27495 lands
                match_quality_matrix = box_ops.box_iou(gt_boxes, anchors_per_image)
                # Index of the best-matching gt for each anchor; the matcher returns
                # negative sentinels for background / in-between anchors
                # (BELOW_LOW_THRESHOLD and BETWEEN_THRESHOLDS, -1 and -2 per the notes below)
                matched_idxs = self.proposal_matcher(match_quality_matrix)
                # get the targets corresponding GT for each proposal
                # NB: need to clamp the indices because we can have a single
                # GT in the image, and matched_idxs can be -2, which goes
                # out of bounds
                # Clamping to a lower bound of 0 makes the lookup into gt_boxes safe:
                # background and discarded anchors carry negative indices, so they
                # are redirected to index 0. The box they receive is meaningless,
                # but positives are tracked through labels_per_image, which is
                # what compute_loss uses to select the regression samples.
                matched_gt_boxes_per_image = gt_boxes[matched_idxs.clamp(min=0)]

                # Labels for all anchors: 1.0 for positives, 0.0 for negatives,
                # -1.0 for discarded anchors (assigned below)
                labels_per_image = matched_idxs >= 0
                labels_per_image = labels_per_image.to(dtype=torch.float32)

                # background (negative examples)
                bg_indices = matched_idxs == self.proposal_matcher.BELOW_LOW_THRESHOLD  # -1
                labels_per_image[bg_indices] = 0.0

                # discard indices that are between thresholds
                inds_to_discard = matched_idxs == self.proposal_matcher.BETWEEN_THRESHOLDS  # -2
                labels_per_image[inds_to_discard] = -1.0

            labels.append(labels_per_image)
            matched_gt_boxes.append(matched_gt_boxes_per_image)
        return labels, matched_gt_boxes

    def _get_top_n_idx(self, objectness, num_anchors_per_level):
        # type: (Tensor, List[int]) -> Tensor
        """
        Collect, for every feature level, the indices of the anchors with the
        highest objectness (at most pre_nms_top_n per level).

        Args:
            objectness: Tensor with the objectness of every anchor, one row per image
            num_anchors_per_level: List with the number of anchors on each feature level

        Returns:
            Tensor of selected anchor indices, concatenated over the levels along
            dim 1; each level's indices are shifted by the number of anchors on
            the preceding levels so they address the full anchor axis.
        """
        r = []  # top-scoring anchor indices of each feature level
        offset = 0
        # Iterate over the objectness of each feature level
        for ob in objectness.split(num_anchors_per_level, 1):
            if torchvision._is_tracing():
                num_anchors, pre_nms_top_n = _onnx_get_num_anchors_and_pre_nms_top_n(ob, self.pre_nms_top_n())
            else:
                num_anchors = ob.shape[1]  # number of anchors on this feature level
                pre_nms_top_n = min(self.pre_nms_top_n(), num_anchors)

            # Returns the k largest elements of the given input tensor along a given dimension
            _, top_n_idx = ob.topk(pre_nms_top_n, dim=1)
            r.append(top_n_idx + offset)
            offset += num_anchors
        return torch.cat(r, dim=1)

    def filter_proposals(self, proposals, objectness, image_shapes, num_anchors_per_level):
        # type: (Tensor, Tensor, List[Tuple[int, int]], List[int]) -> Tuple[List[Tensor], List[Tensor]]
        """
        Remove small boxes and low-scoring boxes, apply NMS per feature level and
        keep the post_nms_top_n highest-scoring proposals of each image.

        Args:
            proposals: predicted box coordinates
            objectness: predicted objectness logits
            image_shapes: size of every image in the batch
            num_anchors_per_level: number of anchors on each feature level

        Returns:
            final_boxes: list with the kept proposals of each image
            final_scores: list with the objectness probabilities of the kept proposals
        """
        num_images = proposals.shape[0]
        device = proposals.device

        # do not backprop through objectness
        objectness = objectness.detach()
        objectness = objectness.reshape(num_images, -1)

        # Returns a tensor of size size filled with fill_value
        # levels records which feature level each anchor belongs to
        levels = [torch.full((n, ), idx, dtype=torch.int64, device=device)
                  for idx, n in enumerate(num_anchors_per_level)]
        levels = torch.cat(levels, 0)

        # Expand this tensor to the same size as objectness
        levels = levels.reshape(1, -1).expand_as(objectness)

        # select top_n boxes independently per level before applying nms
        # (indices of the top-scoring anchors of every feature level)
        top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)

        image_range = torch.arange(num_images, device=device)
        batch_idx = image_range[:, None]  # [batch_size, 1]

        # Gather the objectness and level id of the selected anchors
        objectness = objectness[batch_idx, top_n_idx]
        levels = levels[batch_idx, top_n_idx]
        # Gather the box coordinates of the selected anchors
        proposals = proposals[batch_idx, top_n_idx]

        objectness_prob = torch.sigmoid(objectness)

        final_boxes = []
        final_scores = []
        # Iterate over the predictions of each image
        for boxes, scores, lvl, img_shape in zip(proposals, objectness_prob, levels, image_shapes):
            # Clip coordinates that fall outside the image onto the image border
            boxes = box_ops.clip_boxes_to_image(boxes, img_shape)

            # Indices of boxes whose width and height both reach min_size
            keep = box_ops.remove_small_boxes(boxes, self.min_size)
            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]

            # Drop boxes with a low objectness probability, see
            # https://github.com/pytorch/vision/pull/3205
            keep = torch.where(torch.ge(scores, self.score_thresh))[0]  # ge: >=
            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]

            # non-maximum suppression, independently done per level
            keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)

            # keep only topk scoring predictions
            keep = keep[: self.post_nms_top_n()]
            boxes, scores = boxes[keep], scores[keep]

            final_boxes.append(boxes)
            final_scores.append(scores)
        return final_boxes, final_scores

    def compute_loss(self, objectness, pred_bbox_deltas, labels, regression_targets):
        # type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]
        """
        Compute the RPN losses: the classification loss (foreground vs.
        background) and the box regression loss.

        Arguments:
            objectness (Tensor): predicted foreground logits
            pred_bbox_deltas (Tensor): predicted box regression parameters
            labels (List[Tensor]): ground-truth labels 1, 0, -1 (one list element per image)
            regression_targets (List[Tensor]): ground-truth box regression parameters

        Returns:
            objectness_loss (Tensor): classification loss
            box_loss (Tensor): box regression loss, divided by the total number of
                sampled anchors (positives and negatives)
        """
        # Sample positives and negatives (sampler configured with batch_size_per_image, positive_fraction)
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        # Concatenate the per-image masks over the batch and take the indices of the non-zero entries
        # equivalent to: sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)
        sampled_pos_inds = torch.where(torch.cat(sampled_pos_inds, dim=0))[0]
        # equivalent to: sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)
        sampled_neg_inds = torch.where(torch.cat(sampled_neg_inds, dim=0))[0]

        # Indices of all sampled anchors, positives first
        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)
        objectness = objectness.flatten()

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)

        # Box regression loss, computed on the sampled positives only
        # (size_average=False presumably yields a summed loss - confirm in det_utils)
        box_loss = det_utils.smooth_l1_loss(
            pred_bbox_deltas[sampled_pos_inds],
            regression_targets[sampled_pos_inds],
            beta=1 / 9,
            size_average=False,
        ) / (sampled_inds.numel())

        # Objectness loss over all sampled anchors
        objectness_loss = F.binary_cross_entropy_with_logits(
            objectness[sampled_inds], labels[sampled_inds]
        )

        return objectness_loss, box_loss

    def forward(self,
                images,        # type: ImageList
                features,      # type: Dict[str, Tensor]
                targets=None   # type: Optional[List[Dict[str, Tensor]]]
                ):
        # type: (...) -> Tuple[List[Tensor], Dict[str, Tensor]]
        """
        Arguments:
            images (ImageList): images for which we want to compute the predictions
            features (Dict[Tensor]): features computed from the images that are
                used for computing the predictions. Each tensor in the list
                correspond to different feature levels
            targets (List[Dict[Tensor]): ground-truth boxes present in the image (optional).
                If provided, each element in the dict should contain a field `boxes`,
                with the locations of the ground-truth boxes.

        Returns:
            boxes (List[Tensor]): the predicted boxes from the RPN, one Tensor per
                image.
            losses (Dict[Tensor]): the losses for the model during training. During
                testing, it is an empty dict.
        """
        # RPN uses all feature maps that are available
        # features is a dict of all feature levels; only its values are used
        features = list(features.values())

        # Objectness and box regression parameters of every feature level
        # (objectness and pred_bbox_deltas are both lists)
        objectness, pred_bbox_deltas = self.head(features)

        # Anchors for the whole batch: a list with one tensor per image
        anchors = self.anchor_generator(images, features)

        # batch_size
        num_images = len(anchors)

        # Number of anchors on each feature level: the product of the three
        # dimensions of the per-image objectness map o[0]
        num_anchors_per_level_shape_tensors = [o[0].shape for o in objectness]
        num_anchors_per_level = [s[0] * s[1] * s[2] for s in num_anchors_per_level_shape_tensors]

        # Flatten and concatenate the per-level predictions
        objectness, pred_bbox_deltas = concat_box_prediction_layers(objectness,
                                                                    pred_bbox_deltas)

        # apply pred_bbox_deltas to anchors to obtain the decoded proposals
        # note that we detach the deltas because Faster R-CNN do not backprop through
        # the proposals
        proposals = self.box_coder.decode(pred_bbox_deltas.detach(), anchors)
        proposals = proposals.view(num_images, -1, 4)

        # Remove small / low-scoring boxes, run NMS and keep the post_nms_top_n best proposals
        boxes, scores = self.filter_proposals(proposals, objectness, images.image_sizes, num_anchors_per_level)

        losses = {}
        if self.training:
            assert targets is not None
            # Match every anchor to a gt box and label it as foreground, background or discarded
            labels, matched_gt_boxes = self.assign_targets_to_anchors(anchors, targets)
            # Encode the matched gt boxes relative to the anchors to get the regression targets
            regression_targets = self.box_coder.encode(matched_gt_boxes, anchors)
            loss_objectness, loss_rpn_box_reg = self.compute_loss(
                objectness, pred_bbox_deltas, labels, regression_targets
            )
            losses = {
                "loss_objectness": loss_objectness,
                "loss_rpn_box_reg": loss_rpn_box_reg
            }
        return boxes, losses


================================================
FILE: pytorch_object_detection/faster_rcnn/network_files/transform.py
================================================
import math
from typing import List, Tuple, Dict, Optional

import torch
from torch import nn, Tensor
import torchvision

from .image_list import ImageList


@torch.jit.unused
def _resize_image_onnx(image, self_min_size, self_max_size):
    # type: (Tensor, float, float) -> Tensor
    """
    Tracing-friendly resize: scale `image` so that its shorter side reaches
    `self_min_size` unless that would push the longer side past
    `self_max_size`, in which case the longer side is scaled to
    `self_max_size` instead. All size arithmetic stays in tensors.
    """
    from torch.onnx import operators

    # Height and width of the image as a tensor.
    spatial = operators.shape_as_tensor(image)[-2:]
    short_side = torch.min(spatial).to(dtype=torch.float32)
    long_side = torch.max(spatial).to(dtype=torch.float32)

    # The smaller of the two candidate ratios satisfies both limits.
    ratio_for_short = self_min_size / short_side
    ratio_for_long = self_max_size / long_side
    scale_factor = torch.min(ratio_for_short, ratio_for_long)

    # Bilinear interpolation needs a batch dimension: [C, H, W] -> [1, C, H, W].
    batched = image[None]
    resized = torch.nn.functional.interpolate(
        batched, scale_factor=scale_factor, mode="bilinear",
        recompute_scale_factor=True, align_corners=False)

    return resized[0]


def _resize_image(image, self_min_size, self_max_size):
    # type: (Tensor, float, float) -> Tensor
    """
    Resize `image` with bilinear interpolation so that its shorter side becomes
    `self_min_size`; if the longer side would then exceed `self_max_size`, the
    scale is chosen so that the longer side becomes `self_max_size` instead.
    """
    spatial = torch.tensor(image.shape[-2:])
    short_side = float(torch.min(spatial))  # smaller of height / width
    long_side = float(torch.max(spatial))   # larger of height / width

    # Scale derived from the shorter side, replaced by the scale derived from
    # the longer side whenever the former would overshoot the maximum length.
    by_short_side = self_min_size / short_side
    scale_factor = (self_max_size / long_side
                    if long_side * by_short_side > self_max_size
                    else by_short_side)

    # Bilinear interpolation only accepts 4D input, so a batch dimension is
    # added first ([C, H, W] -> [1, C, H, W]) and stripped again afterwards.
    batched = image[None]
    resized = torch.nn.functional.interpolate(
        batched, scale_factor=scale_factor, mode="bilinear",
        recompute_scale_factor=True, align_corners=False)

    return resized[0]


class GeneralizedRCNNTransform(nn.Module):
    """
    Performs input / target transformation before feeding the data to a GeneralizedRCNN
    model.

    The transformations it perform are:
        - input normalization (mean subtraction and std division)
        - input / target resizing to match min_size / max_size

    It returns a ImageList for the inputs, and a List[Dict[Tensor]] for the targets
    """

    def __init__(self, min_size, max_size, image_mean, image_std):
        super(GeneralizedRCNNTransform, self).__init__()
        # A single value is wrapped so that min_size is always a sequence.
        if not isinstance(min_size, (list, tuple)):
            min_size = (min_size,)
        self.min_size = min_size      # candidate target lengths for the shorter image side
        self.max_size = max_size      # upper bound for the longer image side
        self.image_mean = image_mean  # per-channel mean used by normalize()
        self.image_std = image_std    # per-channel standard deviation used by normalize()

    def normalize(self, image):
        """Subtract the per-channel mean and divide by the per-channel std."""
        dtype, device = image.dtype, image.device
        mean = torch.as_tensor(self.image_mean, dtype=dtype, device=device)
        std = torch.as_tensor(self.image_std, dtype=dtype, device=device)
        # [:, None, None]: shape [3] -> [3, 1, 1]
        return (image - mean[:, None, None]) / std[:, None, None]

    def torch_choice(self, k):
        # type: (List[int]) -> int
        """
        Implements `random.choice` via torch ops so it can be compiled with
        TorchScript. Remove if https://github.com/pytorch/pytorch/issues/25803
        is fixed.
        """
        index = int(torch.empty(1).uniform_(0., float(len(k))).item())
        return k[index]

    def resize(self, image, target):
        # type: (Tensor, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]
        """
        Resize the image into the configured size range and scale the boxes of
        the target accordingly.

        Args:
            image: input image of shape [channel, height, width]
            target: information attached to the image; its "boxes" entry is
                replaced by the rescaled boxes (the dict is updated in place)

        Returns:
            image: the resized image
            target: the target with rescaled boxes, or None if no target was given
        """
        # image shape is [channel, height, width]
        h, w = image.shape[-2:]

        if self.training:
            # Pick one of the configured sizes at random (self.min_size, the sequence).
            size = float(self.torch_choice(self.min_size))
        else:
            # FIXME assume for now that testing uses the largest scale
            size = float(self.min_size[-1])

        if torchvision._is_tracing():
            image = _resize_image_onnx(image, size, float(self.max_size))
        else:
            image = _resize_image(image, size, float(self.max_size))

        if target is None:
            return image, target

        bbox = target["boxes"]
        # Scale the boxes by the same ratio as the image
        bbox = resize_boxes(bbox, [h, w], image.shape[-2:])
        target["boxes"] = bbox

        return image, target

    # _onnx_batch_images() is an implementation of
    # batch_images() that is supported by ONNX tracing.
    @torch.jit.unused
    def _onnx_batch_images(self, images, size_divisible=32):
        # type: (List[Tensor], int) -> Tensor
        max_size = []
        for i in range(images[0].dim()):
            max_size_i = torch.max(torch.stack([img.shape[i] for img in images]).to(torch.float32)).to(torch.int64)
            max_size.append(max_size_i)
        stride = size_divisible
        max_size[1] = (torch.ceil((max_size[1].to(torch.float32)) / stride) * stride).to(torch.int64)
        max_size[2] = (torch.ceil((max_size[2].to(torch.float32)) / stride) * stride).to(torch.int64)
        max_size = tuple(max_size)

        # work around for
        # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
        # which is not yet supported in onnx
        padded_imgs = []
        for img in images:
            padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))]
            padded_img = torch.nn.functional.pad(img, [0, padding[2], 0, padding[1], 0, padding[0]])
            padded_imgs.append(padded_img)

        return torch.stack(padded_imgs)

    def max_by_axis(self, the_list):
        # type: (List[List[int]]) -> List[int]
        """
        Return the element-wise maximum over all sublists.

        Note: the first sublist is reused as the accumulator and is therefore
        modified in place.
        """
        maxes = the_list[0]
        for sublist in the_list[1:]:
            for index, item in enumerate(sublist):
                maxes[index] = max(maxes[index], item)
        return maxes

    def batch_images(self, images, size_divisible=32):
        # type: (List[Tensor], int) -> Tensor
        """
        Pack a list of images into a single batch tensor (every image in the
        batch ends up with the same shape).

        Args:
            images: the images to pack
            size_divisible: height and width of the batch are rounded up to a
                multiple of this value

        Returns:
            batched_imgs: the zero-padded batch tensor
        """

        if torchvision._is_tracing():
            # batch_images() does not export well to ONNX
            # call _onnx_batch_images() instead
            return self._onnx_batch_images(images, size_divisible)

        # Largest channel, height and width over all images of the batch
        max_size = self.max_by_axis([list(img.shape) for img in images])

        stride = float(size_divisible)
        # Round the height up to a multiple of stride
        max_size[1] = int(math.ceil(float(max_size[1]) / stride) * stride)
        # Round the width up to a multiple of stride
        max_size[2] = int(math.ceil(float(max_size[2]) / stride) * stride)

        # [batch, channel, height, width]
        batch_shape = [len(images)] + max_size

        # Tensor of shape batch_shape filled with zeros
        batched_imgs = images[0].new_full(batch_shape, 0)
        for img, pad_img in zip(images, batched_imgs):
            # Copy every image into the top-left corner of its slot so that
            # box coordinates stay valid; the rest of the slot stays zero.
            # copy_: Copies the elements from src into self tensor and returns self
            pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)

        return batched_imgs

    def postprocess(self,
                    result,                # type: List[Dict[str, Tensor]]
                    image_shapes,          # type: List[Tuple[int, int]]
                    original_image_sizes   # type: List[Tuple[int, int]]
                    ):
        # type: (...) -> List[Dict[str, Tensor]]
        """
        Post-process the predictions (scale the boxes back to the original image size).

        Args:
            result: list(dict), predictions of the network, len(result) == batch_size
            image_shapes: list(torch.Size), image sizes after the resize step, len(image_shapes) == batch_size
            original_image_sizes: list(torch.Size), original image sizes, len(original_image_sizes) == batch_size

        Returns:
            result with its "boxes" entries rescaled; returned untouched in training mode
        """
        if self.training:
            return result

        # Scale the boxes of every image back to the original resolution
        for i, (pred, im_s, o_im_s) in enumerate(zip(result, image_shapes, original_image_sizes)):
            boxes = pred["boxes"]
            boxes = resize_boxes(boxes, im_s, o_im_s)
            result[i]["boxes"] = boxes
        return result

    def __repr__(self):
        """Readable summary of the configured normalization and resize settings."""
        format_string = self.__class__.__name__ + '('
        _indent = '\n    '
        format_string += "{0}Normalize(mean={1}, std={2})".format(_indent, self.image_mean, self.image_std)
        format_string += "{0}Resize(min_size={1}, max_size={2}, mode='bilinear')".format(_indent, self.min_size,
                                                                                         self.max_size)
        format_string += '\n)'
        return format_string

    def forward(self,
                images,       # type: List[Tensor]
                targets=None  # type: Optional[List[Dict[str, Tensor]]]
                ):
        # type: (...) -> Tuple[ImageList, Optional[List[Dict[str, Tensor]]]]
        """
        Normalize and resize every image (and its target boxes), then pack the
        images into one padded batch.

        The caller's `images` list, `targets` list and target dicts are left
        untouched; the transformed targets are only available through the
        return value.

        Returns:
            image_list: ImageList holding the batch tensor and the size of
                every image after resizing (before padding)
            targets: the transformed targets, or None if none were given
        """
        images = [img for img in images]
        if targets is not None:
            # Work on shallow copies so that neither the caller's list nor the
            # caller's dicts are modified (resize() overwrites target["boxes"]).
            # Written as explicit loops to stay TorchScript-compatible.
            targets_copy = torch.jit.annotate(List[Dict[str, Tensor]], [])
            for t in targets:
                data = torch.jit.annotate(Dict[str, Tensor], {})
                for k, v in t.items():
                    data[k] = v
                targets_copy.append(data)
            targets = targets_copy

        for i in range(len(images)):
            image = images[i]
            target_index = targets[i] if targets is not None else None

            if image.dim() != 3:
                raise ValueError("images is expected to be a list of 3d tensors "
                                 "of shape [C, H, W], got {}".format(image.shape))
            image = self.normalize(image)                # normalize the image
            image, target_index = self.resize(image, target_index)   # resize image and boxes into the size range
            images[i] = image
            if targets is not None and target_index is not None:
                targets[i] = target_index

        # Image sizes after resizing, recorded before padding
        image_sizes = [img.shape[-2:] for img in images]
        images = self.batch_images(images)  # pack the images into one batch
        image_sizes_list = torch.jit.annotate(List[Tuple[int, int]], [])

        for image_size in image_sizes:
            assert len(image_size) == 2
            image_sizes_list.append((image_size[0], image_size[1]))

        image_list = ImageList(images, image_sizes_list)
        return image_list, targets


def resize_boxes(boxes, original_size, new_size):
    # type: (Tensor, List[int], List[int]) -> Tensor
    """
    Rescale boxes so that they follow a resize of the image they belong to.

    Arguments:
        boxes: boxes as rows of (xmin, ymin, xmax, ymax)
        original_size: (height, width) of the image before resizing
        new_size: (height, width) of the image after resizing

    Returns:
        A new tensor with the rescaled boxes, same row layout as the input.
    """
    device = boxes.device
    # One ratio per size entry (new / original), kept as float32 tensors on
    # the same device as the boxes.
    ratios = [
        torch.tensor(new_len, dtype=torch.float32, device=device) /
        torch.tensor(old_len, dtype=torch.float32, device=device)
        for new_len, old_len in zip(new_size, original_size)
    ]
    ratios_height, ratios_width = ratios

    # unbind(1) splits the box tensor into its four coordinate columns.
    left, top, right, bottom = boxes.unbind(1)
    scaled_columns = (
        left * ratios_width,
        top * ratios_height,
        right * ratios_width,
        bottom * ratios_height,
    )
    return torch.stack(scaled_columns, dim=1)


================================================
FILE: pytorch_object_detection/faster_rcnn/pascal_voc_classes.json
================================================
{
    "aeroplane": 1,
    "bicycle": 2,
    "bird": 3,
    "boat": 4,
    "bottle": 5,
    "bus": 6,
    "car": 7,
    "cat": 8,
    "chair": 9,
    "cow": 10,
    "diningtable": 11,
    "dog": 12,
    "horse": 13,
    "motorbike": 14,
    "person": 15,
    "pottedplant": 16,
    "sheep": 17,
    "sofa": 18,
    "train": 19,
    "tvmonitor": 20
}

================================================
FILE: pytorch_object_detection/faster_rcnn/plot_curve.py
================================================
import datetime
import matplotlib.pyplot as plt


def plot_loss_and_lr(train_loss, learning_rate):
    """
    Plot the training loss and the learning rate against the step index on a
    shared x-axis (two y-axes) and save the figure as
    ./loss_and_lr<timestamp>.png.

    Plotting is best-effort: any failure is printed instead of raised, and the
    figure is closed in every case so that a failed call cannot leave a figure
    open.

    Args:
        train_loss: sequence of loss values, one per step
        learning_rate: sequence of learning rates, one per step
    """
    fig = None
    try:
        x = list(range(len(train_loss)))
        fig, ax1 = plt.subplots(1, 1)
        ax1.plot(x, train_loss, 'r', label='loss')
        ax1.set_xlabel("step")
        ax1.set_ylabel("loss")
        ax1.set_title("Train Loss and lr")

        # Second y-axis sharing the x-axis, used for the learning rate.
        ax2 = ax1.twinx()
        ax2.plot(x, learning_rate, label='lr')
        ax2.set_ylabel("learning rate")
        ax2.set_xlim(0, len(train_loss))  # x-axis spans exactly the recorded steps

        # A single legend that combines the entries of both axes; separate
        # per-axis legends would only duplicate these entries.
        handles1, labels1 = ax1.get_legend_handles_labels()
        handles2, labels2 = ax2.get_legend_handles_labels()
        plt.legend(handles1 + handles2, labels1 + labels2, loc='upper right')

        fig.subplots_adjust(right=0.8)  # keeps the right-hand axis from being cut off in the saved image
        fig.savefig('./loss_and_lr{}.png'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")))
        print("successful save loss curve! ")
    except Exception as e:
        # Best-effort by design: a plotting problem must not abort the caller.
        print(e)
    finally:
        if fig is not None:
            plt.close(fig)


def plot_map(mAP):
    """
    Plot the evaluation mAP against the epoch index and save the figure as
    ./mAP.png.

    The curve is drawn on a figure of its own, so it can never land on a
    figure left over from elsewhere. Plotting is best-effort: any failure is
    printed instead of raised, and the figure is closed in every case.

    Args:
        mAP: sequence of mAP values, one per epoch
    """
    fig = None
    try:
        x = list(range(len(mAP)))
        fig, ax = plt.subplots(1, 1)
        ax.plot(x, mAP, label='mAp')
        ax.set_xlabel('epoch')
        ax.set_ylabel('mAP')
        ax.set_title('Eval mAP')
        ax.set_xlim(0, len(mAP))
        ax.legend(loc='best')
        fig.savefig('./mAP.png')
        print("successful save mAP curve!")
    except Exception as e:
        # Best-effort by design: a plotting problem must not abort the caller.
        print(e)
    finally:
        if fig is not None:
            plt.close(fig)


================================================
FILE: pytorch_object_detection/faster_rcnn/predict.py
================================================
import os
import time
import json

import torch
import torchvision
from PIL import Image
import matplotlib.pyplot as plt

from torchvision import transforms
from network_files import FasterRCNN, FastRCNNPredictor, AnchorsGenerator
from backbone import resnet50_fpn_backbone, MobileNetV2
from draw_box_utils import draw_objs


def create_model(num_classes):
    """
    Build the Faster R-CNN model used for prediction: a ResNet50 + FPN
    backbone with BatchNorm2d layers and an RPN score threshold of 0.5.

    Args:
        num_classes: number of classes passed on to FasterRCNN
    """
    # Alternative configuration, kept for reference: MobileNetV2 + Faster R-CNN
    # backbone = MobileNetV2().features
    # backbone.out_channels = 1280
    #
    # anchor_generator = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),),
    #                                     aspect_ratios=((0.5, 1.0, 2.0),))
    #
    # roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
    #                                                 output_size=[7, 7],
    #                                                 sampling_ratio=2)
    #
    # model = FasterRCNN(backbone=backbone,
    #                    num_classes=num_classes,
    #                    rpn_anchor_generator=anchor_generator,
    #                    box_roi_pool=roi_pooler)

    # ResNet50 + FPN + Faster R-CNN.
    # NOTE: norm_layer has to match the one used by the training script.
    fpn_backbone = resnet50_fpn_backbone(norm_layer=torch.nn.BatchNorm2d)
    return FasterRCNN(
        backbone=fpn_backbone,
        num_classes=num_classes,
        rpn_score_thresh=0.5,
    )


def time_synchronized():
    """
    Return the current wall-clock time from time.time(), after first calling
    torch.cuda.synchronize() when CUDA is available.
    """
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()


def main():
    """Run Faster R-CNN on ``./test.jpg`` and show/save the annotated image.

    Steps:
        1. build the model with 21 classes and load ``./save_weights/model.pth``;
        2. read the class-name mapping from ``./pascal_voc_classes.json``;
        3. load the image (forced to 3-channel RGB), run one warm-up forward
           pass on a zero tensor, then time the real forward pass;
        4. draw the predictions with ``draw_objs`` (score threshold 0.5),
           display the result and write it to ``test_result.jpg``.

    Raises:
        AssertionError: if the weights file or the class json file is missing.
    """
    # get devices
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # create model
    model = create_model(num_classes=21)

    # load train weights
    weights_path = "./save_weights/model.pth"
    assert os.path.exists(weights_path), "{} file dose not exist.".format(weights_path)
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    weights_dict = torch.load(weights_path, map_location='cpu')
    # accept either a bare state_dict or a checkpoint dict storing it under "model"
    weights_dict = weights_dict["model"] if "model" in weights_dict else weights_dict
    model.load_state_dict(weights_dict)
    model.to(device)

    # read class_indict
    label_json_path = './pascal_voc_classes.json'
    assert os.path.exists(label_json_path), "json file {} dose not exist.".format(label_json_path)
    with open(label_json_path, 'r') as f:
        class_dict = json.load(f)

    # invert the mapping: value (as string) -> key (as string)
    category_index = {str(v): str(k) for k, v in class_dict.items()}

    # load image
    # Force 3-channel RGB: the warm-up tensor below is built with 3 channels,
    # so RGBA / CMYK / palette images would otherwise produce a tensor with a
    # different channel count than the one the model is warmed up with.
    original_img = Image.open("./test.jpg").convert("RGB")

    # from pil image to tensor, do not normalize image
    data_transform = transforms.Compose([transforms.ToTensor()])
    img = data_transform(original_img)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)

    model.eval()  # switch to evaluation mode
    with torch.no_grad():
        # init: one warm-up forward pass on a zero image of the same size,
        # so the timed pass below does not include first-call overhead
        img_height, img_width = img.shape[-2:]
        init_img = torch.zeros((1, 3, img_height, img_width), device=device)
        model(init_img)

        t_start = time_synchronized()
        predictions = model(img.to(device))[0]
        t_end = time_synchronized()
        print("inference+NMS time: {}".format(t_end - t_start))

        predict_boxes = predictions["boxes"].to("cpu").numpy()
        predict_classes = predictions["labels"].to("cpu").numpy()
        predict_scores = predictions["scores"].to("cpu").numpy()

        if len(predict_boxes) == 0:
            # message reads "no objects were detected!"
            print("没有检测到任何目标!")

        plot_img = draw_objs(original_img,
                             predict_boxes,
                             predict_classes,
                             predict_scores,
                             category_index=category_index,
                             box_thresh=0.5,
                             line_thickness=3,
                             font='arial.ttf',
                             font_size=20)
        plt.imshow(plot_img)
        plt.show()
        # save the image with the drawn predictions
        plot_img.save("test_result.jpg")


if __name__ == '__main__':
    main()


================================================
FILE: pytorch_object_detection/faster_rcnn/record_mAP.txt
================================================
COCO results:
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.526
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.804
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.586
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.211
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.403
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.580
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.454
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.639
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.646
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.347
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.540
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.693

mAP(IoU=0.5) for each category:
 aeroplane      : 0.8759546352558178
 bicycle        : 0.8554609242543677
 bird           : 0.8434943725365999
 boat           : 0.6753024837855667
 bottle         : 0.7185899054232459
 bus            : 0.8691082170432654
 car            : 0.8771002682431779
 cat            : 0.9169138943375639
 chair          : 0.6403466317122392
 cow            : 0.8285552434280278
 diningtable    : 0.6437938565684241
 dog            : 0.8745793980119227
 horse          : 0.8718238708874728
 motorbike      : 0.8910672301923952
 person         : 0.9047338725598096
 pottedplant    : 0.5808810399193133
 sheep          : 0.86045368568359
 sofa           : 0.7239390963388067
 train          : 0.8652277764020805
 tvmonitor      : 0.7683550206571649

================================================
FILE: pytorch_object_detection/faster_rcnn/requirements.txt
================================================
lxml
matplotlib
numpy
tqdm
torch==1.7.1
torchvision==0.8.2
pycocotools
Pillow


================================================
FILE: pytorch_object_detection/faster_rcnn/split_data.py
================================================
import os
import random


def main():
    """Split the VOC2012 annotation files into ``train.txt`` and ``val.txt``.

    The annotation file names (text before the first ".") found in
    ``./VOCdevkit/VOC2012/Annotations`` are sorted, and ``val_rate`` (0.5) of
    them are picked at random with a fixed seed for the validation list; the
    rest form the training list. Each list is written one name per line into
    the current working directory.

    Raises:
        AssertionError: if the annotations directory does not exist.
        SystemExit: with code 1 if ``train.txt`` or ``val.txt`` already exists
            (existing split files are never overwritten).
    """
    random.seed(0)  # fixed seed so the random split is reproducible

    files_path = "./VOCdevkit/VOC2012/Annotations"
    assert os.path.exists(files_path), "path: '{}' does not exist.".format(files_path)

    val_rate = 0.5

    files_name = sorted(file.split(".")[0] for file in os.listdir(files_path))
    files_num = len(files_name)
    # A set gives O(1) membership tests; the sampled indices themselves are
    # identical to sampling into a list.
    val_index = set(random.sample(range(0, files_num), k=int(files_num * val_rate)))

    train_files = []
    val_files = []
    for index, file_name in enumerate(files_name):
        if index in val_index:
            val_files.append(file_name)
        else:
            train_files.append(file_name)

    try:
        # mode "x" refuses to overwrite; `with` guarantees both files are
        # flushed and closed even if a write fails
        with open("train.txt", "x") as train_f, open("val.txt", "x") as eval_f:
            train_f.write("\n".join(train_files))
            eval_f.write("\n".join(val_files))
    except FileExistsError as e:
        print(e)
        raise SystemExit(1)


if __name__ == '__main__':
    main()


================================================
FILE: pytorch_object_detection/faster_rcnn/train_mobilenetv2.py
================================================
import os
import datetime

import torch
import torchvision

import transforms
from network_files import FasterRCNN, AnchorsGenerator
from backbone import MobileNetV2, vgg
from my_dataset import VOCDataSet
from train_utils import GroupedBatchSampler, create_aspect_ratio_groups
from train_utils import train_eval_utils as utils


def create_model(num_classes):
    """Assemble a Faster R-CNN that uses the MobileNetV2 feature extractor.

    Args:
        num_classes: number of classes handed to ``FasterRCNN``.

    Returns:
        A ``FasterRCNN`` whose backbone is ``MobileNetV2(...).features`` (built
        with ``./backbone/mobilenet_v2.pth``), with a single-level anchor
        generator and a 7x7 ``MultiScaleRoIAlign`` on feature map ``'0'``.
    """
    # https://download.pytorch.org/models/vgg16-397923af.pth
    # To use VGG16 instead, download the matching pre-trained weights, uncomment
    # the three lines below and comment out the two MobileNetV2 lines.
    # vgg_feature = vgg(model_name="vgg16", weights_path="./backbone/vgg16.pth").features
    # backbone = torch.nn.Sequential(*list(vgg_feature._modules.values())[:-1])  # drop the last MaxPool layer of features
    # backbone.out_channels = 512

    # https://download.pytorch.org/models/mobilenet_v2-b0353104.pth
    feature_extractor = MobileNetV2(weights_path="./backbone/mobilenet_v2.pth").features
    # number of channels of the feature map produced by the backbone
    feature_extractor.out_channels = 1280

    return FasterRCNN(
        backbone=feature_extractor,
        num_classes=num_classes,
        rpn_anchor_generator=AnchorsGenerator(
            sizes=((32, 64, 128, 256, 512),),
            aspect_ratios=((0.5, 1.0, 2.0),),
        ),
        box_roi_pool=torchvision.ops.MultiScaleRoIAlign(
            featmap_names=['0'],  # feature maps on which RoI pooling is applied
            output_size=[7, 7],   # size of the pooled feature map
            sampling_ratio=2,     # sampling ratio
        ),
    )


def main():
    """Train the MobileNetV2-based Faster R-CNN on VOC2012 in two stages.

    Stage 1 freezes every parameter of ``model.backbone`` and trains for
    ``init_epochs`` (5) epochs, then saves ``./save_weights/pretrain.pth``.
    Stage 2 keeps backbone parameters whose first name component is "0".."3"
    frozen, unfreezes the rest, and trains ``num_epochs`` (20) more epochs with
    a ``StepLR`` schedule. After every epoch the model is evaluated and the
    metrics, mean loss and learning rate are appended to a timestamped results
    file. Checkpoints of the last 5 epochs are saved, and the loss / lr / mAP
    curves are plotted at the end.

    Raises:
        FileNotFoundError: if ``VOCdevkit`` is not found under ``VOC_root``.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    # file used to record coco_info for every epoch
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    # create the folder for saved weights if it does not exist yet
    if not os.path.exists("save_weights"):
        os.makedirs("save_weights")

    data_transform = {
        "train": transforms.Compose([transforms.ToTensor(),
                                     transforms.RandomHorizontalFlip(0.5)]),
        "val": transforms.Compose([transforms.ToTensor()])
    }

    VOC_root = "./"  # VOCdevkit
    aspect_ratio_group_factor = 3
    batch_size = 8
    amp = False  # whether to use mixed-precision training (requires GPU support)

    # check voc root
    if os.path.exists(os.path.join(VOC_root, "VOCdevkit")) is False:
        raise FileNotFoundError("VOCdevkit dose not in path:'{}'.".format(VOC_root))

    # load train data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt
    train_dataset = VOCDataSet(VOC_root, "2012", data_transform["train"], "train.txt")
    train_sampler = None

    # Whether to build batches from images with similar aspect ratios.
    # Doing so reduces the GPU memory needed for training; enabled by default.
    if aspect_ratio_group_factor >= 0:
        train_sampler = torch.utils.data.RandomSampler(train_dataset)
        # compute, for every image, the index of the aspect-ratio bin it falls into
        group_ids = create_aspect_ratio_groups(train_dataset, k=aspect_ratio_group_factor)
        # every batch is drawn from a single aspect-ratio bin
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, batch_size)

    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using %g dataloader workers' % nw)

    # NOTE: collate_fn is custom because each sample holds an image and its
    # targets, which the default collate method cannot batch directly
    if train_sampler:
        # when sampling by aspect ratio, the dataloader must use batch_sampler
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_sampler=train_batch_sampler,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)
    else:
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_size=batch_size,
                                                        shuffle=True,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)

    # load validation data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt
    val_dataset = VOCDataSet(VOC_root, "2012", data_transform["val"], "val.txt")
    val_data_loader = torch.utils.data.DataLoader(val_dataset,
                                                  batch_size=1,
                                                  shuffle=False,
                                                  pin_memory=True,
                                                  num_workers=nw,
                                                  collate_fn=val_dataset.collate_fn)

    # create model num_classes equal background + 20 classes
    model = create_model(num_classes=21)
    # print(model)

    model.to(device)

    scaler = torch.cuda.amp.GradScaler() if amp else None

    # per-epoch histories used for the plots at the end
    train_loss = []
    learning_rate = []
    val_map = []

    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    #  first frozen backbone and train 5 epochs                   #
    #  (freeze the feature-extractor weights and train only the   #
    #   RPN and the final prediction heads)                       #
    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    for param in model.backbone.parameters():
        param.requires_grad = False

    # define optimizer (only over parameters that still require gradients)
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)

    init_epochs = 5
    for epoch in range(init_epochs):
        # train for one epoch, printing every 50 iterations
        mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader,
                                              device, epoch, print_freq=50,
                                              warmup=True, scaler=scaler)
        train_loss.append(mean_loss.item())
        learning_rate.append(lr)

        # evaluate on the test dataset
        coco_info = utils.evaluate(model, val_data_loader, device=device)

        # write into txt
        with open(results_file, "a") as f:
            # the record contains the coco metrics plus loss and learning rate
            result_info = [f"{i:.4f}" for i in coco_info + [mean_loss.item()]] + [f"{lr:.6f}"]
            txt = "epoch:{} {}".format(epoch, '  '.join(result_info))
            f.write(txt + "\n")

        val_map.append(coco_info[1])  # pascal mAP

    torch.save(model.state_dict(), "./save_weights/pretrain.pth")

    # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    #  second unfrozen backbone and train all network     #
    #  (unfreeze the feature-extractor weights and keep   #
    #   training the whole network)                       #
    # # # # # # # # # # # # # # # # # # # # # # # # # # # #

    # keep the lowest backbone layers frozen
    for name, parameter in model.backbone.named_parameters():
        # first component of the parameter name, compared against "0".."3"
        split_name = name.split(".")[0]
        if split_name in ["0", "1", "2", "3"]:
            parameter.requires_grad = False
        else:
            parameter.requires_grad = True

    # define optimizer (rebuilt so it picks up the newly trainable parameters)
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)
    # learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.33)
    num_epochs = 20
    for epoch in range(init_epochs, num_epochs+init_epochs, 1):
        # train for one epoch, printing every 50 iterations
        mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader,
                                              device, epoch, print_freq=50,
                                              warmup=True, scaler=scaler)
        train_loss.append(mean_loss.item())
        learning_rate.append(lr)

        # update the learning rate
        lr_scheduler.step()

        # evaluate on the test dataset
        coco_info = utils.evaluate(model, val_data_loader, device=device)

        # write into txt
        with open(results_file, "a") as f:
            # the record contains the coco metrics plus loss and learning rate
            result_info = [f"{i:.4f}" for i in coco_info + [mean_loss.item()]] + [f"{lr:.6f}"]
            txt = "epoch:{} {}".format(epoch, '  '.join(result_info))
            f.write(txt + "\n")

        val_map.append(coco_info[1])  # pascal mAP

        # save weights
        # only the weights of the last 5 epochs are saved
        if epoch in range(num_epochs+init_epochs)[-5:]:
            save_files = {
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'epoch': epoch}
            torch.save(save_files, "./save_weights/mobile-model-{}.pth".format(epoch))

    # plot loss and lr curve
    if len(train_loss) != 0 and len(learning_rate) != 0:
        from plot_curve import plot_loss_and_lr
        plot_loss_and_lr(train_loss, learning_rate)

    # plot mAP curve
    if len(val_map) != 0:
        from plot_curve import plot_map
        plot_map(val_map)


if __name__ == "__main__":
    main()


================================================
FILE: pytorch_object_detection/faster_rcnn/train_multi_GPU.py
================================================
import time
import os
import datetime

import torch

import transforms
from my_dataset import VOCDataSet
from backbone import resnet50_fpn_backbone
from network_files import FasterRCNN, FastRCNNPredictor
import train_utils.train_eval_utils as utils
from train_utils import GroupedBatchSampler, create_aspect_ratio_groups, init_distributed_mode, save_on_master, mkdir


def create_model(num_classes):
    """Build a ResNet50-FPN Faster R-CNN initialised from the COCO checkpoint.

    The detector is first created with 91 classes so that
    ``./backbone/fasterrcnn_resnet50_fpn_coco.pth`` can be loaded, then its box
    predictor is replaced by a fresh ``FastRCNNPredictor`` with ``num_classes``
    outputs.

    Args:
        num_classes: number of classes of the new prediction head.

    Returns:
        The ``FasterRCNN`` model with the replaced box predictor.
    """
    # If GPU memory is small, the default FrozenBatchNorm2d is recommended.
    # trainable_layers covers ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'];
    # 5 means all of them are trained.
    # When training on your own dataset do NOT change the 91 below; change the
    # num_classes argument instead.
    detector = FasterRCNN(
        backbone=resnet50_fpn_backbone(norm_layer=torch.nn.BatchNorm2d,
                                       trainable_layers=3),
        num_classes=91,
    )

    # load the pre-trained model weights
    # https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
    coco_weights = torch.load("./backbone/fasterrcnn_resnet50_fpn_coco.pth", map_location='cpu')
    missing_keys, unexpected_keys = detector.load_state_dict(coco_weights, strict=False)
    if missing_keys or unexpected_keys:
        print("missing_keys: ", missing_keys)
        print("unexpected_keys: ", unexpected_keys)

    # swap the pre-trained head for a new one that keeps the same input width
    head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)

    return detector


def main(args):
    """Train (or only evaluate) Faster R-CNN on VOC2012, optionally distributed.

    Args:
        args: parsed command-line namespace. Fields read here include
            ``device``, ``data_path``, ``distributed``, ``aspect_ratio_group_factor``,
            ``batch_size``, ``workers``, ``num_classes``, ``sync_bn``, ``gpu``,
            ``lr``, ``momentum``, ``weight_decay``, ``amp``, ``lr_steps``,
            ``lr_gamma``, ``resume``, ``test_only``, ``start_epoch``, ``epochs``,
            ``print_freq``, ``rank`` and ``output_dir``. ``distributed``, ``gpu``
            and ``rank`` are not defined by the argument parser; presumably they
            are filled in by ``init_distributed_mode`` — TODO confirm.

    Raises:
        FileNotFoundError: if ``VOCdevkit`` is not found under ``args.data_path``.
    """
    init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)

    # file used to record coco_info for every epoch
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    # Data loading code
    print("Loading data")

    data_transform = {
        "train": transforms.Compose([transforms.ToTensor(),
                                     transforms.RandomHorizontalFlip(0.5)]),
        "val": transforms.Compose([transforms.ToTensor()])
    }

    VOC_root = args.data_path
    # check voc root
    if os.path.exists(os.path.join(VOC_root, "VOCdevkit")) is False:
        raise FileNotFoundError("VOCdevkit dose not in path:'{}'.".format(VOC_root))

    # load train data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt
    train_dataset = VOCDataSet(VOC_root, "2012", data_transform["train"], "train.txt")

    # load validation data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt
    val_dataset = VOCDataSet(VOC_root, "2012", data_transform["val"], "val.txt")

    print("Creating data loaders")
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        test_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
    else:
        train_sampler = torch.utils.data.RandomSampler(train_dataset)
        test_sampler = torch.utils.data.SequentialSampler(val_dataset)

    if args.aspect_ratio_group_factor >= 0:
        # compute, for every image, the index of the aspect-ratio bin it falls into
        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)
    else:
        train_batch_sampler = torch.utils.data.BatchSampler(
            train_sampler, args.batch_size, drop_last=True)

    data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_sampler=train_batch_sampler, num_workers=args.workers,
        collate_fn=train_dataset.collate_fn)

    # NOTE(review): the validation loader reuses train_dataset.collate_fn;
    # both datasets are VOCDataSet instances, so this is presumably equivalent
    # to val_dataset.collate_fn — confirm against VOCDataSet.
    data_loader_test = torch.utils.data.DataLoader(
        val_dataset, batch_size=1,
        sampler=test_sampler, num_workers=args.workers,
        collate_fn=train_dataset.collate_fn)

    print("Creating model")
    # create model num_classes equal background + 20 classes
    model = create_model(num_classes=args.num_classes + 1)
    model.to(device)

    if args.distributed and args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    # keep a handle on the unwrapped model for saving / loading state dicts
    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(
        params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)

    # if a resume path (checkpoint of a previous run) is given, continue training from it
    if args.resume:
        # If map_location is missing, torch.load will first load the module to CPU
        # and then copy each parameter to where it was saved,
        # which would result in all processes on the same machine using the same set of devices.
        checkpoint = torch.load(args.resume, map_location='cpu')  # load the saved checkpoint (includes optimizer and lr schedule state)
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp and "scaler" in checkpoint:
            scaler.load_state_dict(checkpoint["scaler"])

    if args.test_only:
        utils.evaluate(model, data_loader_test, device=device)
        return

    # per-epoch histories used for the plots at the end
    train_loss = []
    learning_rate = []
    val_map = []

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        mean_loss, lr = utils.train_one_epoch(model, optimizer, data_loader,
                                              device, epoch, args.print_freq,
                                              warmup=True, scaler=scaler)
        train_loss.append(mean_loss.item())
        learning_rate.append(lr)

        # update learning rate
        lr_scheduler.step()

        # evaluate after every epoch
        coco_info = utils.evaluate(model, data_loader_test, device=device)
        val_map.append(coco_info[1])  # pascal mAP

        # only the main process writes to disk
        if args.rank in [-1, 0]:
            # write into txt
            with open(results_file, "a") as f:
                # the record contains the coco metrics plus loss and learning rate
                result_info = [f"{i:.4f}" for i in coco_info + [mean_loss.item()]] + [f"{lr:.6f}"]
                txt = "epoch:{} {}".format(epoch, '  '.join(result_info))
                f.write(txt + "\n")

        if args.output_dir:
            # weights are saved only on the master node
            # (presumably enforced inside save_on_master — TODO confirm)
            save_files = {
                'model': model_without_ddp.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'args': args,
                'epoch': epoch}
            if args.amp:
                save_files["scaler"] = scaler.state_dict()
            save_on_master(save_files,
                           os.path.join(args.output_dir, f'model_{epoch}.pth'))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))

    if args.rank in [-1, 0]:
        # plot loss and lr curve
        if len(train_loss) != 0 and len(learning_rate) != 0:
            from plot_curve import plot_loss_and_lr
            plot_loss_and_lr(train_loss, learning_rate)

        # plot mAP curve
        if len(val_map) != 0:
            from plot_curve import plot_map
            plot_map(val_map)


if __name__ == "__main__":
    import argparse

    def _str2bool(value):
        """Convert a command-line token such as "True"/"false"/"1" to a bool.

        ``type=bool`` must not be used with argparse: ``bool("False")`` is
        ``True`` because any non-empty string is truthy.
        """
        if isinstance(value, bool):
            return value
        token = value.strip().lower()
        if token in ("true", "t", "yes", "y", "1"):
            return True
        if token in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError("boolean value expected, got {!r}".format(value))

    parser = argparse.ArgumentParser(
        description=__doc__)

    # root directory of the training data (the folder containing VOCdevkit)
    parser.add_argument('--data-path', default='./', help='dataset')
    # type of device used for training
    parser.add_argument('--device', default='cuda', help='device')
    # number of object classes (background NOT included)
    parser.add_argument('--num-classes', default=20, type=int, help='num_classes')
    # batch size on each GPU
    parser.add_argument('-b', '--batch-size', default=4, type=int,
                        help='images per gpu, the total batch size is $NGPU x batch_size')
    # epoch index from which training continues
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # total number of training epochs
    parser.add_argument('--epochs', default=20, type=int, metavar='N',
                        help='number of total epochs to run')
    # number of workers used for data loading and pre-processing
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    # learning rate; set it according to the number of GPUs and the batch size:
    # 0.02 / 8 * num_GPU
    parser.add_argument('--lr', default=0.02, type=float,
                        help='initial learning rate, 0.02 is the default value for training '
                             'on 8 gpus and 2 images_per_gpu')
    # momentum of SGD
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # weight decay of SGD
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # parameter for torch.optim.lr_scheduler.StepLR
    parser.add_argument('--lr-step-size', default=8, type=int, help='decrease lr every step-size epochs')
    # parameter for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-steps', default=[7, 12], nargs='+', type=int, help='decrease lr every step-size epochs')
    # parameter for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')
    # how often training information is printed
    parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
    # directory where files are saved
    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')
    # continue training from a previous checkpoint
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
    # evaluate only, do not train
    parser.add_argument(
        "--test-only",
        dest="test_only",
        help="Only test the model",
        action="store_true",
    )

    # number of processes to launch (processes, not threads)
    parser.add_argument('--world-size', default=4, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
    # Boolean switches: "--flag True" / "--flag False" are parsed as real
    # booleans, and a bare "--flag" enables the option.
    parser.add_argument("--sync-bn", dest="sync_bn", help="Use sync batch norm",
                        type=_str2bool, nargs='?', const=True, default=False)
    # whether to use mixed-precision training (the GPU must support it)
    parser.add_argument("--amp", type=_str2bool, nargs='?', const=True, default=False,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()

    # if an output directory is given, create it when it does not exist yet
    if args.output_dir:
        mkdir(args.output_dir)

    main(args)


================================================
FILE: pytorch_object_detection/faster_rcnn/train_res50_fpn.py
================================================
import os
import datetime

import torch

import transforms
from network_files import FasterRCNN, FastRCNNPredictor
from backbone import resnet50_fpn_backbone
from my_dataset import VOCDataSet
from train_utils import GroupedBatchSampler, create_aspect_ratio_groups
from train_utils import train_eval_utils as utils


def create_model(num_classes, load_pretrain_weights=True):
    """Build a ResNet50-FPN Faster R-CNN with a new ``num_classes`` head.

    Args:
        num_classes: number of classes of the replacement box predictor.
        load_pretrain_weights: when true, load
            ``./backbone/fasterrcnn_resnet50_fpn_coco.pth`` (non-strict) into
            the 91-class detector before its head is replaced.

    Returns:
        The ``FasterRCNN`` model with the replaced box predictor.
    """
    # Author's note: the backbone defaults to FrozenBatchNorm2d, i.e. the bn
    # parameters are not updated, to avoid worse results when the batch size is
    # too small (recommended when GPU memory is small). With plenty of GPU
    # memory and a fairly large batch size, norm_layer can be the regular
    # BatchNorm2d.
    # trainable_layers covers ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'];
    # 5 means all of them are trained.
    # resnet50 imagenet weights url: https://download.pytorch.org/models/resnet50-0676ba61.pth
    # When training on your own dataset do NOT change the 91 below; change the
    # num_classes argument instead.
    detector = FasterRCNN(
        backbone=resnet50_fpn_backbone(pretrain_path="./backbone/resnet50.pth",
                                       norm_layer=torch.nn.BatchNorm2d,
                                       trainable_layers=3),
        num_classes=91,
    )

    if load_pretrain_weights:
        # load the pre-trained model weights
        # https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
        coco_weights = torch.load("./backbone/fasterrcnn_resnet50_fpn_coco.pth", map_location='cpu')
        missing_keys, unexpected_keys = detector.load_state_dict(coco_weights, strict=False)
        if missing_keys or unexpected_keys:
            print("missing_keys: ", missing_keys)
            print("unexpected_keys: ", unexpected_keys)

    # swap the pre-trained head for a new one that keeps the same input width
    head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)

    return detector


def main(args):
    """Train Faster R-CNN (ResNet50-FPN) on VOC2012 and evaluate after every epoch.

    Args:
        args: parsed command-line namespace. Reads ``device``, ``data_path``,
            ``num_classes``, ``batch_size``, ``aspect_ratio_group_factor``, ``lr``,
            ``momentum``, ``weight_decay``, ``amp``, ``resume``, ``start_epoch``,
            ``epochs`` and, when present, ``output_dir`` (falls back to
            ``./save_weights``).

    Raises:
        FileNotFoundError: if ``<data_path>/VOCdevkit`` does not exist.

    Side effects:
        Appends one line per epoch to a timestamped ``results*.txt`` file, writes
        one checkpoint per epoch into the output directory and plots the loss,
        learning-rate and mAP curves at the end.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    # File that collects the per-epoch COCO metrics, loss and learning rate.
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    data_transform = {
        "train": transforms.Compose([transforms.ToTensor(),
                                     transforms.RandomHorizontalFlip(0.5)]),
        "val": transforms.Compose([transforms.ToTensor()])
    }

    VOC_root = args.data_path
    # check voc root
    if not os.path.exists(os.path.join(VOC_root, "VOCdevkit")):
        raise FileNotFoundError("VOCdevkit dose not in path:'{}'.".format(VOC_root))

    # load train data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt
    train_dataset = VOCDataSet(VOC_root, "2012", data_transform["train"], "train.txt")
    train_batch_sampler = None

    # Optionally build batches from images with a similar aspect ratio.
    # Per the original author's note this lowers the GPU memory needed for
    # training; it is enabled by default.
    if args.aspect_ratio_group_factor >= 0:
        train_sampler = torch.utils.data.RandomSampler(train_dataset)
        # Index of the aspect-ratio bin every image falls into.
        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)
        # Every batch is drawn from a single aspect-ratio bin.
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)

    # A custom collate_fn is required because every sample is an (image, target)
    # pair that the default collation cannot stack into a batch.
    batch_size = args.batch_size
    # os.cpu_count() can return None when the count is undetermined; fall back to 1
    # so min() never compares None with an int.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using %g dataloader workers' % nw)
    if train_batch_sampler is not None:
        # Aspect-ratio grouping is active, so the DataLoader must take a batch_sampler.
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_sampler=train_batch_sampler,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)
    else:
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_size=batch_size,
                                                        shuffle=True,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)

    # load validation data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt
    val_dataset = VOCDataSet(VOC_root, "2012", data_transform["val"], "val.txt")
    val_data_set_loader = torch.utils.data.DataLoader(val_dataset,
                                                      batch_size=1,
                                                      shuffle=False,
                                                      pin_memory=True,
                                                      num_workers=nw,
                                                      collate_fn=val_dataset.collate_fn)

    # The model predicts the object classes plus one background class.
    model = create_model(num_classes=args.num_classes + 1)
    # print(model)

    model.to(device)

    # define optimizer over the parameters that are actually trainable
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.33)

    # When a checkpoint from a previous run is given, continue from it.
    if args.resume != "":
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp and "scaler" in checkpoint:
            scaler.load_state_dict(checkpoint["scaler"])
        print("the training process from epoch{}...".format(args.start_epoch))

    # Checkpoints go to the directory selected with --output-dir instead of a
    # hard-coded path; create it here so main() also works when called directly.
    output_dir = getattr(args, "output_dir", "./save_weights")
    os.makedirs(output_dir, exist_ok=True)

    train_loss = []
    learning_rate = []
    val_map = []

    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch, printing every 50 iterations
        mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader,
                                              device=device, epoch=epoch,
                                              print_freq=50, warmup=True,
                                              scaler=scaler)
        epoch_loss = mean_loss.item()
        train_loss.append(epoch_loss)
        learning_rate.append(lr)

        # update the learning rate
        lr_scheduler.step()

        # evaluate on the validation dataset
        coco_info = utils.evaluate(model, val_data_set_loader, device=device)

        # write into txt
        with open(results_file, "a") as f:
            # One line per epoch: COCO metrics followed by loss and learning rate.
            result_info = [f"{i:.4f}" for i in coco_info + [epoch_loss]] + [f"{lr:.6f}"]
            txt = "epoch:{} {}".format(epoch, '  '.join(result_info))
            f.write(txt + "\n")

        val_map.append(coco_info[1])  # pascal mAP (per the original author's note)

        # save weights
        save_files = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler': lr_scheduler.state_dict(),
            'epoch': epoch}
        if args.amp:
            save_files["scaler"] = scaler.state_dict()
        torch.save(save_files, os.path.join(output_dir, "resNetFpn-model-{}.pth".format(epoch)))

    # plot loss and lr curve
    if len(train_loss) != 0 and len(learning_rate) != 0:
        from plot_curve import plot_loss_and_lr
        plot_loss_and_lr(train_loss, learning_rate)

    # plot mAP curve
    if len(val_map) != 0:
        from plot_curve import plot_map
        plot_map(val_map)


if __name__ == "__main__":
    import argparse

    def _str2bool(value):
        """Interpret a command-line string as a boolean flag.

        Without a converter argparse stores the raw string, so ``--amp False``
        would be the non-empty (truthy) string "False". Only explicit "off"
        spellings map to False; every other value keeps enabling the option,
        exactly as before.
        """
        if isinstance(value, bool):
            return value
        return str(value).strip().lower() not in ("", "0", "false", "no", "off")

    parser = argparse.ArgumentParser(
        description=__doc__)

    # training device
    parser.add_argument('--device', default='cuda:0', help='device')
    # root directory of the training data set (the folder containing VOCdevkit)
    parser.add_argument('--data-path', default='./', help='dataset')
    # number of object classes (background excluded)
    parser.add_argument('--num-classes', default=20, type=int, help='num_classes')
    # directory the checkpoints are saved to
    parser.add_argument('--output-dir', default='./save_weights', help='path where to save')
    # checkpoint of a previous run to continue training from
    parser.add_argument('--resume', default='', type=str, help='resume from checkpoint')
    # epoch to start training from
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # total number of training epochs
    parser.add_argument('--epochs', default=15, type=int, metavar='N',
                        help='number of total epochs to run')
    # learning rate
    parser.add_argument('--lr', default=0.01, type=float,
                        help='initial learning rate, 0.02 is the default value for training '
                             'on 8 gpus and 2 images_per_gpu')
    # SGD momentum
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # SGD weight decay
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # training batch size
    parser.add_argument('--batch_size', default=8, type=int, metavar='N',
                        help='batch size when training.')
    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
    # mixed-precision training (the GPU has to support it)
    parser.add_argument("--amp", default=False, type=_str2bool,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()
    print(args)

    # Make sure the checkpoint directory exists before training starts.
    os.makedirs(args.output_dir, exist_ok=True)

    main(args)


================================================
FILE: pytorch_object_detection/faster_rcnn/train_utils/__init__.py
================================================
from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups
from .distributed_utils import init_distributed_mode, save_on_master, mkdir
from .coco_utils import get_coco_api_from_dataset
from .coco_eval import CocoEvaluator


================================================
FILE: pytorch_object_detection/faster_rcnn/train_utils/coco_eval.py
================================================
import json
from collections import defaultdict

import numpy as np
import copy
import torch
import torch._six
from pycocotools.cocoeval import COCOeval
from pycocotools.coco import COCO
import pycocotools.mask as mask_util

from train_utils.distributed_utils import all_gather


class CocoEvaluator(object):
    """Collects per-image COCO evaluation results for one or more IoU types.

    Typical use: call ``update`` with batches of predictions, then
    ``synchronize_between_processes``, ``accumulate`` and ``summarize``.
    """

    def __init__(self, coco_gt, iou_types):
        """Create one ``COCOeval`` per requested IoU type.

        Args:
            coco_gt: ground-truth COCO api object; a deep copy is stored.
            iou_types: list or tuple of IoU type names
                ("bbox", "segm" and "keypoints" are understood by ``prepare``).
        """
        assert isinstance(iou_types, (list, tuple))
        # The evaluator keeps its own deep copy of the ground truth.
        self.coco_gt = copy.deepcopy(coco_gt)
        self.iou_types = iou_types

        self.coco_eval = {
            iou_type: COCOeval(self.coco_gt, iouType=iou_type)
            for iou_type in iou_types
        }

        self.img_ids = []
        self.eval_imgs = {iou_type: [] for iou_type in iou_types}

    def update(self, predictions):
        """Evaluate one batch of predictions and store the per-image results.

        Args:
            predictions: dict mapping image id -> prediction dict.
        """
        batch_ids = list(np.unique(list(predictions.keys())))
        self.img_ids.extend(batch_ids)

        for iou_type in self.iou_types:
            results = self.prepare(predictions, iou_type)
            # An empty result list is represented by an empty COCO object.
            if results:
                detections = loadRes(self.coco_gt, results)
            else:
                detections = COCO()

            evaluator = self.coco_eval[iou_type]
            evaluator.cocoDt = detections
            evaluator.params.imgIds = list(batch_ids)
            # evaluate() returns the image ids it used; they are carried over
            # to the next IoU type.
            batch_ids, per_image_results = evaluate(evaluator)

            self.eval_imgs[iou_type].append(per_image_results)

    def synchronize_between_processes(self):
        """Concatenate the stored results along axis 2 and merge them across processes."""
        for iou_type in self.iou_types:
            stacked = np.concatenate(self.eval_imgs[iou_type], 2)
            self.eval_imgs[iou_type] = stacked
            create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, stacked)

    def accumulate(self):
        """Call ``accumulate`` on every underlying evaluator."""
        for evaluator in self.coco_eval.values():
            evaluator.accumulate()

    def summarize(self):
        """Print the summary of every IoU type."""
        for iou_type, evaluator in self.coco_eval.items():
            print("IoU metric: {}".format(iou_type))
            evaluator.summarize()

    def prepare(self, predictions, iou_type):
        """Convert predictions into COCO result dicts for the given IoU type.

        Raises:
            ValueError: if ``iou_type`` is not "bbox", "segm" or "keypoints".
        """
        if iou_type == "bbox":
            return self.prepare_for_coco_detection(predictions)
        if iou_type == "segm":
            return self.prepare_for_coco_segmentation(predictions)
        if iou_type == "keypoints":
            return self.prepare_for_coco_keypoint(predictions)
        raise ValueError("Unknown iou type {}".format(iou_type))

    def prepare_for_coco_detection(self, predictions):
        """Build one result dict (image_id, category_id, bbox, score) per predicted box."""
        coco_results = []
        for image_id, prediction in predictions.items():
            # Images without any prediction entries contribute nothing.
            if len(prediction) == 0:
                continue

            # COCO results store boxes as [x, y, width, height].
            xywh_boxes = convert_to_xywh(prediction["boxes"]).tolist()
            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()

            for idx, box in enumerate(xywh_boxes):
                coco_results.append({
                    "image_id": image_id,
                    "category_id": labels[idx],
                    "bbox": box,
                    "score": scores[idx],
                })
        return coco_results

    def prepare_for_coco_segmentation(self, predictions):
        """Build one result dict (image_id, category_id, segmentation, score) per mask."""
        coco_results = []
        for image_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            raw_scores = prediction["scores"]
            raw_labels = prediction["labels"]
            # Binarise the predicted masks at a 0.5 threshold.
            binary_masks = prediction["masks"] > 0.5

            scores = raw_scores.tolist()
            labels = raw_labels.tolist()

            rles = []
            for mask in binary_masks:
                fortran_mask = np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F")
                rles.append(mask_util.encode(fortran_mask)[0])
            # The encoded counts are bytes; results carry them as text.
            for rle in rles:
                rle["counts"] = rle["counts"].decode("utf-8")

            for idx, rle in enumerate(rles):
                coco_results.append({
                    "image_id": image_id,
                    "category_id": labels[idx],
                    "segmentation": rle,
                    "score": scores[idx],
                })
        return coco_results

    def prepare_for_coco_keypoint(self, predictions):
        """Build one result dict (image_id, category_id, keypoints, score) per detection."""
        coco_results = []
        for image_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            # Boxes are converted exactly as in the detection case, although the
            # emitted result dicts do not contain them.
            boxes = convert_to_xywh(prediction["boxes"]).tolist()
            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()
            # Flatten every detection's keypoints into one flat list.
            flat_keypoints = prediction["keypoints"].flatten(start_dim=1).tolist()

            for idx, keypoint in enumerate(flat_keypoints):
                coco_results.append({
                    "image_id": image_id,
                    "category_id": labels[idx],
                    'keypoints': keypoint,
                    "score": scores[idx],
                })
        return coco_results


def convert_to_xywh(boxes):
    """Convert boxes from (xmin, ymin, xmax, ymax) to (x, y, width, height).

    Args:
        boxes: tensor whose dimension 1 holds the four corner coordinates.

    Returns:
        A new tensor with the same layout holding (xmin, ymin, width, height).
    """
    left, top, right, bottom = boxes.unbind(1)
    width = right - left
    height = bottom - top
    return torch.stack((left, top, width, height), dim=1)


def merge(img_ids, eval_imgs):
    """Gather image ids and evaluation results from all ranks and de-duplicate them.

    Args:
        img_ids: image ids evaluated by this process.
        eval_imgs: this process' evaluation array; its axis 2 indexes images.

    Returns:
        (unique sorted image ids, evaluation array restricted to the first
        occurrence of every id along the last axis).
    """
    gathered_ids = all_gather(img_ids)
    gathered_evals = all_gather(eval_imgs)

    # Flatten the per-rank id lists and stack the per-rank results image-wise.
    flat_ids = np.array([img_id for rank_ids in gathered_ids for img_id in rank_ids])
    stacked_evals = np.concatenate(list(gathered_evals), 2)

    # keep only unique (and in sorted order) images
    unique_ids, first_positions = np.unique(flat_ids, return_index=True)
    return unique_ids, stacked_evals[..., first_positions]


def create_common_coco_eval(coco_eval, img_ids, eval_imgs):
    """Store the merged, cross-process evaluation results on ``coco_eval``.

    Sets ``evalImgs`` (flattened merged results), ``params.imgIds`` (merged ids)
    and ``_paramsEval`` (a deep copy of the updated params).
    """
    merged_ids, merged_evals = merge(img_ids, eval_imgs)

    coco_eval.evalImgs = list(merged_evals.flatten())
    coco_eval.params.imgIds = list(merged_ids)
    coco_eval._paramsEval = copy.deepcopy(coco_eval.params)


#################################################################
# From pycocotools, just removed the prints and fixed
# a Python3 bug about unicode not defined
#################################################################

# Ideally, pycocotools wouldn't have hard-coded prints
# so that we could avoid copy-pasting those two functions

def createIndex(self):
    """Build the lookup tables of a COCO-style object from ``self.dataset``.

    Sets ``anns`` (annotation id -> annotation), ``imgToAnns`` (image id ->
    annotations), ``catToImgs`` (category id -> image ids), ``imgs`` (image id ->
    image) and ``cats`` (category id -> category). Sections missing from the
    dataset yield empty tables.
    """
    dataset = self.dataset
    has_annotations = 'annotations' in dataset

    anns = {}
    imgToAnns = defaultdict(list)
    if has_annotations:
        for ann in dataset['annotations']:
            imgToAnns[ann['image_id']].append(ann)
            anns[ann['id']] = ann

    imgs = {img['id']: img for img in dataset['images']} if 'images' in dataset else {}
    cats = {cat['id']: cat for cat in dataset['categories']} if 'categories' in dataset else {}

    # The category -> images table needs both annotations and categories.
    catToImgs = defaultdict(list)
    if has_annotations and 'categories' in dataset:
        for ann in dataset['annotations']:
            catToImgs[ann['category_id']].append(ann['image_id'])

    # create class members
    self.anns = anns
    self.imgToAnns = imgToAnns
    self.catToImgs = catToImgs
    self.imgs = imgs
    self.cats = cats


# Alias for pycocotools.mask under the name used by the code copied from
# pycocotools below (loadRes refers to it as maskUtils).
maskUtils = mask_util


def loadRes(self, resFile):
    """
    Load results and return them wrapped in a new result api object.

    Each annotation is completed in place with the fields derived from its
    content ('id' and 'area'; 'segmentation', 'bbox' and 'iscrowd' where the
    matching branch below sets them).

    :param   self              : ground-truth COCO object the results belong to
    :param   resFile           : file name of a JSON result file, a numpy array of
                                 results, or an already loaded list of result dicts
    :return: res (obj)         : result api object
    :raises AssertionError     : if the results are not a list or reference image
                                 ids unknown to the ground truth
    """
    res = COCO()
    res.dataset['images'] = list(self.dataset['images'])

    # (str, bytes) is what the private torch._six.string_classes stood for on
    # Python 3; spelling it out avoids depending on that private module here.
    if isinstance(resFile, (str, bytes)):
        # Close the file deterministically instead of leaking the handle.
        with open(resFile) as result_file:
            anns = json.load(result_file)
    elif isinstance(resFile, np.ndarray):
        anns = self.loadNumpyAnnotations(resFile)
    else:
        anns = resFile
    assert isinstance(anns, list), 'results in not an array of objects'
    annsImgIds = [ann['image_id'] for ann in anns]
    assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
        'Results do not correspond to current coco set'

    # The first entry decides which kind of results these are. An empty list
    # has no first entry: it simply produces a result object without annotations.
    first = anns[0] if anns else {}
    if 'caption' in first:
        imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns])
        res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
        for idx, ann in enumerate(anns):
            ann['id'] = idx + 1
    elif 'bbox' in first and not first['bbox'] == []:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            bb = ann['bbox']
            x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
            if 'segmentation' not in ann:
                # Use the box outline as a polygon when no mask is given.
                ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
            ann['area'] = bb[2] * bb[3]
            ann['id'] = idx + 1
            ann['iscrowd'] = 0
    elif 'segmentation' in first:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            # now only support compressed RLE format as segmentation results
            ann['area'] = maskUtils.area(ann['segmentation'])
            if 'bbox' not in ann:
                ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
            ann['id'] = idx + 1
            ann['iscrowd'] = 0
    elif 'keypoints' in first:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            s = ann['keypoints']
            # Every third value starting at 0 is an x, starting at 1 a y.
            x = s[0::3]
            y = s[1::3]
            x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y)
            ann['area'] = (x2 - x1) * (y2 - y1)
            ann['id'] = idx + 1
            ann['bbox'] = [x1, y1, x2 - x1, y2 - y1]

    res.dataset['annotations'] = anns
    createIndex(res)
    return res


def evaluate(self):
    '''
    Run per-image evaluation on the images selected in self.params.

    Normalises the parameters (unique image/category ids, sorted maxDets),
    computes self.ious for every (image, category) pair and evaluates every
    (category, area range, image) combination.
    :return: (image ids, array of per-image results with shape
              [categories, area ranges, images])
    '''
    params = self.params
    # add backward compatibility if useSegm is specified in params
    if params.useSegm is not None:
        params.iouType = 'segm' if params.useSegm == 1 else 'bbox'
        print('useSegm (deprecated) is not None. Running {} evaluation'.format(params.iouType))
    params.imgIds = list(np.unique(params.imgIds))
    if params.useCats:
        params.catIds = list(np.unique(params.catIds))
    params.maxDets = sorted(params.maxDets)
    self.params = params

    self._prepare()

    # A single placeholder category (-1) is used when categories are ignored.
    cat_ids = params.catIds if params.useCats else [-1]

    if params.iouType in ('segm', 'bbox'):
        iou_fn = self.computeIoU
    elif params.iouType == 'keypoints':
        iou_fn = self.computeOks

    # Overlaps of every (image, category) pair.
    ious = {}
    for img_id in params.imgIds:
        for cat_id in cat_ids:
            ious[(img_id, cat_id)] = iou_fn(img_id, cat_id)
    self.ious = ious

    evaluate_img = self.evaluateImg
    # maxDets was sorted above, so the last entry is the largest limit.
    max_det = params.maxDets[-1]
    results = []
    for cat_id in cat_ids:
        for area_rng in params.areaRng:
            for img_id in params.imgIds:
                results.append(evaluate_img(img_id, cat_id, area_rng, max_det))

    # this is NOT in the pycocotools code, but could be done outside
    results = np.asarray(results).reshape(len(cat_ids), len(params.areaRng), len(params.imgIds))
    self._paramsEval = copy.deepcopy(self.params)
    return params.imgIds, results

#################################################################
# end of straight copy from pycocotools, just removing the prints
#################################################################


================================================
FILE: pytorch_object_detection/faster_rcnn/train_utils/coco_utils.py
================================================
import torch
import torchvision
import torch.utils.data
from pycocotools.coco import COCO


def convert_to_coco_api(ds):
    """Build a pycocotools ``COCO`` object from a detection dataset.

    Args:
        ds: dataset exposing ``__len__`` and ``coco_index(idx)``; the latter must
            return ``(hw, targets)`` where ``hw`` is (height, width) and
            ``targets`` holds "image_id", "boxes", "labels", "area" and "iscrowd".
            Boxes are presumably torch tensors in (xmin, ymin, xmax, ymax)
            order - TODO confirm against the dataset class.

    Returns:
        A COCO object with images, annotations and categories filled in and
        its index created.
    """
    coco_ds = COCO()
    # annotation IDs need to start at 1, not 0
    ann_id = 1
    dataset = {'images': [], 'categories': [], 'annotations': []}
    categories = set()
    for img_idx in range(len(ds)):
        # find better way to get target
        hw, targets = ds.coco_index(img_idx)
        image_id = targets["image_id"].item()
        dataset['images'].append({'id': image_id, 'height': hw[0], 'width': hw[1]})

        # Convert to COCO's [x, y, width, height] on a copy: subtracting in place
        # would silently rewrite the tensor owned by the dataset's target dict.
        bboxes = targets["boxes"].clone()
        bboxes[:, 2:] -= bboxes[:, :2]
        bboxes = bboxes.tolist()
        labels = targets['labels'].tolist()
        areas = targets['area'].tolist()
        iscrowd = targets['iscrowd'].tolist()
        for i, bbox in enumerate(bboxes):
            categories.add(labels[i])
            dataset['annotations'].append({
                'image_id': image_id,
                'bbox': bbox,
                'category_id': labels[i],
                'area': areas[i],
                'iscrowd': iscrowd[i],
                'id': ann_id,
            })
            ann_id += 1
    dataset['categories'] = [{'id': i} for i in sorted(categories)]
    coco_ds.dataset = dataset
    coco_ds.createIndex()
    return coco_ds


def get_coco_api_from_dataset(dataset):
    """Return a COCO api object for ``dataset``.

    Up to 10 levels of ``torch.utils.data.Subset`` wrapping are peeled off. A
    ``torchvision.datasets.CocoDetection`` dataset provides its own ``coco``
    object; any other dataset is converted with ``convert_to_coco_api``.
    """
    coco_detection_cls = torchvision.datasets.CocoDetection

    attempts_left = 10
    while attempts_left > 0 and not isinstance(dataset, coco_detection_cls):
        if isinstance(dataset, torch.utils.data.Subset):
            dataset = dataset.dataset
        attempts_left -= 1

    if isinstance(dataset, coco_detection_cls):
        return dataset.coco
    return convert_to_coco_api(dataset)


================================================
FILE: pytorch_object_detection/faster_rcnn/train_utils/distributed_utils.py
================================================
from collections import defaultdict, deque
import datetime
import pickle
import time
import errno
import os

import torch
import torch.distributed as dist


class SmoothedValue(object):
    """Track a series of values and expose statistics over a sliding window
    (median, avg, max, value) as well as the average over the whole series
    (global_avg).
    """
    def __init__(self, window_size=20, fmt=None):
        """
        Args:
            window_size: number of most recent values kept in the window.
            fmt: format string used by ``__str__``; it may reference median,
                avg, global_avg, max and value.
        """
        self.fmt = "{value:.4f} ({global_avg:.4f})" if fmt is None else fmt
        # Bounded deque: appending beyond window_size drops the oldest value.
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0

    def update(self, value, n=1):
        """Record ``value``, counting it ``n`` times towards the global average."""
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Sum count and total over all processes.
        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        packed = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda")
        dist.barrier()
        dist.all_reduce(packed)
        reduced_count, reduced_total = packed.tolist()
        self.count = int(reduced_count)
        self.total = reduced_total

    @property
    def median(self):
        """Median of the values currently in the window."""
        window = torch.tensor(list(self.deque))
        return window.median().item()

    @property
    def avg(self):
        """Mean of the values currently in the window."""
        window = torch.tensor(list(self.deque), dtype=torch.float32)
        return window.mean().item()

    @property
    def global_avg(self):
        """Average over everything recorded so far (total / count)."""
        return self.total / self.count

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    @property
    def value(self):
        """Most recently recorded value."""
        return self.deque[-1]

    def __str__(self):
        stats = {
            "median": self.median,
            "avg": self.avg,
            "global_avg": self.global_avg,
            "max": self.max,
            "value": self.value,
        }
        return self.fmt.format(**stats)


def all_gather(data):
    """
    Run all_gather on arbitrary picklable data (not necessarily tensors).

    With a world size of 1 the data is returned as a one-element list without
    any communication. Otherwise the object is pickled, moved to "cuda" as a
    byte tensor, exchanged between all ranks and unpickled again.

    Args:
        data: any picklable object
    Returns:
        list[data]: list of data gathered from each rank
    """
    world_size = get_world_size()
    if world_size == 1:
        return [data]

    # Serialize the object into a byte tensor on the GPU.
    buffer = pickle.dumps(data)
    storage = torch.ByteStorage.from_buffer(buffer)
    tensor = torch.ByteTensor(storage).to("cuda")

    # Exchange the payload size of every rank so the largest one is known.
    local_size = torch.tensor([tensor.numel()], device="cuda")
    size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)]
    dist.all_gather(size_list, local_size)
    size_list = [int(size.item()) for size in size_list]
    max_size = max(size_list)

    # receiving Tensor from all ranks
    # we pad the tensor because torch all_gather does not support
    # gathering tensors of different shapes
    tensor_list = []
    for _ in size_list:
        tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda"))
    if local_size != max_size:
        # The padding content is uninitialised; it is cut off again below.
        padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda")
        tensor = torch.cat((tensor, padding), dim=0)
    dist.all_gather(tensor_list, tensor)

    # Trim every received buffer to its real size before unpickling.
    data_list = []
    for size, tensor in zip(size_list, tensor_list):
        buffer = tensor.cpu().numpy().tobytes()[:size]
        data_list.append(pickle.loads(buffer))

    return data_list


def reduce_dict(input_dict, average=True):
    """
    Reduce the values in the dictionary from all processes so that all processes
    have the same (averaged or summed) results.

    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum
    Returns:
        A dict with the same fields as input_dict after reduction. With fewer
        than two processes the input dict itself is returned.
    """
    world_size = get_world_size()
    if world_size < 2:  # single process: nothing to reduce
        return input_dict

    with torch.no_grad():  # several processes
        # sort the keys so that they are consistent across processes
        ordered_keys = sorted(input_dict.keys())
        stacked = torch.stack([input_dict[key] for key in ordered_keys], dim=0)
        dist.all_reduce(stacked)
        if average:
            stacked /= world_size

        return dict(zip(ordered_keys, stacked))


class MetricLogger(object):
    """Keeps a set of named meters and prints progress lines while iterating."""

    def __init__(self, delimiter="\t"):
        # Unknown meter names get a fresh SmoothedValue on first use.
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Feed one value per keyword into the meter of the same name.

        Tensors are converted with ``.item()``; every value must end up as a
        float or an int.
        """
        for name, value in kwargs.items():
            if isinstance(value, torch.Tensor):
                value = value.item()
            assert isinstance(value, (float, int))
            self.meters[name].update(value)

    def __getattr__(self, attr):
        # Only reached when normal attribute lookup fails: expose the meters
        # as attributes of the logger.
        meters = self.meters
        if attr in meters:
            return meters[attr]
        instance_dict = self.__dict__
        if attr in instance_dict:
            return instance_dict[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        return self.delimiter.join(
            "{}: {}".format(name, str(meter)) for name, meter in self.meters.items()
        )

    def synchronize_between_processes(self):
        """Synchronize every meter across processes."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register (or replace) the meter stored under ``name``."""
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield the items of ``iterable`` and print a progress line every
        ``print_freq`` items and for the last item, followed by the total time.

        ``iterable`` must support ``len()``.
        """
        header = header or ""
        step = 0
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        # Pad the step counter to the number of digits of the total.
        space_fmt = ":" + str(len(str(len(iterable)))) + "d"
        fields = [header,
                  '[{0' + space_fmt + '}/{1}]',
                  'eta: {eta}',
                  '{meters}',
                  'time: {time}',
                  'data: {data}']
        if torch.cuda.is_available():
            # Peak GPU memory is only reported when CUDA is available.
            fields.append('max mem: {memory:.0f}')
        log_msg = self.delimiter.join(fields)
        MB = 1024.0 * 1024.0
        for obj in iterable:
            # Time spent waiting for the item vs. time of the full iteration.
            data_time.update(time.time() - end)
            yield obj
            iter_time.update(time.time() - end)
            if step % print_freq == 0 or step == len(iterable) - 1:
                eta_second = iter_time.global_avg * (len(iterable) - step)
                eta_string = str(datetime.timedelta(seconds=eta_second))
                fmt_kwargs = dict(eta=eta_string,
                                  meters=str(self),
                                  time=str(iter_time),
                                  data=str(data_time))
                if torch.cuda.is_available():
                    fmt_kwargs["memory"] = torch.cuda.max_memory_allocated() / MB
                print(log_msg.format(step, len(iterable), **fmt_kwargs))
            step += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print('{} Total time: {} ({:.4f} s / it)'.format(header,
                                                         total_time_str,
                                                         total_time / len(iterable)))


def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
    """Create a LambdaLR scheduler that ramps the learning rate up linearly.

    The multiplier starts at ``warmup_factor`` for step 0, grows linearly and
    stays at 1 from step ``warmup_iters`` onwards.
    """

    def f(x):
        """Return the learning-rate multiplier for step ``x``."""
        if x < warmup_iters:
            # Still warming up: interpolate from warmup_factor towards 1.
            progress = float(x) / warmup_iters
            return warmup_factor * (1 - progress) + progress
        # Warm-up finished: use the base learning rate unchanged.
        return 1

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)


def mkdir(path):
    """Create ``path`` (including parents); an already existing path is not an error.

    Any other OSError raised by ``os.makedirs`` propagates to the caller.
    """
    try:
        os.makedirs(path)
    except FileExistsError:
        # FileExistsError is the OSError subclass for errno.EEXIST.
        pass


def setup_for_distributed(is_master):
    """
    Replace the builtin print so that only the master process prints.

    Non-master processes stay silent unless a call passes ``force=True``.
    """
    import builtins

    original_print = builtins.print

    def print(*args, **kwargs):
        # 'force' is consumed here and never forwarded to the real print.
        forced = kwargs.pop('force', False)
        if not (is_master or forced):
            return
        original_print(*args, **kwargs)

    builtins.print = print


def is_dist_avail_and_initialized():
    """Return True only when torch.distributed is available and initialized."""
    return dist.is_available() and dist.is_initialized()


def get_world_size():
    """Return the distributed world size, or 1 when not running distributed."""
    return dist.get_world_size() if is_dist_avail_and_initialized() else 1


def get_rank():
    """Return this process's distributed rank, or 0 when not running distributed."""
    return dist.get_rank() if is_dist_avail_and_initialized() else 0


def is_main_process():
    """Return True when the current process has rank 0."""
    rank = get_rank()
    return rank == 0


def save_on_master(*args, **kwargs):
    """Forward all arguments to ``torch.save``, but only on the main process."""
    if not is_main_process():
        return
    torch.save(*args, **kwargs)


def init_distributed_mode(args):
    """Fill in distributed settings on ``args`` and start the process group.

    Environment handling:
      * ``RANK`` and ``WORLD_SIZE`` both set: ``args.rank``, ``args.world_size``
        and ``args.gpu`` are read from ``RANK``, ``WORLD_SIZE`` and
        ``LOCAL_RANK``.
      * otherwise ``SLURM_PROCID`` set: ``args.rank`` is read from it and
        ``args.gpu`` is the rank modulo the number of CUDA devices
        (``args.world_size`` is not assigned here and must already exist).
      * neither: ``args.distributed`` is set to False and the function returns.

    In the distributed case this sets ``args.distributed = True``, selects the
    CUDA device, initializes an NCCL process group from ``args.dist_url``,
    waits on a barrier and silences ``print`` on non-zero ranks.
    """
    env = os.environ
    launched_with_rank_env = 'RANK' in env and 'WORLD_SIZE' in env
    if launched_with_rank_env:
        args.rank = int(env["RANK"])
        args.world_size = int(env['WORLD_SIZE'])
        args.gpu = int(env['LOCAL_RANK'])
    elif 'SLURM_PROCID' in env:
        args.rank = int(env['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print('Not using distributed mode')
        args.distributed = False
        return

    args.distributed = True
    torch.cuda.set_device(args.gpu)

    args.dist_backend = 'nccl'
    banner = '| distributed init (rank {}): {}'.format(args.rank, args.dist_url)
    print(banner, flush=True)
    torch.distributed.init_process_group(
        backend=args.dist_backend,
        init_method=args.dist_url,
        world_size=args.world_size,
        rank=args.rank,
    )
    # With torch 1.9 or later it is recommended to also pass device_ids=[args.rank]
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)


================================================
FILE: pytorch_object_detection/faster_rcnn/train_utils/group_by_aspect_ratio.py
================================================
import bisect
from collections import defaultdict
import copy
from itertools import repeat, chain
import math
import numpy as np

import torch
import torch.utils.data
from torch.utils.data.sampler import BatchSampler, Sampler
from torch.utils.model_zoo import tqdm
import torchvision

from PIL import Image


def _repeat_to_at_least(iterable, n):
    """Return a list made of whole copies of ``iterable``.

    The number of copies is ``ceil(n / len(iterable))``; the result is not
    truncated to ``n`` items, so callers slice it themselves.
    """
    copies = math.ceil(n / len(iterable))
    return [item for _ in range(copies) for item in iterable]


class GroupedBatchSampler(BatchSampler):
    """
    Batch sampler that only puts indices of the same group into a batch.

    Indices are drawn from a wrapped sampler and accumulated per group; a
    batch is emitted as soon as one group has collected ``batch_size``
    indices, so the output order stays close to the wrapped sampler's order.

    Arguments:
        sampler (Sampler): Base sampler.
        group_ids (list[int]): If the sampler produces indices in range [0, N),
            `group_ids` must be a list of `N` ints which contains the group id of each sample.
            The group ids must be a continuous set of integers starting from
            0, i.e. they must be in the range [0, num_groups).
        batch_size (int): Size of mini-batch.
    """
    def __init__(self, sampler, group_ids, batch_size):
        if not isinstance(sampler, Sampler):
            raise ValueError(
                "sampler should be an instance of "
                "torch.utils.data.Sampler, but got sampler={}".format(sampler)
            )
        self.sampler = sampler
        self.group_ids = group_ids
        self.batch_size = batch_size

    def __iter__(self):
        # pending: partially filled batch for each group
        # seen: every index observed so far for each group (used for padding)
        pending = defaultdict(list)
        seen = defaultdict(list)

        emitted = 0
        for sample_idx in self.sampler:
            gid = self.group_ids[sample_idx]
            bucket = pending[gid]
            bucket.append(sample_idx)
            seen[gid].append(sample_idx)
            if len(bucket) == self.batch_size:
                yield bucket
                emitted += 1
                del pending[gid]
            # Looking the group up again re-creates an empty bucket after a
            # full batch was emitted, so the group stays in `pending`.
            assert len(pending[gid]) < self.batch_size

        # The base sampler is exhausted. Pad leftover buckets with repeated
        # samples of the same group until the number of batches equals
        # len(self), which keeps the sampler's size deterministic.
        shortfall = len(self) - emitted
        if shortfall > 0:
            # Fullest buckets first.
            fullest_first = sorted(pending.items(),
                                   key=lambda entry: len(entry[1]), reverse=True)
            for gid, bucket in fullest_first:
                missing = self.batch_size - len(bucket)
                filler = _repeat_to_at_least(seen[gid], missing)
                bucket.extend(filler[:missing])
                assert len(bucket) == self.batch_size
                yield bucket
                shortfall -= 1
                if shortfall == 0:
                    break
        assert shortfall == 0

    def __len__(self):
        # Number of full batches the wrapped sampler can provide.
        return len(self.sampler) // self.batch_size


def _compute_aspect_ratios_slow(dataset, indices=None):
    """Compute width/height ratios by loading every selected sample.

    This is the fallback used when no cheaper way of obtaining image sizes is
    known. Each sample is expected to unpack into ``(img, _)`` where ``img``
    exposes ``shape`` with height and width as its last two entries.

    Args:
        dataset: dataset to iterate.
        indices: sample indices to visit; defaults to the whole dataset.

    Returns:
        list[float]: one aspect ratio per visited sample.
    """
    print("Your dataset doesn't support the fast path for "
          "computing the aspect ratios, so will iterate over "
          "the full dataset and load every image instead. "
          "This might take some time...")
    if indices is None:
        indices = range(len(dataset))

    class SubsetSampler(Sampler):
        """Sampler that yields exactly the given indices, in order."""

        def __init__(self, indices):
            self.indices = indices

        def __iter__(self):
            return iter(self.indices)

        def __len__(self):
            return len(self.indices)

    loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=1,
        sampler=SubsetSampler(indices),
        num_workers=14,  # you might want to increase it for faster processing
        collate_fn=lambda batch: batch[0])  # unwrap the single-sample batch

    ratios = []
    with tqdm(total=len(dataset)) as progress:
        for image, _ in loader:
            progress.update(1)
            img_h, img_w = image.shape[-2:]
            ratios.append(float(img_w) / float(img_h))
    return ratios


def _compute_aspect_ratios_custom_dataset(dataset, indices=None):
    """Compute width/height ratios via ``dataset.get_height_and_width``.

    Args:
        dataset: object providing ``get_height_and_width(i) -> (height, width)``.
        indices: sample indices to visit; defaults to ``range(len(dataset))``.

    Returns:
        list[float]: one aspect ratio per visited index.
    """
    if indices is None:
        indices = range(len(dataset))

    def ratio_of(index):
        img_h, img_w = dataset.get_height_and_width(index)
        return float(img_w) / float(img_h)

    return [ratio_of(index) for index in indices]


def _compute_aspect_ratios_coco_dataset(dataset, indices=None):
    """Compute width/height ratios from COCO image metadata.

    For each index ``i`` the image record ``dataset.coco.imgs[dataset.ids[i]]``
    is looked up and its ``"width"`` and ``"height"`` entries are used, so no
    image is opened.

    Args:
        dataset: dataset exposing ``coco.imgs`` and ``ids``.
        indices: sample indices to visit; defaults to ``range(len(dataset))``.

    Returns:
        list[float]: one aspect ratio per visited index.
    """
    if indices is None:
        indices = range(len(dataset))
    records = (dataset.coco.imgs[dataset.ids[index]] for index in indices)
    return [float(record["width"]) / float(record["height"]) for record in records]


def _compute_aspect_ratios_voc_dataset(dataset, indices=None):
    """Compute width/height ratios by reading image sizes from disk.

    Args:
        dataset: dataset exposing ``images``, indexable by sample index and
            giving something ``PIL.Image.open`` accepts.
        indices: sample indices to visit; defaults to ``range(len(dataset))``.

    Returns:
        list[float]: one aspect ratio per visited index.
    """
    if indices is None:
        indices = range(len(dataset))
    aspect_ratios = []
    for i in indices:
        # this doesn't load the data into memory, because PIL loads it lazily.
        # The context manager closes the file right away; without it one file
        # handle per image stays open until garbage collection.
        with Image.open(dataset.images[i]) as img:
            width, height = img.size
        aspect_ratio = float(width) / float(height)
        aspect_ratios.append(aspect_ratio)
    return aspect_ratios


def _compute_aspect_ratios_subset_dataset(dataset, indices=None):
    """Compute aspect ratios for a subset wrapper by delegating to its parent.

    Positions within the subset are translated through ``dataset.indices``
    into indices of ``dataset.dataset``, which is then handed to
    ``compute_aspect_ratios``.
    """
    positions = range(len(dataset)) if indices is None else indices
    parent_indices = [dataset.indices[position] for position in positions]
    return compute_aspect_ratios(dataset.dataset, parent_indices)


def compute_aspect_ratios(dataset, indices=None):
    """Compute width/height ratios, choosing the cheapest strategy available.

    Strategies are tried in this order: a ``get_height_and_width`` method on
    the dataset, torchvision ``CocoDetection``, torchvision ``VOCDetection``,
    ``torch.utils.data.Subset``, and finally the slow path that loads every
    sample.

    Args:
        dataset: dataset to inspect.
        indices: optional sample indices, passed through to the strategy.

    Returns:
        list[float]: aspect ratios as returned by the chosen strategy.
    """
    if hasattr(dataset, "get_height_and_width"):
        strategy = _compute_aspect_ratios_custom_dataset
    elif isinstance(dataset, torchvision.datasets.CocoDetection):
        strategy = _compute_aspect_ratios_coco_dataset
    elif isinstance(dataset, torchvision.datasets.VOCDetection):
        strategy = _compute_aspect_ratios_voc_dataset
    elif isinstance(dataset, torch.utils.data.Subset):
        strategy = _compute_aspect_ratios_subset_dataset
    else:
        # slow path
        strategy = _compute_aspect_ratios_slow
    return strategy(dataset, indices)


def _quantize(x, bins):
    """Map every value in ``x`` to the index of the bin it falls into.

    ``bins`` is copied and sorted, so the caller's sequence is left untouched.
    The index is the ``bisect_right`` insertion point, i.e. a value equal to
    a bin edge lands in the bin to the right of that edge.

    Returns:
        list[int]: one bin index per element of ``x``.
    """
    edges = sorted(copy.deepcopy(bins))
    return [bisect.bisect_right(edges, value) for value in x]


def create_aspect_ratio_groups(dataset, k=0):
    """Assign every sample of ``dataset`` to an aspect-ratio group.

    For ``k > 0`` the bin edges are ``2 ** linspace(-1, 1, 2 * k + 1)``, i.e.
    ``2k + 1`` edges between 0.5 and 2; for ``k <= 0`` a single edge at 1.0 is
    used. The bin edges and the per-bin sample counts are printed.

    Args:
        dataset: dataset passed to ``compute_aspect_ratios``.
        k: controls the number of bin edges.

    Returns:
        list[int]: group id of every sample, in dataset order.
    """
    # width / height ratio of every image in the dataset
    ratios = compute_aspect_ratios(dataset)

    if k > 0:
        bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist()
    else:
        bins = [1.0]

    # index of the bin each ratio falls into
    groups = _quantize(ratios, bins)

    # number of samples in each occupied bin
    _, counts = np.unique(groups, return_counts=True)
    fbins = [0] + bins + [np.inf]
    print("Using {} as bins for aspect ratio quantization".format(fbins))
    print("Count of instances per bin: {}".format(counts))
    return groups


================================================
FILE: pytorch_object_detection/faster_rcnn/train_utils/train_eval_utils.py
================================================
import math
import sys
import time

import torch

from .coco_utils import get_coco_api_from_dataset
from .coco_eval import CocoEvaluator
import train_utils.distributed_utils as utils


def train_one_epoch(model, optimizer, data_loader, device, epoch,
                    print_freq=50, warmup=False, scaler=None):
    """Train ``model`` for one pass over ``data_loader``.

    Args:
        model: detection model; called as ``model(images, targets)`` and
            expected to return a dict of losses.
        optimizer: optimizer updating the model parameters.
        data_loader: iterable yielding ``(images, targets)`` batches.
        device: device the images and target tensors are moved to.
        epoch: epoch index; warm-up is only applied when it is 0.
        print_freq: logging interval passed to the metric logger.
        warmup: enable the linear learning-rate warm-up in epoch 0.
        scaler: optional gradient scaler; when given, mixed precision
            autocast and scaled backward/step are used.

    Returns:
        tuple: ``(mloss, now_lr)`` where ``mloss`` is a one-element tensor
        holding the running mean of the reduced total loss and ``now_lr`` is
        the learning rate of the first parameter group after the last step
        (or the current value if the loader yielded no batch).
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if epoch == 0 and warmup is True:  # use the warm-up schedule during the first epoch (epoch=0)
        warmup_factor = 1.0 / 1000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    mloss = torch.zeros(1).to(device)  # mean losses
    # Initialise before the loop so the return statement cannot hit an
    # unbound name when the data loader yields no batches.
    now_lr = optimizer.param_groups[0]["lr"]
    for i, [images, targets] in enumerate(metric_logger.log_every(data_loader, print_freq, header)):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # mixed precision context manager; autocast is disabled when no scaler is given
        with torch.cuda.amp.autocast(enabled=scaler is not None):
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purpose
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()
        # track the training loss
        mloss = (mloss * i + loss_value) / (i + 1)  # update mean losses

        if not math.isfinite(loss_value):  # stop training when the loss is inf or NaN
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        if scaler is not None:
            scaler.scale(losses).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            losses.backward()
            optimizer.step()

        if lr_scheduler is not None:  # only set during the warm-up epoch
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        now_lr = optimizer.param_groups[0]["lr"]
        metric_logger.update(lr=now_lr)

    return mloss, now_lr


@torch.no_grad()
def evaluate(model, data_loader, device):
    """Run ``model`` over ``data_loader`` and return its COCO statistics.

    The model is put in eval mode, predictions are moved to the CPU and fed
    to a ``CocoEvaluator`` keyed by each target's ``"image_id"``. Model and
    evaluator timings are recorded in a metric logger.

    Args:
        model: detection model; called as ``model(images)``.
        data_loader: iterable yielding ``(images, targets)``; its ``dataset``
            attribute is used to build the COCO ground truth.
        device: device the images are moved to.

    Returns:
        list: ``stats`` of the first IoU type's evaluator, converted to a list.
    """
    cpu = torch.device("cpu")
    model.eval()
    stats_logger = utils.MetricLogger(delimiter="  ")

    coco_gt = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    evaluator = CocoEvaluator(coco_gt, iou_types)

    for images, targets in stats_logger.log_every(data_loader, 100, "Test: "):
        images = [img.to(device) for img in images]

        # skip the GPU-only synchronisation when running on the CPU
        if device != torch.device("cpu"):
            torch.cuda.synchronize(device)

        tic = time.time()
        predictions = model(images)

        predictions = [{key: value.to(cpu) for key, value in pred.items()}
                       for pred in predictions]
        model_time = time.time() - tic

        results = {}
        for target, pred in zip(targets, predictions):
            results[target["image_id"].item()] = pred

        tic = time.time()
        evaluator.update(results)
        evaluator_time = time.time() - tic
        stats_logger.update(model_time=model_time, evaluator_time=evaluator_time)

    # gather the stats from all processes
    stats_logger.synchronize_between_processes()
    print("Averaged stats:", stats_logger)
    evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    evaluator.accumulate()
    evaluator.summarize()

    # numpy array -> plain list
    return evaluator.coco_eval[iou_types[0]].stats.tolist()


def _get_iou_types(model):
    """Return the IoU types to evaluate, which is always ``["bbox"]``.

    ``model`` is accepted to keep the call signature but is not inspected:
    the previous DistributedDataParallel unwrapping stored its result in a
    local that was never read, so it had no effect on the returned value.

    Returns:
        list[str]: a new list ``["bbox"]`` on every call.
    """
    return ["bbox"]


================================================
FILE: pytorch_object_detection/faster_rcnn/transforms.py
================================================
import random
from torchvision.transforms import functional as F


class Compose(object):
    """Chain several ``(image, target)`` transforms into a single callable."""

    def __init__(self, transforms):
        # Sequence of callables, each taking and returning (image, target).
        self.transforms = transforms

    def __call__(self, image, target):
        """Apply every transform in order and return the final pair."""
        for transform in self.transforms:
            result = transform(image, target)
            image, target = result
        return image, target


class ToTensor(object):
    """Convert the image with ``F.to_tensor``; the target is passed through."""

    def __call__(self, image, target):
        return F.to_tensor(image), target


class RandomHorizontalFlip(object):
    """Randomly flip the image horizontally together with its bounding boxes."""

    def __init__(self, prob=0.5):
        # The flip happens when random.random() < prob.
        self.prob = prob

    def __call__(self, image, target):
        """Return ``(image, target)``, flipped when the random draw is below ``prob``.

        ``target["boxes"]`` is updated in place when a flip happens.
        """
        do_flip = random.random() < self.prob
        if not do_flip:
            return image, target

        _, img_width = image.shape[-2:]
        image = image.flip(-1)  # mirror along the last (width) axis
        boxes = target["boxes"]
        # boxes: xmin, ymin, xmax, ymax -> new xmin = width - xmax, new xmax = width - xmin
        boxes[:, [0, 2]] = img_width - boxes[:, [2, 0]]
        target["boxes"] = boxes
        return image, target


================================================
FILE: pytorch_object_detection/faster_rcnn/validation.py
================================================
"""
该脚本用于调用训练好的模型权重去计算验证集/测试集的COCO指标
以及每个类别的mAP(IoU=0.5)
"""

import os
import json

import torch
from tqdm import tqdm
import numpy as np

import transforms
from network_files import FasterRCNN
from backbone import resnet50_fpn_backbone
from my_dataset import VOCDataSet
from train_utils import get_coco_api_from_dataset, CocoEvaluator


def summarize(self, catId=None):
    """
    Compute summary metrics for evaluation results.
    Note this function can *only* be applied on the default parameter setting.

    Args:
        self: evaluator object exposing ``params`` (with ``iouThrs``,
            ``areaRngLbl`` and ``maxDets``) and ``eval`` (with ``'precision'``
            and ``'recall'`` arrays).
        catId: when an ``int``, restrict every metric to that category index;
            otherwise all categories are averaged.

    Returns:
        tuple: ``(stats, print_info)`` where ``stats`` is a list of 12 metric
        values (6 AP followed by 6 AR) and ``print_info`` is the matching
        12-line, newline-joined report.

    Raises:
        Exception: if ``self.eval`` is empty, i.e. nothing has been accumulated.
    """
    # Validate up front: with the check after the metric computation, an empty
    # ``self.eval`` failed on the 'precision' lookup before reaching it.
    if not self.eval:
        raise Exception('Please run accumulate() first')

    def _summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):
        """Return ``(mean value, formatted line)`` for one metric setting."""
        p = self.params
        iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
        titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
        typeStr = '(AP)' if ap == 1 else '(AR)'
        iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
            if iouThr is None else '{:0.2f}'.format(iouThr)

        # positions of the requested area range / detection limit
        aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
        mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]

        if ap == 1:
            # dimension of precision: [TxRxKxAxM]
            s = self.eval['precision']
            # IoU
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]

            if isinstance(catId, int):
                s = s[:, :, catId, aind, mind]
            else:
                s = s[:, :, :, aind, mind]

        else:
            # dimension of recall: [TxKxAxM]
            s = self.eval['recall']
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]

            if isinstance(catId, int):
                s = s[:, catId, aind, mind]
            else:
                s = s[:, :, aind, mind]

        # entries of -1 are excluded from the mean; -1 is reported if none remain
        if len(s[s > -1]) == 0:
            mean_s = -1
        else:
            mean_s = np.mean(s[s > -1])

        print_string = iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)
        return mean_s, print_string

    max_dets = self.params.maxDets
    # The 12 metric settings, in report order (the first one keeps the
    # default maxDets=100 of _summarize).
    queries = [
        dict(ap=1),
        dict(ap=1, iouThr=.5, maxDets=max_dets[2]),
        dict(ap=1, iouThr=.75, maxDets=max_dets[2]),
        dict(ap=1, areaRng='small', maxDets=max_dets[2]),
        dict(ap=1, areaRng='medium', maxDets=max_dets[2]),
        dict(ap=1, areaRng='large', maxDets=max_dets[2]),
        dict(ap=0, maxDets=max_dets[0]),
        dict(ap=0, maxDets=max_dets[1]),
        dict(ap=0, maxDets=max_dets[2]),
        dict(ap=0, areaRng='small', maxDets=max_dets[2]),
        dict(ap=0, areaRng='medium', maxDets=max_dets[2]),
        dict(ap=0, areaRng='large', maxDets=max_dets[2]),
    ]
    results = [_summarize(**query) for query in queries]

    stats = [value for value, _ in results]
    print_info = "\n".join(line for _, line in results)

    return stats, print_info


def main(parser_data):
    """Evaluate trained Faster R-CNN weights on the VOC2012 "val.txt" split.

    Reads ``device``, ``data_path``, ``batch_size``, ``num_classes`` and
    ``weights_path`` from ``parser_data``, runs inference over the validation
    set, prints the COCO metrics and the per-category AP at IoU=0.5, and
    writes both to ``record_mAP.txt`` in the current working directory.

    Raises:
        AssertionError: if the class json file or the weights file is missing.
        FileNotFoundError: if ``VOCdevkit`` is not found under ``data_path``.
    """
    # Falls back to the CPU whenever CUDA is not available.
    device = torch.device(parser_data.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    data_transform = {
        "val": transforms.Compose([transforms.ToTensor()])
    }

    # read class_indict
    label_json_path = './pascal_voc_classes.json'
    assert os.path.exists(label_json_path), "json file {} dose not exist.".format(label_json_path)
    with open(label_json_path, 'r') as f:
        class_dict = json.load(f)

    # Inverted mapping: json value -> json key (presumably class index -> class name; verify against the json file)
    category_index = {v: k for k, v in class_dict.items()}

    VOC_root = parser_data.data_path
    # check voc root
    if os.path.exists(os.path.join(VOC_root, "VOCdevkit")) is False:
        raise FileNotFoundError("VOCdevkit dose not in path:'{}'.".format(VOC_root))

    # Note: a custom collate_fn is used because each sample contains both an image and
    # its targets, so the default batching method cannot be used directly
    # batch_size only influences the worker count below; the loader itself uses batch_size=1
    batch_size = parser_data.batch_size
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using %g dataloader workers' % nw)

    # load validation data set
    val_dataset = VOCDataSet(VOC_root, "2012", data_transform["val"], "val.txt")
    val_dataset_loader = torch.utils.data.DataLoader(val_dataset,
                                                     batch_size=1,
                                                     shuffle=False,
                                                     num_workers=nw,
                                                     pin_memory=True,
                                                     collate_fn=val_dataset.collate_fn)

    # create model num_classes equal background + 20 classes
    # Note: norm_layer here must match the one used in the training script
    backbone = resnet50_fpn_backbone(norm_layer=torch.nn.BatchNorm2d)
    model = FasterRCNN(backbone=backbone, num_classes=parser_data.num_classes + 1)

    # Load your own trained model weights
    weights_path = parser_data.weights_path
    assert os.path.exists(weights_path), "not found {} file.".format(weights_path)
    weights_dict = torch.load(weights_path, map_location='cpu')
    # Accept both a checkpoint dict with a "model" entry and a bare state dict
    weights_dict = weights_dict["model"] if "model" in weights_dict else weights_dict
    model.load_state_dict(weights_dict)
    # print(model)

    model.to(device)

    # evaluate on the test dataset
    coco = get_coco_api_from_dataset(val_dataset)
    iou_types = ["bbox"]
    coco_evaluator = CocoEvaluator(coco, iou_types)
    cpu_device = torch.device("cpu")

    model.eval()
    with torch.no_grad():
        for image, targets in tqdm(val_dataset_loader, desc="validation..."):
            # Move the images to the target device
            image = list(img.to(device) for img in image)

            # inference
            outputs = model(image)

            outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
            res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
            coco_evaluator.update(res)

    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()

    coco_eval = coco_evaluator.coco_eval["bbox"]
    # calculate COCO info for all classes
    coco_stats, print_coco = summarize(coco_eval)

    # calculate voc info for every classes(IoU=0.5)
    voc_map_info_list = []
    for i in range(len(category_index)):
        # catId i is paired with category_index key i + 1; stats[1] is the AP at IoU=0.5
        stats, _ = summarize(coco_eval, catId=i)
        voc_map_info_list.append(" {:15}: {}".format(category_index[i + 1], stats[1]))

    print_voc = "\n".join(voc_map_info_list)
    print(print_voc)

    # Save the validation results to a txt file
    with open("record_mAP.txt", "w") as f:
        record_lines = ["COCO results:",
                        print_coco,
                        "",
                        "mAP(IoU=0.5) for each category:",
                        print_voc]
        f.write("\n".join(record_lines))


if __name__ == "__main__":
    # Script entry point: parse the command-line options and run the validation.
    import argparse

    parser = argparse.ArgumentParser(
        description=__doc__)

    # Device type to run on
    parser.add_argument('--device', default='cuda', help='device')

    # Number of object classes to detect (main() adds 1 to it when building the model)
    parser.add_argument('--num-classes', type=int, default='20', help='number of classes')

    # Root directory of the dataset (the directory containing VOCdevkit)
    parser.add_argument('--data-path', default='/data/', help='dataset root')

    # Trained weights file
    parser.add_argument('--weights-path', default='./save_weights/model.pth', type=str, help='training weights')

    # batch size
    parser.add_argument('--batch_size', default=1, type=int, metavar='N',
                        help='batch size when validation.')

    args = parser.parse_args()

    main(args)


================================================
FILE: pytorch_object_detection/mask_rcnn/README.md
================================================
# Mask R-CNN

## 该项目参考自pytorch官方torchvision模块中的源码(使用pycocotools处略有不同)
* https://github.com/pytorch/vision/tree/master/references/detection

## 环境配置：
* Python3.6/3.7/3.8
* Pytorch1.10或以上
* pycocotools(Linux:`pip install pycocotools`; Windows:`pip install pycocotools-windows`(不需要额外安装vs))
* Ubuntu或Centos(不建议Windows)
* 最好使用GPU训练
* 详细环境配置见`requirements.txt`

## 文件结构：
```
  ├── backbone: 特征提取网络
  ├── network_files: Mask R-CNN网络
  ├── train_utils: 训练验证相关模块（包括coco验证相关）
  ├── my_dataset_coco.py: 自定义dataset用于读取COCO2017数据集
  ├── my_dataset_voc.py: 自定义dataset用于读取Pascal VOC数据集
  ├── train.py: 单GPU/CPU训练脚本
  ├── train_multi_GPU.py: 针对使用多GPU的用户使用
  ├── predict.py: 简易的预测脚本，使用训练好的权重进行预测
  ├── validation.py: 利用训练好的权重验证/测试数据的COCO指标，并生成record_mAP.txt文件
  └── transforms.py: 数据预处理（随机水平翻转图像以及bboxes、将PIL图像转为Tensor）
```

## 预训练权重下载地址（下载后放入当前文件夹中）：
* Resnet50预训练权重 https://download.pytorch.org/models/resnet50-0676ba61.pth (注意，下载预训练权重后要重命名，
比如在train.py中读取的是`resnet50.pth`文件，不是`resnet50-0676ba61.pth`)
* Mask R-CNN(Resnet50+FPN)预训练权重 https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth (注意，
下载预训练权重后要重命名，比如在train.py中读取的是`maskrcnn_resnet50_fpn_coco.pth`文件，不是`maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth`)
 
 
## 数据集，本例程使用的有COCO2017数据集和Pascal VOC2012数据集
### COCO2017数据集
* COCO官网地址：https://cocodataset.org/
* 对数据集不了解的可以看下我写的博文：https://blog.csdn.net/qq_37541097/article/details/113247318
* 这里以下载coco2017数据集为例，主要下载三个文件：
    * `2017 Train images [118K/18GB]`：训练过程中使用到的所有图像文件
    * `2017 Val images [5K/1GB]`：验证过程中使用到的所有图像文件
    * `2017 Train/Val annotations [241MB]`：对应训练集和验证集的标注json文件
* 都解压到`coco2017`文件夹下，可得到如下文件夹结构：
```
├── coco2017: 数据集根目录
     ├── train2017: 所有训练图像文件夹(118287张)
     ├── val2017: 所有验证图像文件夹(5000张)
     └── annotations: 对应标注文件夹
              ├── instances_train2017.json: 对应目标检测、分割任务的训练集标注文件
              ├── instances_val2017.json: 对应目标检测、分割任务的验证集标注文件
              ├── captions_train2017.json: 对应图像描述的训练集标注文件
              ├── captions_val2017.json: 对应图像描述的验证集标注文件
              ├── person_keypoints_train2017.json: 对应人体关键点检测的训练集标注文件
              └── person_keypoints_val2017.json: 对应人体关键点检测的验证集标注文件
```

### Pascal VOC2012数据集
* 数据集下载地址： http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html#devkit
* 对数据集不了解的可以看下我写的博文：https://blog.csdn.net/qq_37541097/article/details/115787033
* 解压后得到的文件夹结构如下：
```
VOCdevkit
    └── VOC2012
         ├── Annotations               所有的图像标注信息(XML文件)
         ├── ImageSets
         │   ├── Action                人的行为动作图像信息
         │   ├── Layout                人的各个部位图像信息
         │   │
         │   ├── Main                  目标检测分类图像信息
         │   │     ├── train.txt       训练集(5717)
         │   │     ├── val.txt         验证集(5823)
         │   │     └── trainval.txt    训练集+验证集(11540)
         │   │
         │   └── Segmentation          目标分割图像信息
         │         ├── train.txt       训练集(1464)
         │         ├── val.txt         验证集(1449)
         │         └── trainval.txt    训练集+验证集(2913)
         │
         ├── JPEGImages                所有图像文件
         ├── SegmentationClass         语义分割png图（基于类别）
         └── SegmentationObject        实例分割png图（基于目标）
```

## 训练方法
* 确保提前准备好数据集
* 确保提前下载好对应预训练模型权重
* 确保设置好`--num-classes`和`--data-path`
* 若要使用单GPU训练直接使用train.py训练脚本
* 若要使用多GPU训练，使用`torchrun --nproc_per_node=8 train_multi_GPU.py`指令,`nproc_per_node`参数为使用GPU数量
* 如果想指定使用哪些GPU设备可在指令前加上`CUDA_VISIBLE_DEVICES=0,3`(例如我只要使用设备中的第1块和第4块GPU设备)
* `CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py`

## 注意事项
1. 在使用训练脚本时，注意要将`--data-path`设置为自己存放数据集的**根目录**：
```
# 假设要使用COCO数据集，启用自定义数据集读取CocoDetection并将数据集解压到/data/coco2017目录下
python train.py --data-path /data/coco2017

# 假设要使用Pascal VOC数据集，启用自定义数据集读取VOCInstances并将数据集解压到/data/VOCdevkit目录下
python train.py --data-path /data/VOCdevkit
```

2. 如果倍增`batch_size`，建议学习率也跟着倍增。假设将`batch_size`从4设置成8，那么学习率`lr`从0.004设置成0.008
3. 如果使用Batch Normalization模块时，`batch_size`不能小于4，否则效果会变差。**如果显存不够，batch_size必须小于4时**，建议在创建`resnet50_fpn_backbone`时，
将`norm_layer`设置成`FrozenBatchNorm2d`或将`trainable_layers`设置成0(即冻结整个`backbone`)
4. 训练过程中保存的`det_results.txt`(目标检测任务)以及`seg_results.txt`(实例分割任务)是每个epoch在验证集上的COCO指标，前12个值是COCO指标，后面两个值是训练平均损失以及学习率
5. 在使用预测脚本时，要将`weights_path`设置为你自己生成的权重路径。
6. 使用validation文件时，注意确保你的验证集或者测试集中必须包含每个类别的目标，并且使用时需要修改`--num-classes`、`--data-path`、`--weights-path`以及
`--label-json-path`（该参数是根据训练的数据集设置的）。其他代码尽量不要改动


## 复现结果
在COCO2017数据集上进行复现，训练过程中仅载入Resnet50的预训练权重，训练26个epochs。训练采用指令如下：
```
torchrun --nproc_per_node=8 train_multi_GPU.py --batch-size 8 --lr 0.08 --pretrain False --amp True
```

训练得到权重下载地址： https://pan.baidu.com/s/1qpXUIsvnj8RHY-V05J-mnA  密码: 63d5

在COCO2017验证集上的mAP(目标检测任务)：
```
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.381
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.588
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.411
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.215
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.420
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.492
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.315
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.499
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.523
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.319
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.565
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.666
```

在COCO2017验证集上的mAP(实例分割任务)：
```
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.340
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.552
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.361
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.151
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.369
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.500
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.290
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.449
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.468
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.266
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.509
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.619
```

## 如果对Mask RCNN原理不是很理解可参考我的bilibili
https://www.bilibili.com/video/BV1ZY411774T


================================================
FILE: pytorch_object_detection/mask_rcnn/backbone/__init__.py
================================================
from .resnet50_fpn_model import resnet50_fpn_backbone


================================================
FILE: pytorch_object_detection/mask_rcnn/backbone/feature_pyramid_network.py
================================================
from collections import OrderedDict

import torch.nn as nn
import torch
from torch import Tensor
import torch.nn.functional as F

from torch.jit.annotations import Tuple, List, Dict


class IntermediateLayerGetter(nn.ModuleDict):
    """
    Module wrapper that returns the outputs of selected child modules.

    The children of ``model`` are assumed to be registered in the order in
    which they are executed, so a module must **not** be reused twice in the
    forward pass. Only direct children can be queried: ``model.feature1`` can
    be returned, but not ``model.feature1.layer2``.

    Arguments:
        model (nn.Module): model on which we will extract the features
        return_layers (Dict[name, new_name]): a dict containing the names
            of the modules for which the activations will be returned as
            the key of the dict, and the value of the dict is the name
            of the returned activation (which the user can specify).

    Raises:
        ValueError: if a key of ``return_layers`` is not a direct child of ``model``.
    """
    __annotations__ = {
        "return_layers": Dict[str, str],
    }

    def __init__(self, model, return_layers):
        child_names = [name for name, _ in model.named_children()]
        if not set(return_layers).issubset(child_names):
            raise ValueError("return_layers are not present in model")

        # Names of requested layers that have not been reached yet.
        pending = {str(key) for key, _ in return_layers.items()}
        kept = OrderedDict()

        # Keep children in registration order and stop right after the last
        # requested one; everything behind it is dropped.
        for name, module in model.named_children():
            kept[name] = module
            pending.discard(name)
            if not pending:
                break

        super().__init__(kept)
        self.return_layers = return_layers

    def forward(self, x):
        """Run the kept children in order and collect the requested outputs."""
        collected = OrderedDict()
        for name, module in self.items():
            x = module(x)
            if name in self.return_layers:
                collected[self.return_layers[name]] = x
        return collected


class BackboneWithFPN(nn.Module):
    """
    Backbone followed by a Feature Pyramid Network.

    With ``re_getter=True`` the backbone is wrapped in an
    IntermediateLayerGetter so that it returns the feature maps named in
    ``return_layers``; the limitations of IntermediateLayerGetter then apply
    here as well. With ``re_getter=False`` the backbone is used unchanged and
    its output is handed directly to the FPN.

    Arguments:
        backbone (nn.Module)
        return_layers (Dict[name, new_name]): a dict containing the names
            of the modules for which the activations will be returned as
            the key of the dict, and the value of the dict is the name
            of the returned activation (which the user can specify).
            Must not be None when ``re_getter`` is True.
        in_channels_list (List[int]): number of channels for each feature map
            that is returned, in the order they are present in the OrderedDict
        out_channels (int): number of channels in the FPN.
        extra_blocks: ExtraFPNBlock; a LastLevelMaxPool is created when None.
        re_getter (bool): whether to wrap ``backbone`` in an
            IntermediateLayerGetter.
    Attributes:
        out_channels (int): the number of channels in the FPN
    """

    def __init__(self,
                 backbone: nn.Module,
                 return_layers=None,
                 in_channels_list=None,
                 out_channels=256,
                 extra_blocks=None,
                 re_getter=True):
        super().__init__()

        # Fall back to the max-pool top block when the caller supplies none.
        top_block = LastLevelMaxPool() if extra_blocks is None else extra_blocks

        if not re_getter:
            self.body = backbone
        else:
            assert return_layers is not None
            self.body = IntermediateLayerGetter(backbone, return_layers=return_layers)

        self.fpn = FeaturePyramidNetwork(
            in_channels_list=in_channels_list,
            out_channels=out_channels,
            extra_blocks=top_block,
        )

        self.out_channels = out_channels

    def forward(self, x):
        """Run the backbone, then the FPN on the backbone's output."""
        return self.fpn(self.body(x))


class FeaturePyramidNetwork(nn.Module):
    """
    Module that adds a FPN from on top of a set of feature maps. This is based on
    `"Feature Pyramid Network for Object Detection" <https://arxiv.org/abs/1612.03144>`_.
    The feature maps are currently supposed to be in increasing depth
    order.
    The input to the model is expected to be an OrderedDict[Tensor], containing
    the feature maps on top of which the FPN will be added.
    Arguments:
        in_channels_list (list[int]): number of channels for each feature map that
            is passed to the module
        out_channels (int): number of channels of the FPN representation
        extra_blocks (ExtraFPNBlock or None): if provided, extra operations will
            be performed. It is expected to take the fpn features, the original
            features and the names of the original features as input, and returns
            a new list of feature maps and their corresponding names
    """

    def __init__(self, in_channels_list, out_channels, extra_blocks=None):
        super().__init__()
        # 1x1 convolutions that bring each incoming feature map
        # (e.g. resnet layer1..layer4) to `out_channels` channels
        self.inner_blocks = nn.ModuleList()
        # 3x3 convolutions applied to each merged map to produce the
        # corresponding output (prediction) feature map
        self.layer_blocks = nn.ModuleList()
        for in_channels in in_channels_list:
            # NOTE(review): a zero entry is skipped silently, which shifts the
            # block indices relative to the inputs of forward() - confirm that
            # callers never pass 0 here.
            if in_channels == 0:
                continue
            inner_block_module = nn.Conv2d(in_channels, out_channels, 1)
            layer_block_module = nn.Conv2d(out_channels, out_channels, 3, padding=1)
            self.inner_blocks.append(inner_block_module)
            self.layer_blocks.append(layer_block_module)

        # initialize parameters now to avoid modifying the initialization of top_blocks
        # (extra_blocks is only attached afterwards). modules() has to be used
        # here: children() yields just the two ModuleList containers, so the
        # Conv2d check would never match and the init would silently be skipped.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_uniform_(m.weight, a=1)
                nn.init.constant_(m.bias, 0)

        self.extra_blocks = extra_blocks

    def get_result_from_inner_blocks(self, x: Tensor, idx: int) -> Tensor:
        """
        This is equivalent to self.inner_blocks[idx](x),
        but torchscript doesn't support this yet
        """
        num_blocks = len(self.inner_blocks)
        if idx < 0:
            idx += num_blocks
        i = 0
        out = x
        for module in self.inner_blocks:
            if i == idx:
                out = module(x)
            i += 1
        return out

    def get_result_from_layer_blocks(self, x: Tensor, idx: int) -> Tensor:
        """
        This is equivalent to self.layer_blocks[idx](x),
        but torchscript doesn't support this yet
        """
        num_blocks = len(self.layer_blocks)
        if idx < 0:
            idx += num_blocks
        i = 0
        out = x
        for module in self.layer_blocks:
            if i == idx:
                out = module(x)
            i += 1
        return out

    def forward(self, x: Dict[str, Tensor]) -> Dict[str, Tensor]:
        """
        Computes the FPN for a set of feature maps.
        Arguments:
            x (OrderedDict[Tensor]): feature maps for each feature level.
        Returns:
            results (OrderedDict[Tensor]): feature maps after FPN layers.
                They are ordered from highest resolution first.
        """
        # unpack OrderedDict into two lists for easier handling
        names = list(x.keys())
        x = list(x.values())

        # project the last (deepest) feature map to out_channels;
        # equivalent to self.inner_blocks[-1](x[-1])
        last_inner = self.get_result_from_inner_blocks(x[-1], -1)
        # results collects the output feature map of every level
        results = []
        # the 3x3 conv on the projected deepest map gives its output map;
        # equivalent to self.layer_blocks[-1](last_inner)
        results.append(self.get_result_from_layer_blocks(last_inner, -1))

        # top-down pathway: walk from the second-deepest level to the first,
        # adding the upsampled coarser map to each lateral projection
        for idx in range(len(x) - 2, -1, -1):
            inner_lateral = self.get_result_from_inner_blocks(x[idx], idx)
            feat_shape = inner_lateral.shape[-2:]
            inner_top_down = F.interpolate(last_inner, size=feat_shape, mode="nearest")
            last_inner = inner_lateral + inner_top_down
            results.insert(0, self.get_result_from_layer_blocks(last_inner, idx))

        # let the extra blocks extend the outputs (and their names), e.g. with
        # one more level derived from the last output map
        if self.extra_blocks is not None:
            results, names = self.extra_blocks(results, x, names)

        # make it back an OrderedDict
        out = OrderedDict([(k, v) for k, v in zip(names, results)])

        return out


class LastLevelMaxPool(torch.nn.Module):
    """
    Extends the FPN outputs with one extra level obtained by applying
    max_pool2d (kernel 1, stride 2, no padding) to the last feature map.
    """

    def forward(self, x: List[Tensor], y: List[Tensor], names: List[str]) -> Tuple[List[Tensor], List[str]]:
        # Both lists are extended in place and handed back; `y` is not used.
        names.append("pool")
        pooled = F.max_pool2d(x[-1], kernel_size=1, stride=2, padding=0)
        x.append(pooled)
        return x, names


================================================
FILE: pytorch_object_detection/mask_rcnn/backbone/resnet50_fpn_model.py
================================================
import os

import torch
import torch.nn as nn
from torchvision.ops.misc import FrozenBatchNorm2d

from .feature_pyramid_network import BackboneWithFPN, LastLevelMaxPool


class Bottleneck(nn.Module):
    """
    Residual bottleneck block: 1x1 conv -> 3x3 conv -> 1x1 conv, each followed
    by a normalization layer, with the input (optionally passed through
    ``downsample``) added before the final ReLU. The block outputs
    ``out_channel * expansion`` channels.
    """
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, norm_layer=None):
        super().__init__()
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        expanded_channel = out_channel * self.expansion

        # 1x1 conv: squeeze channels
        self.conv1 = nn.Conv2d(in_channel, out_channel,
                               kernel_size=1, stride=1, bias=False)
        self.bn1 = make_norm(out_channel)
        # 3x3 conv: the only place where `stride` is applied
        self.conv2 = nn.Conv2d(out_channel, out_channel,
                               kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = make_norm(out_channel)
        # 1x1 conv: unsqueeze channels
        self.conv3 = nn.Conv2d(out_channel, expanded_channel,
                               kernel_size=1, stride=1, bias=False)
        self.bn3 = make_norm(expanded_channel)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        # Shortcut branch: identity unless a projection module was supplied.
        identity = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out += identity
        return self.relu(out)


class ResNet(nn.Module):
    """
    ResNet trunk: a 7x7 stem followed by four residual stages, with an
    optional average-pool + fully-connected head.

    Args:
        block: residual block class; must expose an ``expansion`` attribute.
        blocks_num: number of blocks in each of the four stages.
        num_classes: output size of the fully-connected head.
        include_top: whether to build and apply the pooling/FC head.
        norm_layer: callable building a normalization layer from a channel
            count; nn.BatchNorm2d when None.
    """

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True, norm_layer=None):
        super().__init__()
        self._norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer

        self.include_top = include_top
        # Running channel count; updated by _make_layer as stages are built.
        self.in_channel = 64

        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = self._norm_layer(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, blocks_num[0])
        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)
        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)
        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)
        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Re-initialize every convolution in the network.
        for conv in (m for m in self.modules() if isinstance(m, nn.Conv2d)):
            nn.init.kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` blocks; only the first may stride."""
        norm_layer = self._norm_layer
        stage_out = channel * block.expansion

        # A projection shortcut is needed whenever the first block changes the
        # spatial size or the channel count.
        shortcut = None
        if stride != 1 or self.in_channel != stage_out:
            shortcut = nn.Sequential(
                nn.Conv2d(self.in_channel, stage_out, kernel_size=1, stride=stride, bias=False),
                norm_layer(stage_out))

        first_block = block(self.in_channel, channel, downsample=shortcut,
                            stride=stride, norm_layer=norm_layer)
        self.in_channel = stage_out

        remaining = [block(self.in_channel, channel, norm_layer=norm_layer)
                     for _ in range(1, block_num)]
        return nn.Sequential(first_block, *remaining)

    def forward(self, x):
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        if not self.include_top:
            return x

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        return self.fc(x)


def overwrite_eps(model, eps):
    """
    Set ``eps`` on every FrozenBatchNorm2d layer found in ``model``.

    This replaces the layers' default eps with the given value. It exists to
    cope with the BC-breaking change introduced by the bug-fix at
    pytorch/vision#2933, so that pretrained weights keep behaving as they did
    with previous versions.

    Args:
        model (nn.Module): The model on which we perform the overwrite.
        eps (float): The new value of eps.
    """
    frozen_norms = (m for m in model.modules() if isinstance(m, FrozenBatchNorm2d))
    for frozen_bn in frozen_norms:
        frozen_bn.eps = eps


def resnet50_fpn_backbone(pretrain_path="",
                          norm_layer=nn.BatchNorm2d,
                          trainable_layers=3,
                          returned_layers=None,
                          extra_blocks=None):
    """
    Build a ResNet-50 backbone with an FPN on top.

    Args:
        pretrain_path: path to ResNet-50 pretrained weights; leave as the
                       empty string (default) to skip loading.
        norm_layer: normalization layer class, nn.BatchNorm2d by default. When
                    GPU memory is small and batch_size cannot be made large,
                    FrozenBatchNorm2d is recommended instead
                    (https://github.com/facebookresearch/maskrcnn-benchmark/issues/267)
        trainable_layers: how many stages, counted from the deepest one
                          (layer4, layer3, layer2, layer1, conv1), stay trainable;
                          must be within 0..5.
        returned_layers: indices (1..4) of the ResNet stages whose outputs are
                         fed to the FPN; defaults to [1, 2, 3, 4].
        extra_blocks: extra block applied on top of the FPN outputs; a
                      LastLevelMaxPool is used when None.

    Returns:
        BackboneWithFPN wrapping the ResNet-50 trunk.
    """
    resnet_backbone = ResNet(Bottleneck, [3, 4, 6, 3],
                             include_top=False,
                             norm_layer=norm_layer)

    # norm_layer is a layer *class* (factory), never a layer instance, so it
    # must be compared as a type; an isinstance() check can never be true here
    # and would silently skip the eps overwrite.
    if isinstance(norm_layer, type) and issubclass(norm_layer, FrozenBatchNorm2d):
        overwrite_eps(resnet_backbone, 0.0)

    if pretrain_path != "":
        assert os.path.exists(pretrain_path), "{} is not exist.".format(pretrain_path)
        # Load the pretrained weights. map_location="cpu" lets checkpoints
        # saved from a GPU be read on hosts without one; load_state_dict then
        # copies the values into the model's own parameters.
        print(resnet_backbone.load_state_dict(torch.load(pretrain_path, map_location="cpu"), strict=False))

    # select layers that wont be frozen
    assert 0 <= trainable_layers <= 5
    layers_to_train = ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'][:trainable_layers]

    # when every stage is trained, do not forget bn1 that follows conv1
    if trainable_layers == 5:
        layers_to_train.append("bn1")

    # freeze layers: every parameter that does not belong to one of the
    # layers in layers_to_train stops receiving gradients
    trainable_prefixes = tuple(layers_to_train)
    for name, parameter in resnet_backbone.named_parameters():
        if not name.startswith(trainable_prefixes):
            parameter.requires_grad_(False)

    if extra_blocks is None:
        extra_blocks = LastLevelMaxPool()

    if returned_layers is None:
        returned_layers = [1, 2, 3, 4]
    # returned stage indices must lie in 1..4
    assert min(returned_layers) > 0 and max(returned_layers) < 5

    # e.g. return_layers = {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'}
    return_layers = {f'layer{k}': str(v) for v, k in enumerate(returned_layers)}

    # resnet_backbone.in_channel is the channel count of layer4's output (2048)
    in_channels_stage2 = resnet_backbone.in_channel // 8  # 256
    # channel count of every feature map that ResNet-50 hands to the FPN
    in_channels_list = [in_channels_stage2 * 2 ** (i - 1) for i in returned_layers]
    # channel count of every feature map produced by the FPN
    out_channels = 256
    return BackboneWithFPN(resnet_backbone, return_layers, in_channels_list, out_channels, extra_blocks=extra_blocks)


================================================
FILE: pytorch_object_detection/mask_rcnn/coco91_indices.json
================================================
{
    "1": "person",
    "2": "bicycle",
    "3": "car",
    "4": "motorcycle",
    "5": "airplane",
    "6": "bus",
    "7": "train",
    "8": "truck",
    "9": "boat",
    "10": "traffic light",
    "11": "fire hydrant",
    "12": "N/A",
    "13": "stop sign",
    "14": "parking meter",
    "15": "bench",
    "16": "bird",
    "17": "cat",
    "18": "dog",
    "19": "horse",
    "20": "sheep",
    "21": "cow",
    "22": "elephant",
    "23": "bear",
    "24": "zebra",
    "25": "giraffe",
    "26": "N/A",
    "27": "backpack",
    "28": "umbrella",
    "29": "N/A",
    "30": "N/A",
    "31": "handbag",
    "32": "tie",
    "33": "suitcase",
    "34": "frisbee",
    "35": "skis",
    "36": "snowboard",
    "37": "sports ball",
    "38": "kite",
    "39": "baseball bat",
    "40": "baseball glove",
    "41": "skateboard",
    "42": "surfboard",
    "43": "tennis racket",
    "44": "bottle",
    "45": "N/A",
    "46": "wine glass",
    "47": "cup",
    "48": "fork",
    "49": "knife",
    "50": "spoon",
    "51": "bowl",
    "52": "banana",
    "53": "apple",
    "54": "sandwich",
    "55": "orange",
    "56": "broccoli",
    "57": "carrot",
    "58": "hot dog",
    "59": "pizza",
    "60": "donut",
    "61": "cake",
    "62": "chair",
    "63": "couch",
    "64": "potted plant",
    "65": "bed",
    "66": "N/A",
    "67": "dining table",
    "68": "N/A",
    "69": "N/A",
    "70": "toilet",
    "71": "N/A",
    "72": "tv",
    "73": "laptop",
    "74": "mouse",
    "75": "remote",
    "76": "keyboard",
    "77": "cell phone",
    "78": "microwave",
    "79": "oven",
    "80": "toaster",
    "81": "sink",
    "82": "refrigerator",
    "83": "N/A",
    "84": "book",
    "85": "clock",
    "86": "vase",
    "87": "scissors",
    "88": "teddy bear",
    "89": "hair drier",
    "90": "toothbrush"
}

================================================
FILE: pytorch_object_detection/mask_rcnn/det_results20220406-141544.txt
================================================
epoch:0 0.171  0.342  0.154  0.099  0.211  0.213  0.184  0.315  0.334  0.168  0.375  0.440  1.3826  0.08
epoch:1 0.230  0.419  0.230  0.132  0.266  0.288  0.224  0.374  0.395  0.216  0.435  0.512  1.0356  0.08
epoch:2 0.242  0.435  0.244  0.133  0.272  0.313  0.233  0.393  0.416  0.232  0.452  0.532  0.9718  0.08
epoch:3 0.261  0.456  0.269  0.145  0.284  0.326  0.248  0.415  0.440  0.260  0.475  0.550  0.9363  0.08
epoch:4 0.266  0.458  0.277  0.150  0.301  0.337  0.250  0.409  0.433  0.245  0.467  0.564  0.9145  0.08
epoch:5 0.272  0.465  0.286  0.155  0.309  0.348  0.251  0.407  0.429  0.247  0.461  0.561  0.8982  0.08
epoch:6 0.288  0.482  0.303  0.163  0.321  0.363  0.263  0.431  0.452  0.265  0.491  0.570  0.8859  0.08
epoch:7 0.287  0.483  0.302  0.164  0.320  0.363  0.268  0.432  0.454  0.268  0.483  0.584  0.8771  0.08
epoch:8 0.298  0.492  0.318  0.166  0.336  0.377  0.268  0.434  0.454  0.265  0.500  0.580  0.8685  0.08
epoch:9 0.289  0.484  0.306  0.156  0.325  0.374  0.263  0.428  0.450  0.252  0.490  0.589  0.8612  0.08
epoch:10 0.297  0.489  0.316  0.167  0.330  0.381  0.270  0.436  0.459  0.258  0.501  0.579  0.8547  0.08
epoch:11 0.299  0.494  0.317  0.171  0.335  0.382  0.272  0.439  0.461  0.276  0.501  0.586  0.8498  0.08
epoch:12 0.301  0.497  0.321  0.178  0.333  0.390  0.270  0.443  0.466  0.277  0.505  0.600  0.8461  0.08
epoch:13 0.307  0.503  0.327  0.175  0.345  0.388  0.276  0.441  0.465  0.269  0.510  0.574  0.8409  0.08
epoch:14 0.299  0.491  0.319  0.171  0.339  0.372  0.271  0.445  0.470  0.284  0.508  0.593  0.8355  0.08
epoch:15 0.306  0.503  0.324  0.166  0.342  0.396  0.278  0.443  0.468  0.271  0.511  0.598  0.8330  0.08
epoch:16 0.374  0.579  0.407  0.214  0.415  0.476  0.311  0.500  0.526  0.325  0.573  0.659  0.7421  0.008
epoch:17 0.379  0.587  0.409  0.214  0.420  0.484  0.316  0.502  0.528  0.322  0.569  0.668  0.7157  0.008
epoch:18 0.380  0.587  0.411  0.214  0.423  0.486  0.315  0.503  0.528  0.323  0.571  0.669  0.7016  0.008
epoch:19 0.381  0.588  0.413  0.216  0.422  0.490  0.317  0.508  0.532  0.332  0.574  0.676  0.6897  0.008
epoch:20 0.379  0.586  0.410  0.212  0.418  0.488  0.313  0.499  0.523  0.317  0.566  0.667  0.6802  0.008
epoch:21 0.378  0.587  0.408  0.210  0.418  0.488  0.314  0.496  0.520  0.314  0.560  0.667  0.6708  0.008
epoch:22 0.381  0.588  0.411  0.213  0.420  0.495  0.316  0.500  0.524  0.318  0.567  0.673  0.6497  0.0008
epoch:23 0.381  0.588  0.411  0.215  0.420  0.492  0.315  0.499  0.523  0.319  0.565  0.666  0.6447  0.0008
epoch:24 0.381  0.588  0.412  0.214  0.419  0.495  0.316  0.499  0.523  0.317  0.565  0.669  0.6421  0.0008
epoch:25 0.380  0.585  0.411  0.214  0.419  0.494  0.314  0.498  0.522  0.316  0.566  0.664  0.6398  0.0008


================================================
FILE: pytorch_object_detection/mask_rcnn/draw_box_utils.py
================================================
from PIL.Image import Image, fromarray
import PIL.ImageDraw as ImageDraw
import PIL.ImageFont as ImageFont
from PIL import ImageColor
import numpy as np

# Palette of color names used for drawing. draw_objs picks an entry by
# `class_id % len(STANDARD_COLORS)` and converts it with ImageColor.getrgb,
# so every entry must be a name that ImageColor can resolve.
STANDARD_COLORS = [
    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',
    'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',
    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',
    'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
    'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',
    'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
    'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
    'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
    'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
    'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
    'WhiteSmoke', 'Yellow', 'YellowGreen'
]


def _text_size(font, text):
    """Return a (width, height) pair for ``text`` rendered with ``font``."""
    # ImageFont.getsize was deprecated in Pillow 9.2 and removed in 10.0.
    # Keep using it where it exists so older installations behave exactly as
    # before; otherwise take the right/bottom edges of getbbox, which are
    # measured from the text origin and serve the same layout purpose.
    if hasattr(font, "getsize"):
        return font.getsize(text)
    bbox = font.getbbox(text)
    return bbox[2], bbox[3]


def draw_text(draw,
              box: list,
              cls: int,
              score: float,
              category_index: dict,
              color: str,
              font: str = 'arial.ttf',
              font_size: int = 24):
    """
    Draw the "<class name>: <score>%" label of one object onto the image.

    The label sits on a filled background of ``color`` directly above the
    box, or directly below it when there is not enough room above.

    Args:
        draw: drawing context exposing ``rectangle`` and ``text``.
        box: [left, top, right, bottom] of the object.
        cls: class id; looked up in ``category_index`` as ``str(cls)``.
        score: confidence, rendered as an integer percentage.
        category_index: mapping from class id (as string) to class name.
        color: fill color of the label background.
        font: font file passed to ImageFont.truetype; the default PIL font is
            used when it cannot be loaded.
        font_size: font size passed to ImageFont.truetype.
    """
    try:
        font = ImageFont.truetype(font, font_size)
    except IOError:
        font = ImageFont.load_default()

    left, top, _, bottom = box
    display_str = f"{category_index[str(cls)]}: {int(100 * score)}%"
    # The label is laid out one character at a time; measure each once.
    char_sizes = [_text_size(font, ch) for ch in display_str]
    # The label gets a top and bottom margin of 0.05x the tallest character.
    display_str_height = (1 + 2 * 0.05) * max(height for _, height in char_sizes)

    # If the label would stick out above the image, place it below the
    # bounding box instead of above.
    if top > display_str_height:
        text_top = top - display_str_height
        text_bottom = top
    else:
        text_top = bottom
        text_bottom = bottom + display_str_height

    for ch, (text_width, _) in zip(display_str, char_sizes):
        margin = np.ceil(0.05 * text_width)
        draw.rectangle([(left, text_top),
                        (left + text_width + 2 * margin, text_bottom)], fill=color)
        draw.text((left + margin, text_top),
                  ch,
                  fill='black',
                  font=font)
        left += text_width


def draw_masks(image, masks, colors, thresh: float = 0.7, alpha: float = 0.5):
    """
    Blend instance masks into ``image`` and return the result as a PIL image.

    Each mask is binarized with ``masks > thresh``; the selected pixels are
    painted with the matching entry of ``colors`` and the painted copy is
    mixed with the untouched image using weight ``alpha``.
    """
    base = np.array(image)
    binary_masks = np.where(masks > thresh, True, False)

    painted = np.copy(base)
    # TODO: There might be a way to vectorize this
    for region, rgb in zip(binary_masks, colors):
        painted[region] = rgb

    blended = base * (1 - alpha) + painted * alpha
    return fromarray(blended.astype(np.uint8))


def draw_objs(image: Image,
              boxes: np.ndarray = None,
              classes: np.ndarray = None,
              scores: np.ndarray = None,
              masks: np.ndarray = None,
              category_index: dict = None,
              box_thresh: float = 0.1,
              mask_thresh: float = 0.5,
              line_thickness: int = 8,
              font: str = 'arial.ttf',
              font_size: int = 24,
              draw_boxes_on_image: bool = True,
              draw_masks_on_image: bool = True):
    """
    Draw bounding boxes, class labels and masks of detected objects.

    Args:
        image: image to draw on
        boxes: bounding boxes of the objects
        classes: class ids of the objects
        scores: confidence scores of the objects
        masks: masks of the objects (optional)
        category_index: mapping from class id to class name
        box_thresh: objects whose score is not above this value are dropped
        mask_thresh: threshold handed to draw_masks for binarizing masks
        line_thickness: width of the bounding-box outline
        font: font type
        font_size: font size
        draw_boxes_on_image: whether boxes and labels are drawn
        draw_masks_on_image: whether masks are drawn (needs ``masks``)

    Returns:
        The annotated image; the input image itself when nothing passes
        ``box_thresh``.
    """

    # Discard low-confidence objects.
    keep = np.greater(scores, box_thresh)
    boxes, classes, scores = boxes[keep], classes[keep], scores[keep]
    if masks is not None:
        masks = masks[keep]
    if len(boxes) == 0:
        return image

    # One RGB color per object, chosen by class id (wrapping around the palette).
    palette_size = len(STANDARD_COLORS)
    colors = [ImageColor.getrgb(STANDARD_COLORS[cls % palette_size]) for cls in classes]

    if draw_boxes_on_image:
        canvas = ImageDraw.Draw(image)
        for box, cls, score, color in zip(boxes, classes, scores, colors):
            left, top, right, bottom = box
            # Closed polyline around the object.
            outline = [(left, top), (left, bottom), (right, bottom),
                       (right, top), (left, top)]
            canvas.line(outline, width=line_thickness, fill=color)
            # Class name and score label.
            draw_text(canvas, box.tolist(), int(cls), float(score), category_index, color, font, font_size)

    if not draw_masks_on_image or masks is None:
        return image

    # Overlay all masks.
    return draw_masks(image, masks, colors, mask_thresh)


================================================
FILE: pytorch_object_detection/mask_rcnn/my_dataset_coco.py
================================================
import os
import json

import torch
from PIL import Image
import torch.utils.data as data
from pycocotools.coco import COCO
from train_utils import coco_remove_images_without_annotations, convert_coco_poly_mask


class CocoDetection(data.Dataset):
    """`MS Coco Detection <https://cocodataset.org/>`_ Dataset.

    Args:
        root (string): Dataset root; it must contain the image folder
            ``{dataset}{years}`` and ``annotations/instances_{dataset}{years}.json``.
        dataset (string): train or val.
        transforms (callable, optional): A function/transform that takes input sample and its target as entry
            and returns a transformed version.
        years (string): Year suffix used in the folder and annotation file names.
    """

    def __init__(self, root, dataset="train", transforms=None, years="2017"):
        super(CocoDetection, self).__init__()
        assert dataset in ["train", "val"], 'dataset must be in ["train", "val"]'
        assert os.path.exists(root), "file '{}' does not exist.".format(root)

        self.img_root = os.path.join(root, f"{dataset}{years}")
        assert os.path.exists(self.img_root), "path '{}' does not exist.".format(self.img_root)

        self.anno_path = os.path.join(root, "annotations", f"instances_{dataset}{years}.json")
        assert os.path.exists(self.anno_path), "file '{}' does not exist.".format(self.anno_path)

        self.mode = dataset
        self.transforms = transforms
        self.coco = COCO(self.anno_path)

        # Map category id -> category name. The ids of the 80 object classes
        # are not contiguous: they follow the 91-category numbering.
        id_to_name = {cat["id"]: cat["name"] for cat in self.coco.cats.values()}
        max_index = max(id_to_name.keys())
        # Fill the gaps in the id range with "N/A".
        coco_classes = {k: id_to_name.get(k, "N/A") for k in range(1, max_index + 1)}

        is_train = dataset == "train"
        if is_train:
            # Dump the id -> name table into the current working directory.
            json_str = json.dumps(coco_classes, indent=4)
            with open("coco91_indices.json", "w") as f:
                f.write(json_str)

        self.coco_classes = coco_classes

        ids = sorted(self.coco.imgs.keys())
        # For training, let the helper drop images it considers unusable
        # (per its name: images without annotations).
        self.ids = coco_remove_images_without_annotations(self.coco, ids) if is_train else ids

    def parse_targets(self,
                      img_id: int,
                      coco_targets: list,
                      w: int = None,
                      h: int = None):
        """Convert the COCO annotations of one image into a target dict of tensors."""
        assert w > 0
        assert h > 0

        # Keep single-instance annotations only (iscrowd == 0).
        anno = [obj for obj in coco_targets if obj['iscrowd'] == 0]

        # reshape guards against images that end up with no boxes
        boxes = torch.as_tensor([obj["bbox"] for obj in anno], dtype=torch.float32).reshape(-1, 4)
        # [xmin, ymin, w, h] -> [xmin, ymin, xmax, ymax]
        boxes[:, 2:] += boxes[:, :2]
        # clip x coordinates to [0, w] and y coordinates to [0, h]
        boxes[:, 0::2].clamp_(min=0, max=w)
        boxes[:, 1::2].clamp_(min=0, max=h)

        classes = torch.tensor([obj["category_id"] for obj in anno], dtype=torch.int64)
        area = torch.tensor([obj["area"] for obj in anno])
        iscrowd = torch.tensor([obj["iscrowd"] for obj in anno])
        masks = convert_coco_poly_mask([obj["segmentation"] for obj in anno], h, w)

        # Keep valid objects only, i.e. x_max > x_min and y_max > y_min.
        keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])

        return {
            "boxes": boxes[keep],
            "labels": classes[keep],
            "masks": masks[keep],
            "image_id": torch.tensor([img_id]),
            # for conversion to coco api
            "area": area[keep],
            "iscrowd": iscrowd[keep],
        }

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: Tuple (image, target). target is the dict built by
            ``parse_targets`` (after ``transforms``, when given).
        """
        img_id = self.ids[index]
        coco_target = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))

        file_name = self.coco.loadImgs(img_id)[0]['file_name']
        img = Image.open(os.path.join(self.img_root, file_name)).convert('RGB')

        w, h = img.size
        target = self.parse_targets(img_id, coco_target, w, h)
        if self.transforms is None:
            return img, target

        img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        return len(self.ids)

    def get_height_and_width(self, index):
        """Return (height, width) of the image at ``index`` from the annotation metadata."""
        img_info = self.coco.loadImgs(self.ids[index])[0]
        return img_info["height"], img_info["width"]

    @staticmethod
    def collate_fn(batch):
        # Transpose a list of (image, target) pairs into (images, targets).
        return tuple(zip(*batch))


if __name__ == '__main__':
    # Manual smoke test: build the training split and fetch its first sample.
    train_set = CocoDetection("/data/coco2017", dataset="train")
    print(len(train_set))
    first_sample = train_set[0]


================================================
FILE: pytorch_object_detection/mask_rcnn/my_dataset_voc.py
================================================
import os
import json

from lxml import etree
import numpy as np
from PIL import Image
import torch
from torch.utils.data import Dataset
from train_utils import convert_to_coco_api


class VOCInstances(Dataset):
    """Pascal VOC instance-segmentation dataset (boxes, labels, instance masks).

    Every name listed in ``ImageSets/Segmentation/<txt_name>`` is checked at
    construction time: its image, XML annotation and ``SegmentationObject``
    mask must exist, and the number of parsed boxes must equal the largest
    instance id found in the mask. Entries that fail the count check are
    skipped with a printed warning. The parsed annotations and the
    instance-id masks of all retained images are kept in memory.

    Args:
        voc_root: dataset root. If the string contains "VOCdevkit" the data is
            looked up in ``<voc_root>/VOC<year>``, otherwise in
            ``<voc_root>/VOCdevkit/VOC<year>``.
        year: "2007" or "2012" (an int is converted to str).
        txt_name: split file name inside ``ImageSets/Segmentation``.
        transforms: optional callable invoked as ``transforms(img, target)``.
    """

    def __init__(self, voc_root, year="2012", txt_name: str = "train.txt", transforms=None):
        super().__init__()
        if isinstance(year, int):
            year = str(year)
        assert year in ["2007", "2012"], "year must be in ['2007', '2012']"
        if "VOCdevkit" in voc_root:
            root = os.path.join(voc_root, f"VOC{year}")
        else:
            root = os.path.join(voc_root, "VOCdevkit", f"VOC{year}")
        assert os.path.exists(root), "path '{}' does not exist.".format(root)
        image_dir = os.path.join(root, 'JPEGImages')
        xml_dir = os.path.join(root, 'Annotations')
        mask_dir = os.path.join(root, 'SegmentationObject')

        txt_path = os.path.join(root, "ImageSets", "Segmentation", txt_name)
        assert os.path.exists(txt_path), "file '{}' does not exist.".format(txt_path)
        with open(txt_path, "r") as f:
            file_names = [x.strip() for x in f.readlines() if len(x.strip()) > 0]

        # read the index -> class-name table (resolved relative to the current
        # working directory) and invert it into class-name -> index
        json_file = 'pascal_voc_indices.json'
        assert os.path.exists(json_file), "{} file not exist.".format(json_file)
        with open(json_file, 'r') as f:
            idx2classes = json.load(f)
        self.class_dict = {v: k for k, v in idx2classes.items()}

        self.images_path = []     # image file paths
        self.xmls_path = []       # xml annotation file paths
        self.xmls_info = []       # xml annotations parsed into dicts
        self.masks_path = []      # SegmentationObject file paths
        self.objects_bboxes = []  # parsed boxes / labels / iscrowd / image_id / area
        self.masks = []           # instance-id arrays read from SegmentationObject

        # make sure image, xml and mask all exist for every listed name
        for idx, name in enumerate(file_names):
            img_path = os.path.join(image_dir, name + ".jpg")
            xml_path = os.path.join(xml_dir, name + '.xml')
            mask_path = os.path.join(mask_dir, name + ".png")
            assert os.path.exists(img_path), f"not find {img_path}"
            assert os.path.exists(xml_path), f"not find {xml_path}"
            assert os.path.exists(mask_path), f"not find {mask_path}"

            # parse the bbox information stored in the xml file.
            # idx (position in the split file, counted before any skipping)
            # is passed on to parse_objects as the image index.
            with open(xml_path) as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            obs_dict = parse_xml_to_dict(xml)["annotation"]
            obs_bboxes = parse_objects(obs_dict, xml_path, self.class_dict, idx)
            num_objs = obs_bboxes["boxes"].shape[0]

            # read SegmentationObject; the context manager releases the file
            # handle as soon as the pixel data has been copied into the array
            with Image.open(mask_path) as mask_img:
                instances_mask = np.array(mask_img)
            # 255 marks background / ignored pixels; treat them as background (0)
            instances_mask[instances_mask == 255] = 0

            # the number of annotated boxes must match the number of instances
            num_instances = instances_mask.max()
            if num_objs != num_instances:
                print(f"warning: num_boxes:{num_objs} and num_instances:{num_instances} do not correspond. "
                      f"skip image:{img_path}")
                continue

            self.images_path.append(img_path)
            self.xmls_path.append(xml_path)
            self.xmls_info.append(obs_dict)
            self.masks_path.append(mask_path)
            self.objects_bboxes.append(obs_bboxes)
            self.masks.append(instances_mask)

        self.transforms = transforms
        self.coco = convert_to_coco_api(self)

    def parse_mask(self, idx: int):
        """Expand the instance-id array of sample ``idx`` into binary masks.

        Returns:
            uint8 tensor with one leading channel per instance id in
            ``1..max_id``; the leading dimension is 0 when the array holds
            no instance.
        """
        mask = self.masks[idx]
        num_instances = int(mask.max())  # the largest id equals the instance count
        per_instance = [mask == i for i in range(1, num_instances + 1)]
        if per_instance:
            masks = np.stack(per_instance, axis=0)
        else:
            # np.stack rejects an empty list, so build the empty result directly
            masks = np.zeros((0,) + mask.shape, dtype=bool)
        return torch.as_tensor(masks, dtype=torch.uint8)

    def _build_target(self, idx: int):
        """Return a fresh target dict for sample ``idx``.

        The cached annotation tensors are cloned and the masks are added to
        the copy only, so callers (including in-place transforms) can never
        modify the data stored in ``self.objects_bboxes``.
        """
        cached = self.objects_bboxes[idx]
        target = {key: value.clone() if isinstance(value, torch.Tensor) else value
                  for key, value in cached.items()}
        target["masks"] = self.parse_mask(idx)
        return target

    def __getitem__(self, idx):
        """
        Args:
            idx (int): Index

        Returns:
            tuple: (image, target) where target holds the boxes, labels,
            iscrowd, image_id, area and per-instance masks of the image.
        """
        img = Image.open(self.images_path[idx]).convert('RGB')
        target = self._build_target(idx)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Return the number of retained images."""
        return len(self.images_path)

    def get_height_and_width(self, idx):
        """Return (height, width) as recorded in the xml annotation."""
        data = self.xmls_info[idx]
        data_height = int(data["size"]["height"])
        data_width = int(data["size"]["width"])
        return data_height, data_width

    def get_annotations(self, idx):
        """Return (target, height, width) for sample ``idx`` without loading the image."""
        data = self.xmls_info[idx]
        h = int(data["size"]["height"])
        w = int(data["size"]["width"])
        return self._build_target(idx), h, w

    @staticmethod
    def collate_fn(batch):
        """Transpose a list of (image, target) samples into per-field tuples."""
        return tuple(zip(*batch))


def parse_xml_to_dict(xml):
    """
    Recursively convert an xml element into nested dictionaries
    (modelled on tensorflow's recursive_parse_xml_to_dict).

    Args:
        xml: xml tree obtained by parsing XML file contents using lxml.etree

    Returns:
        Python dictionary holding XML contents. Leaf elements map their tag to
        their text; 'object' children are gathered into a list because an
        annotation may contain several of them.
    """
    if not len(xml):  # leaf node: nothing left to descend into
        return {xml.tag: xml.text}

    parsed = {}
    for node in xml:
        tag = node.tag
        value = parse_xml_to_dict(node)[tag]  # recurse into the child element
        if tag == 'object':
            parsed.setdefault(tag, []).append(value)
        else:
            parsed[tag] = value
    return {xml.tag: parsed}


def parse_objects(data: dict, xml_path: str, class_dict: dict, idx: int):
    """
    Extract boxes, labels, iscrowd flags and areas from a parsed annotation.

    Args:
        data: the "annotation" dict produced from the xml file
        xml_path: path of the xml file (only used in messages)
        class_dict: mapping from class name to class index
        idx: index of the image, stored as ``image_id``

    Returns:
        dict with tensors "boxes" (N, 4) float32 in (xmin, ymin, xmax, ymax)
        order, "labels" (N,) int64, "iscrowd" (N,) int64 taken from the
        "difficult" field (0 when absent), "image_id" (1,) and "area" (N,).
        N is 0 when every annotated box is degenerate.

    Raises:
        AssertionError: if ``data`` has no "object" entry.
    """
    assert "object" in data, "{} lack of object information.".format(xml_path)

    boxes = []
    labels = []
    iscrowd = []
    for obj in data["object"]:
        bndbox = obj["bndbox"]
        xmin = float(bndbox["xmin"])
        xmax = float(bndbox["xmax"])
        ymin = float(bndbox["ymin"])
        ymax = float(bndbox["ymax"])

        # some annotations contain boxes with zero (or negative) width/height;
        # such boxes would turn the regression loss into nan, so drop them
        if xmax <= xmin or ymax <= ymin:
            print("Warning: in '{}' xml, there are some bbox w/h <=0".format(xml_path))
            continue

        boxes.append([xmin, ymin, xmax, ymax])
        labels.append(int(class_dict[obj["name"]]))
        iscrowd.append(int(obj["difficult"]) if "difficult" in obj else 0)

    # convert everything into a torch.Tensor.
    # reshape keeps the (N, 4) layout even when no box survived, so the
    # column indexing below also works for N == 0
    boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
    labels = torch.as_tensor(labels, dtype=torch.int64)
    iscrowd = torch.as_tensor(iscrowd, dtype=torch.int64)
    image_id = torch.tensor([idx])
    area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

    return {"boxes": boxes,
            "labels": labels,
            "iscrowd": iscrowd,
            "image_id": image_id,
            "area": area}


# Manual smoke test: build the dataset from a hard-coded local root,
# print how many images were retained and fetch the first sample.
if __name__ == '__main__':
    dataset = VOCInstances(voc_root="/data/")
    print(len(dataset))
    d1 = dataset[0]


================================================
FILE: pytorch_object_detection/mask_rcnn/network_files/__init__.py
================================================
from .faster_rcnn_framework import FasterRCNN, FastRCNNPredictor
from .rpn_function import AnchorsGenerator
from .mask_rcnn import MaskRCNN


================================================
FILE: pytorch_object_detection/mask_rcnn/network_files/boxes.py
================================================
import torch
from typing import Tuple
from torch import Tensor
import torchvision


def nms(boxes, scores, iou_threshold):
    # type: (Tensor, Tensor, float) -> Tensor
    """
    Performs non-maximum suppression (NMS) on the boxes according
    to their intersection-over-union (IoU).

    NMS iteratively removes lower scoring boxes which have an
    IoU greater than iou_threshold with another (higher scoring)
    box.

    This is a thin wrapper that forwards its arguments unchanged to the
    registered ``torchvision`` NMS operator.

    Parameters
    ----------
    boxes : Tensor[N, 4]
        boxes to perform NMS on. They
        are expected to be in (x1, y1, x2, y2) format
    scores : Tensor[N]
        scores for each one of the boxes
    iou_threshold : float
        discards all overlapping
        boxes with IoU > iou_threshold

    Returns
    -------
    keep : Tensor
        int64 tensor with the indices
        of the elements that have been kept
        by NMS, sorted in decreasing order of scores
    """
    return torch.ops.torchvision.nms(boxes, scores, iou_threshold)


def batched_nms(boxes, scores, idxs, iou_threshold):
    # type: (Tensor, Tensor, Tensor, float) -> Tensor
    """
    Run non-maximum suppression separately for every group in ``idxs``.

    Boxes that carry different index values (categories / levels) never
    suppress each other.

    Parameters
    ----------
    boxes : Tensor[N, 4]
        boxes in (x1, y1, x2, y2) format
    scores : Tensor[N]
        score of each box
    idxs : Tensor[N]
        group (category) index of each box
    iou_threshold : float
        overlapping boxes with IoU > iou_threshold are discarded

    Returns
    -------
    keep : Tensor
        int64 tensor with the indices of the boxes kept by NMS,
        sorted in decreasing order of scores
    """
    if boxes.numel() == 0:
        return torch.empty((0,), dtype=torch.int64, device=boxes.device)

    # Trick: shift the boxes of every group by a group-specific offset that is
    # larger than any coordinate, so boxes of different groups cannot overlap
    # and a single NMS call handles all groups at once.
    stride = boxes.max() + 1
    # .to(boxes) only aligns dtype and device of the indices with the boxes
    group_shift = idxs.to(boxes) * stride
    shifted_boxes = boxes + group_shift[:, None]
    return nms(shifted_boxes, scores, iou_threshold)


def remove_small_boxes(boxes, min_size):
    # type: (Tensor, float) -> Tensor
    """
    Find the boxes whose width and height both reach min_size.

    Arguments:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format
        min_size (float): minimum size

    Returns:
        keep (Tensor[K]): indices of the boxes whose two sides are both
            greater than or equal to min_size
    """
    extents = boxes[:, 2:4] - boxes[:, 0:2]  # column 0: width, column 1: height
    wide_enough = torch.ge(extents[:, 0], min_size)
    tall_enough = torch.ge(extents[:, 1], min_size)
    # torch.where on a 1-D boolean tensor yields a 1-tuple of index tensors
    return torch.where(torch.logical_and(wide_enough, tall_enough))[0]


def clip_boxes_to_image(boxes, size):
    # type: (Tensor, Tuple[int, int]) -> Tensor
    """
    Clip boxes so that they lie inside an image of size `size`.
    Coordinates that fall outside the image are moved onto its border.

    Arguments:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format
        size (Tuple[height, width]): size of the image

    Returns:
        clipped_boxes (Tensor[N, 4])
    """
    dim = boxes.dim()
    boxes_x = boxes[..., 0::2]  # x1, x2
    boxes_y = boxes[..., 1::2]  # y1, y2
    height, width = size

    # NOTE(review): while tracing, the same clamp is expressed with
    # torch.max / torch.min against 0-dim tensors; presumably this keeps the
    # bounds as tensors in the traced graph -- confirm.
    if torchvision._is_tracing():
        boxes_x = torch.max(boxes_x, torch.tensor(0, dtype=boxes.dtype, device=boxes.device))
        boxes_x = torch.min(boxes_x, torch.tensor(width, dtype=boxes.dtype, device=boxes.device))
        boxes_y = torch.max(boxes_y, torch.tensor(0, dtype=boxes.dtype, device=boxes.device))
        boxes_y = torch.min(boxes_y, torch.tensor(height, dtype=boxes.dtype, device=boxes.device))
    else:
        boxes_x = boxes_x.clamp(min=0, max=width)   # keep x coordinates within [0, width]
        boxes_y = boxes_y.clamp(min=0, max=height)  # keep y coordinates within [0, height]

    # interleave x and y again along a new trailing axis, then restore the input shape
    clipped_boxes = torch.stack((boxes_x, boxes_y), dim=dim)
    return clipped_boxes.reshape(boxes.shape)


def box_area(boxes):
    """
    Compute the area of every box in a set of (x1, y1, x2, y2) boxes.

    Arguments:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format

    Returns:
        area (Tensor[N]): width * height of each box
    """
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    return widths * heights


def box_iou(boxes1, boxes2):
    """
    Compute the pairwise intersection-over-union (Jaccard index) of two box sets.

    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.

    Arguments:
        boxes1 (Tensor[N, 4])
        boxes2 (Tensor[M, 4])

    Returns:
        iou (Tensor[N, M]): IoU of every box in boxes1 with every box in boxes2
    """
    # boxes1 gets an extra axis so that broadcasting pairs every box of
    # boxes1 with every box of boxes2
    top_left = torch.max(boxes1[:, None, :2], boxes2[:, :2])      # [N,M,2]
    bottom_right = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # [N,M,2]

    # boxes that do not overlap give a negative extent; clamp it to zero
    overlap = (bottom_right - top_left).clamp(min=0)  # [N,M,2]
    inter = overlap[:, :, 0] * overlap[:, :, 1]       # [N,M]

    union = box_area(boxes1)[:, None] + box_area(boxes2) - inter
    return inter / union


================================================
FILE: pytorch_object_detection/mask_rcnn/network_files/det_utils.py
================================================
import torch
import math
from typing import List, Tuple
from torch import Tensor


class BalancedPositiveNegativeSampler(object):
    """
    Draws a fixed-size sample per image with a capped share of positives.
    """

    def __init__(self, batch_size_per_image, positive_fraction):
        # type: (int, float) -> None
        """
        Arguments:
            batch_size_per_image (int): number of elements to be selected per image
            positive_fraction (float): upper bound on the share of positives in a sample
        """
        self.batch_size_per_image = batch_size_per_image
        self.positive_fraction = positive_fraction

    def __call__(self, matched_idxs):
        # type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
        """
        Arguments:
            matched idxs: list of tensors, one per image. Values >= 1 are
                positives, values equal to 0 are negatives, any other value
                is ignored.

        Returns:
            pos_idx (list[tensor])
            neg_idx (list[tensor])

        Two lists of uint8 masks, one mask per image: the first list marks the
        sampled positives, the second one the sampled negatives.
        """
        pos_masks = []
        neg_masks = []
        for labels in matched_idxs:
            # candidate indices: positives are >= 1, negatives are == 0
            pos_candidates = torch.where(torch.ge(labels, 1))[0]
            neg_candidates = torch.where(torch.eq(labels, 0))[0]

            # use all positives when there are fewer than the requested amount
            pos_quota = min(pos_candidates.numel(),
                            int(self.batch_size_per_image * self.positive_fraction))
            # negatives fill the rest of the sample, again limited by availability
            neg_quota = min(neg_candidates.numel(),
                            self.batch_size_per_image - pos_quota)

            # random choice without replacement: shuffle, then keep the head
            pos_pick = torch.randperm(pos_candidates.numel(), device=pos_candidates.device)[:pos_quota]
            neg_pick = torch.randperm(neg_candidates.numel(), device=neg_candidates.device)[:neg_quota]

            # turn the chosen indices into binary masks over all elements
            pos_mask = torch.zeros_like(labels, dtype=torch.uint8)
            neg_mask = torch.zeros_like(labels, dtype=torch.uint8)
            pos_mask[pos_candidates[pos_pick]] = 1
            neg_mask[neg_candidates[neg_pick]] = 1

            pos_masks.append(pos_mask)
            neg_masks.append(neg_mask)

        return pos_masks, neg_masks


@torch.jit._script_if_tracing
def encode_boxes(reference_boxes, proposals, weights):
    # type: (torch.Tensor, torch.Tensor, torch.Tensor) -> torch.Tensor
    """
    Express reference boxes as regression targets relative to proposals.

    Arguments:
        reference_boxes (Tensor): reference boxes (gt), one row per proposal
        proposals (Tensor): boxes to be encoded (anchors)
        weights: four scaling factors applied to (dx, dy, dw, dh)

    Returns:
        Tensor with one (dx, dy, dw, dh) row per proposal
    """
    # unpack the weights one by one (JIT-fusion friendly)
    wx = weights[0]
    wy = weights[1]
    ww = weights[2]
    wh = weights[3]

    # every coordinate becomes a column vector of shape [N, 1]
    anchor_x1 = proposals[:, 0].unsqueeze(1)
    anchor_y1 = proposals[:, 1].unsqueeze(1)
    anchor_x2 = proposals[:, 2].unsqueeze(1)
    anchor_y2 = proposals[:, 3].unsqueeze(1)

    gt_x1 = reference_boxes[:, 0].unsqueeze(1)
    gt_y1 = reference_boxes[:, 1].unsqueeze(1)
    gt_x2 = reference_boxes[:, 2].unsqueeze(1)
    gt_y2 = reference_boxes[:, 3].unsqueeze(1)

    # size and centre of the anchors / proposals
    anchor_w = anchor_x2 - anchor_x1
    anchor_h = anchor_y2 - anchor_y1
    anchor_cx = anchor_x1 + 0.5 * anchor_w
    anchor_cy = anchor_y1 + 0.5 * anchor_h

    # size and centre of the ground-truth boxes
    gt_w = gt_x2 - gt_x1
    gt_h = gt_y2 - gt_y1
    gt_cx = gt_x1 + 0.5 * gt_w
    gt_cy = gt_y1 + 0.5 * gt_h

    # centre offsets are normalised by the anchor size, sizes use a log ratio
    dx = wx * (gt_cx - anchor_cx) / anchor_w
    dy = wy * (gt_cy - anchor_cy) / anchor_h
    dw = ww * torch.log(gt_w / anchor_w)
    dh = wh * torch.log(gt_h / anchor_h)

    return torch.cat((dx, dy, dw, dh), dim=1)


class BoxCoder(object):
    """
    Converts bounding boxes to and from the (dx, dy, dw, dh) representation
    used to train the box regressors.
    """

    def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)):
        # type: (Tuple[float, float, float, float], float) -> None
        """
        Arguments:
            weights (4-element tuple): scaling of (dx, dy, dw, dh)
            bbox_xform_clip (float): upper bound applied to dw / dh when decoding
        """
        self.weights = weights
        self.bbox_xform_clip = bbox_xform_clip

    def encode(self, reference_boxes, proposals):
        # type: (List[Tensor], List[Tensor]) -> List[Tensor]
        """
        Compute the regression targets of anchors/proposals with respect to
        their matched gt boxes.

        Args:
            reference_boxes: List[Tensor] gt box matched to every proposal/anchor
            proposals: List[Tensor] anchors/proposals

        Returns: regression parameters, split back into one tensor per image

        """
        # remember how many boxes each image contributes so that the batched
        # result can be split again (both lists share the same layout)
        counts = [len(per_image) for per_image in reference_boxes]
        all_references = torch.cat(reference_boxes, dim=0)
        all_proposals = torch.cat(proposals, dim=0)

        # rows of (targets_dx, targets_dy, targets_dw, targets_dh)
        encoded = self.encode_single(all_references, all_proposals)
        return encoded.split(counts, 0)

    def encode_single(self, reference_boxes, proposals):
        """
        Encode a set of proposals with respect to some
        reference boxes

        Arguments:
            reference_boxes (Tensor): reference boxes
            proposals (Tensor): boxes to be encoded
        """
        # the weights follow dtype and device of the reference boxes
        weights = torch.as_tensor(self.weights,
                                  dtype=reference_boxes.dtype,
                                  device=reference_boxes.device)
        return encode_boxes(reference_boxes, proposals, weights)

    def decode(self, rel_codes, boxes):
        # type: (Tensor, List[Tensor]) -> Tensor
        """
        Apply predicted regression parameters to anchors/proposals.

        Args:
            rel_codes: bbox regression parameters
            boxes: anchors/proposals, one tensor per image

        Returns:
            decoded boxes, reshaped to [total_boxes, -1, 4] unless there are
            no boxes at all

        """
        assert isinstance(boxes, (list, tuple))
        assert isinstance(rel_codes, torch.Tensor)

        total_boxes = sum([per_image.size(0) for per_image in boxes])
        decoded = self.decode_single(rel_codes, torch.cat(boxes, dim=0))

        # reshaping an empty result would fail, so only reshape when boxes exist
        if total_boxes > 0:
            decoded = decoded.reshape(total_boxes, -1, 4)
        return decoded

    def decode_single(self, rel_codes, boxes):
        """
        From a set of original boxes and encoded relative box offsets,
        get the decoded boxes.

        Arguments:
            rel_codes (Tensor): encoded boxes (bbox regression parameters)
            boxes (Tensor): reference boxes (anchors/proposals)
        """
        boxes = boxes.to(rel_codes.dtype)

        # size and centre of the anchors/proposals, given as xmin, ymin, xmax, ymax
        box_w = boxes[:, 2] - boxes[:, 0]
        box_h = boxes[:, 3] - boxes[:, 1]
        box_cx = boxes[:, 0] + 0.5 * box_w
        box_cy = boxes[:, 1] + 0.5 * box_h

        # every 4th column belongs to the same parameter; undo the weighting
        wx, wy, ww, wh = self.weights
        dx = rel_codes[:, 0::4] / wx
        dy = rel_codes[:, 1::4] / wy
        dw = rel_codes[:, 2::4] / ww
        dh = rel_codes[:, 3::4] / wh

        # cap dw/dh so that torch.exp() never receives overly large values
        dw = torch.clamp(dw, max=self.bbox_xform_clip)
        dh = torch.clamp(dh, max=self.bbox_xform_clip)

        out_cx = dx * box_w[:, None] + box_cx[:, None]
        out_cy = dy * box_h[:, None] + box_cy[:, None]
        out_w = torch.exp(dw) * box_w[:, None]
        out_h = torch.exp(dh) * box_h[:, None]

        half_w = torch.tensor(0.5, dtype=out_cx.dtype, device=out_w.device) * out_w
        half_h = torch.tensor(0.5, dtype=out_cy.dtype, device=out_h.device) * out_h

        corners = (
            out_cx - half_w,  # xmin
            out_cy - half_h,  # ymin
            out_cx + half_w,  # xmax
            out_cy + half_h,  # ymax
        )
        return torch.stack(corners, dim=2).flatten(1)


class Matcher(object):
    """
    Assigns to every prediction the ground-truth element it overlaps most,
    or a negative marker when the overlap is too small.
    """
    BELOW_LOW_THRESHOLD = -1
    BETWEEN_THRESHOLDS = -2

    __annotations__ = {
        'BELOW_LOW_THRESHOLD': int,
        'BETWEEN_THRESHOLDS': int,
    }

    def __init__(self, high_threshold, low_threshold, allow_low_quality_matches=False):
        # type: (float, float, bool) -> None
        """
        Args:
            high_threshold (float): quality values greater than or equal to
                this value are candidate matches.
            low_threshold (float): a lower quality threshold used to stratify
                matches into three levels:
                1) matches >= high_threshold
                2) BETWEEN_THRESHOLDS matches in [low_threshold, high_threshold)
                3) BELOW_LOW_THRESHOLD matches in [0, low_threshold)
            allow_low_quality_matches (bool): if True, produce additional matches
                for predictions that have only low-quality match candidates. See
                set_low_quality_matches_ for more details.
        """
        assert low_threshold <= high_threshold
        self.BELOW_LOW_THRESHOLD = -1
        self.BETWEEN_THRESHOLDS = -2
        self.high_threshold = high_threshold
        self.low_threshold = low_threshold
        self.allow_low_quality_matches = allow_low_quality_matches

    def __call__(self, match_quality_matrix):
        """
        For every prediction find the gt element with the highest quality and
        record its index; predictions whose best quality is below
        low_threshold get -1, those in [low_threshold, high_threshold) get -2.

        Args:
            match_quality_matrix (Tensor[float]): an MxN tensor, containing the
            pairwise quality between M ground-truth elements and N predicted elements.

        Returns:
            matches (Tensor[int64]): an N tensor where N[i] is a matched gt in
            [0, M - 1] or a negative value indicating that prediction i could not
            be matched.
        """
        # empty targets or proposals are not supported during training
        if match_quality_matrix.numel() == 0:
            if match_quality_matrix.shape[0] == 0:
                raise ValueError(
                    "No ground-truth boxes available for one of the images "
                    "during training")
            raise ValueError(
                "No proposal boxes available for one of the images "
                "during training")

        # The matrix is M (gt) x N (predicted): reducing over dim 0 gives, for
        # each prediction, its best quality and the index of the gt reaching it.
        best_quality, matches = match_quality_matrix.max(dim=0)

        # keep the unthresholded assignment for the low-quality fallback
        unthresholded = matches.clone() if self.allow_low_quality_matches else None

        # replace low-quality candidates by the negative markers
        too_low = best_quality < self.low_threshold
        in_between = (best_quality >= self.low_threshold) & (
            best_quality < self.high_threshold
        )
        matches[too_low] = self.BELOW_LOW_THRESHOLD  # -1
        matches[in_between] = self.BETWEEN_THRESHOLDS  # -2

        if self.allow_low_quality_matches:
            assert unthresholded is not None
            self.set_low_quality_matches_(matches, unthresholded, match_quality_matrix)

        return matches

    def set_low_quality_matches_(self, matches, all_matches, match_quality_matrix):
        """
        Produce additional matches for predictions that have only low-quality matches.
        Specifically, for each ground-truth find the set of predictions that have
        maximum overlap with it (including ties); for each prediction in that set, if
        it is unmatched, then match it to the ground-truth with which it has the highest
        quality value.

        ``matches`` is modified in place.
        """
        # best quality reached by each gt element over all predictions
        best_per_gt, _ = match_quality_matrix.max(dim=1)

        # every (gt, prediction) position that attains that best value; a gt
        # element may tie with several predictions
        tied_positions = torch.where(
            torch.eq(match_quality_matrix, best_per_gt[:, None])
        )

        # only the prediction indices (second entry) are needed
        pred_indices = tied_positions[1]
        # restore the unthresholded assignment, even if its quality is low
        matches[pred_indices] = all_matches[pred_indices]


def smooth_l1_loss(input, target, beta: float = 1. / 9, size_average: bool = True):
    """
    Smooth L1 loss with a configurable transition point ``beta``.

    Absolute errors below ``beta`` are penalised quadratically, larger ones
    linearly. Returns the mean over all elements when ``size_average`` is
    True, otherwise the sum.
    """
    abs_err = torch.abs(input - target)
    quadratic = 0.5 * abs_err ** 2 / beta
    linear = abs_err - 0.5 * beta
    loss = torch.where(torch.lt(abs_err, beta), quadratic, linear)
    return loss.mean() if size_average else loss.sum()


================================================
FILE: pytorch_object_detection/mask_rcnn/network_files/faster_rcnn_framework.py
================================================
import warnings
from collections import OrderedDict
from typing import Tuple, List, Dict, Optional, Union

import torch
from torch import nn, Tensor
import torch.nn.functional as F
from torchvision.ops import MultiScaleRoIAlign

from .roi_head import RoIHeads
from .transform import GeneralizedRCNNTransform
from .rpn_function import AnchorsGenerator, RPNHead, RegionProposalNetwork


class FasterRCNNBase(nn.Module):
    """
    Main class for Generalized R-CNN.

    Chains four sub-modules: transform -> backbone -> rpn -> roi_heads, and
    finally maps the detections back to the original image sizes via
    ``transform.postprocess``.

    Arguments:
        backbone (nn.Module): feature extractor; may return a single Tensor or
            an ordered dict of Tensors.
        rpn (nn.Module): called as ``rpn(images, features, targets)`` and
            expected to return ``(proposals, proposal_losses)``.
        roi_heads (nn.Module): takes the features + the proposals from the RPN and computes
            detections / masks from it.
        transform (nn.Module): performs the data transformation from the inputs to feed into
            the model
    """

    def __init__(self, backbone, rpn, roi_heads, transform):
        super(FasterRCNNBase, self).__init__()
        self.transform = transform
        self.backbone = backbone
        self.rpn = rpn
        self.roi_heads = roi_heads
        # used only on torchscript mode
        self._has_warned = False

    @torch.jit.unused
    def eager_outputs(self, losses, detections):
        # type: (Dict[str, Tensor], List[Dict[str, Tensor]]) -> Union[Dict[str, Tensor], List[Dict[str, Tensor]]]
        """Return the losses in training mode and the detections otherwise."""
        if self.training:
            return losses

        return detections

    def forward(self, images, targets=None):
        # type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]
        """
        Arguments:
            images (list[Tensor]): images to be processed
            targets (list[Dict[Tensor]]): ground-truth boxes present in the image (optional)

        Returns:
            result (list[Dict[Tensor]] or dict[Tensor]): the output from the model.
                During training, it returns a dict[Tensor] which contains the losses.
                During testing, it returns the list of per-image detections.
                Under TorchScript scripting it always returns the tuple
                ``(losses, detections)``.

        Raises:
            ValueError: in training mode when ``targets`` is None, or when a
                target's ``"boxes"`` entry is not a Tensor of shape [N, 4].
        """
        if self.training and targets is None:
            raise ValueError("In training mode, targets should be passed")

        if self.training:
            assert targets is not None
            # Validate the "boxes" entry of every target before doing any work.
            for target in targets:
                boxes = target["boxes"]
                if isinstance(boxes, torch.Tensor):
                    if len(boxes.shape) != 2 or boxes.shape[-1] != 4:
                        raise ValueError("Expected target boxes to be a tensor "
                                         "of shape [N, 4], got {:}.".format(
                                          boxes.shape))
                else:
                    raise ValueError("Expected target boxes to be of type "
                                     "Tensor, got {:}.".format(type(boxes)))

        # Record every image's (height, width) before the transform resizes it.
        # An explicitly annotated list filled in a loop is used instead of a
        # comprehension (presumably so TorchScript knows the element type).
        original_image_sizes = torch.jit.annotate(List[Tuple[int, int]], [])
        for img in images:
            val = img.shape[-2:]
            assert len(val) == 2  # guards against a one-dimensional input
            original_image_sizes.append((val[0], val[1]))

        # Pre-process the images (and targets).
        images, targets = self.transform(images, targets)
        # Run the backbone on the batched image tensor to get the feature maps.
        features = self.backbone(images.tensors)
        # A backbone predicting on a single level returns a bare Tensor; wrap it
        # in an ordered dict under key '0'. Multi-level backbones already
        # return an ordered dict.
        if isinstance(features, torch.Tensor):
            features = OrderedDict([('0', features)])

        # Feed the feature maps and the target annotations to the RPN.
        # proposals: List[Tensor], Tensor_shape: [num_proposals, 4],
        # each proposal is in absolute (x1, y1, x2, y2) coordinates.
        proposals, proposal_losses = self.rpn(images, features, targets)

        # Feed the RPN output and the target annotations to the second stage.
        detections, detector_losses = self.roi_heads(features, proposals, images.image_sizes, targets)

        # Post-process the predictions (mainly rescaling the boxes back to the
        # original image sizes).
        detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)

        losses = {}
        losses.update(detector_losses)
        losses.update(proposal_losses)

        if torch.jit.is_scripting():
            if not self._has_warned:
                warnings.warn("RCNN always returns a (Losses, Detections) tuple in scripting")
                self._has_warned = True
            return losses, detections
        else:
            return self.eager_outputs(losses, detections)


class TwoMLPHead(nn.Module):
    """
    Standard heads for FPN-based models: the input is flattened and passed
    through two fully-connected layers, each followed by a ReLU.

    Arguments:
        in_channels (int): number of input features after flattening
        representation_size (int): size of the intermediate representation
    """

    def __init__(self, in_channels, representation_size):
        super().__init__()

        # Attribute names double as state-dict keys, so they are kept as-is.
        self.fc6 = nn.Linear(in_channels, representation_size)
        self.fc7 = nn.Linear(representation_size, representation_size)

    def forward(self, x):
        # Collapse every dimension after the first into one feature vector.
        flat = x.flatten(start_dim=1)
        hidden = F.relu(self.fc6(flat))
        return F.relu(self.fc7(hidden))


class FastRCNNPredictor(nn.Module):
    """
    Standard classification + bounding box regression layers
    for Fast R-CNN: two parallel linear layers applied to the same input.

    Arguments:
        in_channels (int): number of input channels
        num_classes (int): number of output classes (including background)
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.cls_score = nn.Linear(in_channels, num_classes)
        # Four regression values are predicted per class.
        self.bbox_pred = nn.Linear(in_channels, num_classes * 4)

    def forward(self, x):
        # A 4-D input is only accepted when its two trailing dims are 1x1.
        if x.dim() == 4:
            trailing = list(x.shape[2:])
            assert trailing == [1, 1]
        flat = x.flatten(start_dim=1)
        return self.cls_score(flat), self.bbox_pred(flat)


class FasterRCNN(FasterRCNNBase):
    """
    Implements Faster R-CNN.

    The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
    image, and should be in 0-1 range. Different images can have different sizes.

    The behavior of the model changes depending if it is in training or evaluation mode.

    During training, the model expects both the input tensors, as well as a targets (list of dictionary),
    containing:
        - boxes (FloatTensor[N, 4]): the ground-truth boxes in [x1, y1, x2, y2] format, with values
          between 0 and H and 0 and W
        - labels (Int64Tensor[N]): the class label for each ground-truth box

    The model returns a Dict[Tensor] during training, containing the classification and regression
    losses for both the RPN and the R-CNN.

    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as
    follows:
        - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values between
          0 and H and 0 and W
        - labels (Int64Tensor[N]): the predicted labels for each image
        - scores (Tensor[N]): the scores of each prediction

    Arguments:
        backbone (nn.Module): the network used to compute the features for the model.
            It should contain an out_channels attribute, which indicates the number of output
            channels that each feature map has (and it should be the same for all feature maps).
            The backbone should return a single Tensor or an OrderedDict[Tensor].
        num_classes (int): number of output classes of the model (including the background).
            If box_predictor is specified, num_classes should be None.
        min_size (int): minimum size of the image to be rescaled before feeding it to the backbone
        max_size (int): maximum size of the image to be rescaled before feeding it to the backbone
        image_mean (Tuple[float, float, float]): mean values used for input normalization.
            They are generally the mean values of the dataset on which the backbone has been trained
            on. Defaults to [0.485, 0.456, 0.406] when None.
        image_std (Tuple[float, float, float]): std values used for input normalization.
            They are generally the std values of the dataset on which the backbone has been trained on.
            Defaults to [0.229, 0.224, 0.225] when None.
        rpn_anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature
            maps.
        rpn_head (nn.Module): module that computes the objectness and regression deltas from the RPN
        rpn_pre_nms_top_n_train (int): number of proposals to keep before applying NMS during training
        rpn_pre_nms_top_n_test (int): number of proposals to keep before applying NMS during testing
        rpn_post_nms_top_n_train (int): number of proposals to keep after applying NMS during training
        rpn_post_nms_top_n_test (int): number of proposals to keep after applying NMS during testing
        rpn_nms_thresh (float): NMS threshold used for postprocessing the RPN proposals
        rpn_fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be
            considered as positive during training of the RPN.
        rpn_bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be
            considered as negative during training of the RPN.
        rpn_batch_size_per_image (int): number of anchors that are sampled during training of the RPN
            for computing the loss
        rpn_positive_fraction (float): proportion of positive anchors in a mini-batch during training
            of the RPN
        rpn_score_thresh (float): during inference, only return proposals with a classification score
            greater than rpn_score_thresh
        box_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in
            the locations indicated by the bounding boxes
        box_head (nn.Module): module that takes the cropped feature maps as input
        box_predictor (nn.Module): module that takes the output of box_head and returns the
            classification logits and box regression deltas.
        box_score_thresh (float): during inference, only return proposals with a classification score
            greater than box_score_thresh
        box_nms_thresh (float): NMS threshold for the prediction head. Used during inference
        box_detections_per_img (int): maximum number of detections per image, for all classes.
        box_fg_iou_thresh (float): minimum IoU between the proposals and the GT box so that they can be
            considered as positive during training of the classification head
        box_bg_iou_thresh (float): maximum IoU between the proposals and the GT box so that they can be
            considered as negative during training of the classification head
        box_batch_size_per_image (int): number of proposals that are sampled during training of the
            classification head
        box_positive_fraction (float): proportion of positive proposals in a mini-batch during training
            of the classification head
        bbox_reg_weights (Tuple[float, float, float, float]): weights for the encoding/decoding of the
            bounding boxes

    Raises:
        ValueError: if ``backbone`` has no ``out_channels`` attribute, or if
            ``num_classes`` and ``box_predictor`` are both given or both None.
    """

    def __init__(self, backbone, num_classes=None,
                 # transform parameter
                 min_size=800, max_size=1333,      # min / max size limits used when resizing in pre-processing
                 image_mean=None, image_std=None,  # mean and std used when normalizing in pre-processing
                 # RPN parameters
                 rpn_anchor_generator=None, rpn_head=None,
                 rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,    # proposals kept (by score) before NMS in the RPN
                 rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,  # proposals kept after NMS in the RPN
                 rpn_nms_thresh=0.7,  # IoU threshold used by NMS in the RPN
                 rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,  # thresholds for picking positive / negative samples for the RPN loss
                 rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,  # number of samples for the RPN loss and the share of positives
                 rpn_score_thresh=0.0,
                 # Box parameters
                 box_roi_pool=None, box_head=None, box_predictor=None,
                 # drop low-score detections; NMS threshold in Fast R-CNN; keep the top 100 predictions by score
                 box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
                 box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,   # thresholds for picking positive / negative samples for the Fast R-CNN loss
                 box_batch_size_per_image=512, box_positive_fraction=0.25,  # number of samples for the Fast R-CNN loss and the share of positives
                 bbox_reg_weights=None):
        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)"
            )

        # NOTE: rpn_anchor_generator is not type-checked here (the
        # corresponding isinstance check is disabled in this implementation).
        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

        # Exactly one of num_classes / box_predictor must be provided.
        if num_classes is not None:
            if box_predictor is not None:
                raise ValueError("num_classes should be None when box_predictor "
                                 "is specified")
        else:
            if box_predictor is None:
                raise ValueError("num_classes should not be None when box_predictor "
                                 "is not specified")

        # Number of channels of the feature maps used for prediction.
        out_channels = backbone.out_channels

        # Without a user-supplied anchor generator, build the default one
        # intended for resnet50_fpn (five levels, three aspect ratios each).
        if rpn_anchor_generator is None:
            anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
            aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
            rpn_anchor_generator = AnchorsGenerator(
                anchor_sizes, aspect_ratios
            )

        # The sliding-window prediction head of the RPN.
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels, rpn_anchor_generator.num_anchors_per_location()[0]
            )

        # Defaults: rpn_pre_nms_top_n_train = 2000, rpn_pre_nms_top_n_test = 1000,
        #           rpn_post_nms_top_n_train = 2000, rpn_post_nms_top_n_test = 1000.
        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)

        # Assemble the complete RPN.
        rpn = RegionProposalNetwork(
            rpn_anchor_generator, rpn_head,
            rpn_fg_iou_thresh, rpn_bg_iou_thresh,
            rpn_batch_size_per_image, rpn_positive_fraction,
            rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh,
            score_thresh=rpn_score_thresh)

        # Multi-scale RoIAlign pooling
        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(
                featmap_names=['0', '1', '2', '3'],  # feature levels on which RoI pooling is performed
                output_size=[7, 7],
                sampling_ratio=2)

        # The flatten + two fully-connected layers applied after RoI pooling.
        if box_head is None:
            resolution = box_roi_pool.output_size[0]  # 7 by default
            representation_size = 1024
            box_head = TwoMLPHead(
                out_channels * resolution ** 2,
                representation_size
            )

        # The predictor applied on the output of box_head.
        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(
                representation_size,
                num_classes)

        # Combine RoI pooling, box_head and box_predictor.
        roi_heads = RoIHeads(
            # box
            box_roi_pool, box_head, box_predictor,
            box_fg_iou_thresh, box_bg_iou_thresh,  # 0.5  0.5
            box_batch_size_per_image, box_positive_fraction,  # 512  0.25
            bbox_reg_weights,
            box_score_thresh, box_nms_thresh, box_detections_per_img)  # 0.05  0.5  100

        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]

        # Normalizes, resizes and batches the input data.
        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)

        super(FasterRCNN, self).__init__(backbone, rpn, roi_heads, transform)


================================================
FILE: pytorch_object_detection/mask_rcnn/network_files/image_list.py
================================================
from typing import List, Tuple
from torch import Tensor


class ImageList(object):
    """
    Container pairing one batched image tensor with a list of per-image
    sizes. The images (of possibly varying sizes) are padded to a common
    size to form the tensor, and the sizes before padding are kept alongside.
    """

    def __init__(self, tensors, image_sizes):
        # type: (Tensor, List[Tuple[int, int]]) -> None
        """
        Arguments:
            tensors (tensor): image data after padding
            image_sizes (list[tuple[int, int]]): image sizes before padding
        """
        self.image_sizes = image_sizes
        self.tensors = tensors

    def to(self, device):
        # type: (Device) -> ImageList # noqa
        """Return a new ImageList whose tensor is moved to ``device``; the
        size list is passed along unchanged."""
        return ImageList(self.tensors.to(device), self.image_sizes)


================================================
FILE: pytorch_object_detection/mask_rcnn/network_files/mask_rcnn.py
================================================
from collections import OrderedDict
import torch.nn as nn
from torchvision.ops import MultiScaleRoIAlign

from .faster_rcnn_framework import FasterRCNN


class MaskRCNN(FasterRCNN):
    """
    Implements Mask R-CNN.

    Builds a FasterRCNN and then attaches a mask branch (RoI pooling, mask
    head and mask predictor) to its ``roi_heads``.

    The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
    image, and should be in 0-1 range. Different images can have different sizes.

    The behavior of the model changes depending if it is in training or evaluation mode.

    During training, the model expects both the input tensors, as well as a targets (list of dictionary),
    containing:
        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
        - labels (Int64Tensor[N]): the class label for each ground-truth box
        - masks (UInt8Tensor[N, H, W]): the segmentation binary masks for each instance

    The model returns a Dict[Tensor] during training, containing the classification and regression
    losses for both the RPN and the R-CNN, and the mask loss.

    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as
    follows:
        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with
          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
        - labels (Int64Tensor[N]): the predicted labels for each image
        - scores (Tensor[N]): the scores of each prediction
        - masks (Tensor[N, 1, H, W]): the predicted masks for each instance, in 0-1 range. In order to
          obtain the final segmentation masks, the soft masks can be thresholded, generally
          with a value of 0.5 (mask >= 0.5)

    Args:
        backbone (nn.Module): the network used to compute the features for the model.
            It should contain an out_channels attribute, which indicates the number of output
            channels that each feature map has (and it should be the same for all feature maps).
            The backbone should return a single Tensor or an OrderedDict[Tensor].
        num_classes (int): number of output classes of the model (including the background).
            If box_predictor is specified, num_classes should be None.
        min_size (int): minimum size of the image to be rescaled before feeding it to the backbone
        max_size (int): maximum size of the image to be rescaled before feeding it to the backbone
        image_mean (Tuple[float, float, float]): mean values used for input normalization.
            They are generally the mean values of the dataset on which the backbone has been trained
            on
        image_std (Tuple[float, float, float]): std values used for input normalization.
            They are generally the std values of the dataset on which the backbone has been trained on
        rpn_anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature
            maps.
        rpn_head (nn.Module): module that computes the objectness and regression deltas from the RPN
        rpn_pre_nms_top_n_train (int): number of proposals to keep before applying NMS during training
        rpn_pre_nms_top_n_test (int): number of proposals to keep before applying NMS during testing
        rpn_post_nms_top_n_train (int): number of proposals to keep after applying NMS during training
        rpn_post_nms_top_n_test (int): number of proposals to keep after applying NMS during testing
        rpn_nms_thresh (float): NMS threshold used for postprocessing the RPN proposals
        rpn_fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be
            considered as positive during training of the RPN.
        rpn_bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be
            considered as negative during training of the RPN.
        rpn_batch_size_per_image (int): number of anchors that are sampled during training of the RPN
            for computing the loss
        rpn_positive_fraction (float): proportion of positive anchors in a mini-batch during training
            of the RPN
        rpn_score_thresh (float): during inference, only return proposals with a classification score
            greater than rpn_score_thresh
        box_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in
            the locations indicated by the bounding boxes
        box_head (nn.Module): module that takes the cropped feature maps as input
        box_predictor (nn.Module): module that takes the output of box_head and returns the
            classification logits and box regression deltas.
        box_score_thresh (float): during inference, only return proposals with a classification score
            greater than box_score_thresh
        box_nms_thresh (float): NMS threshold for the prediction head. Used during inference
        box_detections_per_img (int): maximum number of detections per image, for all classes.
        box_fg_iou_thresh (float): minimum IoU between the proposals and the GT box so that they can be
            considered as positive during training of the classification head
        box_bg_iou_thresh (float): maximum IoU between the proposals and the GT box so that they can be
            considered as negative during training of the classification head
        box_batch_size_per_image (int): number of proposals that are sampled during training of the
            classification head
        box_positive_fraction (float): proportion of positive proposals in a mini-batch during training
            of the classification head
        bbox_reg_weights (Tuple[float, float, float, float]): weights for the encoding/decoding of the
            bounding boxes
        mask_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in
            the locations indicated by the bounding boxes, which will be used for the mask head.
        mask_head (nn.Module): module that takes the cropped feature maps as input
        mask_predictor (nn.Module): module that takes the output of the mask_head and returns the
            segmentation mask logits
    """

    def __init__(
            self,
            backbone,
            num_classes=None,
            # transform parameters
            min_size=800,
            max_size=1333,
            image_mean=None,
            image_std=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=2000,
            rpn_pre_nms_top_n_test=1000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=1000,
            rpn_nms_thresh=0.7,
            rpn_fg_iou_thresh=0.7,
            rpn_bg_iou_thresh=0.3,
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            rpn_score_thresh=0.0,
            # Box parameters
            box_roi_pool=None,
            box_head=None,
            box_predictor=None,
            box_score_thresh=0.05,
            box_nms_thresh=0.5,
            box_detections_per_img=100,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.5,
            box_batch_size_per_image=512,
            box_positive_fraction=0.25,
            bbox_reg_weights=None,
            # Mask parameters
            mask_roi_pool=None,
            mask_head=None,
            mask_predictor=None,
    ):
        # Argument validation comes first, before any module is built.
        pool_type_ok = isinstance(mask_roi_pool, (MultiScaleRoIAlign, type(None)))
        if not pool_type_ok:
            raise TypeError(
                f"mask_roi_pool should be of type MultiScaleRoIAlign or None instead of {type(mask_roi_pool)}"
            )

        if num_classes is not None and mask_predictor is not None:
            raise ValueError("num_classes should be None when mask_predictor is specified")

        out_channels = backbone.out_channels

        # Fill in the default mask branch for every part that was not supplied.
        # The mask modules are created before the parent constructor runs.
        if mask_roi_pool is None:
            mask_roi_pool = MultiScaleRoIAlign(featmap_names=["0", "1", "2", "3"], output_size=14, sampling_ratio=2)

        if mask_head is None:
            # Four conv layers of 256 channels each, no dilation.
            mask_head = MaskRCNNHeads(out_channels, (256, 256, 256, 256), 1)

        if mask_predictor is None:
            # Input channels and reduced dimension are both 256.
            mask_predictor = MaskRCNNPredictor(256, 256, num_classes)

        # Everything that is not mask-specific is forwarded unchanged.
        super().__init__(
            backbone=backbone,
            num_classes=num_classes,
            # transform parameters
            min_size=min_size,
            max_size=max_size,
            image_mean=image_mean,
            image_std=image_std,
            # RPN-specific parameters
            rpn_anchor_generator=rpn_anchor_generator,
            rpn_head=rpn_head,
            rpn_pre_nms_top_n_train=rpn_pre_nms_top_n_train,
            rpn_pre_nms_top_n_test=rpn_pre_nms_top_n_test,
            rpn_post_nms_top_n_train=rpn_post_nms_top_n_train,
            rpn_post_nms_top_n_test=rpn_post_nms_top_n_test,
            rpn_nms_thresh=rpn_nms_thresh,
            rpn_fg_iou_thresh=rpn_fg_iou_thresh,
            rpn_bg_iou_thresh=rpn_bg_iou_thresh,
            rpn_batch_size_per_image=rpn_batch_size_per_image,
            rpn_positive_fraction=rpn_positive_fraction,
            rpn_score_thresh=rpn_score_thresh,
            # Box parameters
            box_roi_pool=box_roi_pool,
            box_head=box_head,
            box_predictor=box_predictor,
            box_score_thresh=box_score_thresh,
            box_nms_thresh=box_nms_thresh,
            box_detections_per_img=box_detections_per_img,
            box_fg_iou_thresh=box_fg_iou_thresh,
            box_bg_iou_thresh=box_bg_iou_thresh,
            box_batch_size_per_image=box_batch_size_per_image,
            box_positive_fraction=box_positive_fraction,
            bbox_reg_weights=bbox_reg_weights,
        )

        # Attach the mask branch to the RoI heads built by the parent.
        heads = self.roi_heads
        heads.mask_roi_pool = mask_roi_pool
        heads.mask_head = mask_head
        heads.mask_predictor = mask_predictor


class MaskRCNNHeads(nn.Sequential):
    def __init__(self, in_channels, layers, dilation):
        """
        Stack of 3x3 convolutions, each followed by an in-place ReLU.

        Args:
            in_channels (int): number of input channels
            layers (tuple): feature dimensions of each FCN layer
            dilation (int): dilation rate of kernel (also used as padding)
        """
        named_modules = []
        channels = in_channels
        position = 0

        for width in layers:
            position += 1  # layer names are 1-based
            conv = nn.Conv2d(
                channels,
                width,
                kernel_size=3,
                stride=1,
                padding=dilation,
                dilation=dilation,
            )
            named_modules.append((f"mask_fcn{position}", conv))
            named_modules.append((f"relu{position}", nn.ReLU(inplace=True)))
            channels = width

        super().__init__(OrderedDict(named_modules))

        # Re-initialise every parameter whose name contains "weight";
        # all other parameters keep their default initialisation.
        for param_name, tensor in self.named_parameters():
            if "weight" not in param_name:
                continue
            nn.init.kaiming_normal_(tensor, mode="fan_out", nonlinearity="relu")


class MaskRCNNPredictor(nn.Sequential):
    """
    Mask prediction layers: a 2x2 stride-2 transposed convolution, an
    in-place ReLU, then a 1x1 convolution producing one channel per class.

    Args:
        in_channels (int): number of input channels
        dim_reduced (int): number of channels after the transposed convolution
        num_classes (int): number of output channels of the final convolution
    """

    def __init__(self, in_channels, dim_reduced, num_classes):
        upsample = nn.ConvTranspose2d(in_channels, dim_reduced, kernel_size=2, stride=2, padding=0)
        activation = nn.ReLU(inplace=True)
        logits = nn.Conv2d(dim_reduced, num_classes, kernel_size=1, stride=1, padding=0)

        stages = OrderedDict()
        stages["conv5_mask"] = upsample
        stages["relu"] = activation
        stages["mask_fcn_logits"] = logits
        super().__init__(stages)

        # Re-initialise every parameter whose name contains "weight";
        # all other parameters keep their default initialisation.
        for param_name, tensor in self.named_parameters():
            if "weight" not in param_name:
                continue
            nn.init.kaiming_normal_(tensor, mode="fan_out", nonlinearity="relu")


================================================
FILE: pytorch_object_detection/mask_rcnn/network_files/roi_head.py
================================================
from typing import Optional, List, Dict, Tuple

import torch
from torch import Tensor
import torch.nn.functional as F
from torchvision.ops import roi_align

from . import det_utils
from . import boxes as box_ops


def fastrcnn_loss(class_logits, box_regression, labels, regression_targets):
    # type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]
    """
    Computes the loss for Faster R-CNN.

    Arguments:
        class_logits : predicted class scores, shape=[num_proposals, num_classes]
        box_regression : predicted box regression values, reshaped here to
            [num_proposals, -1, 4]
        labels : ground-truth class per proposal, one tensor per image
        regression_targets : ground-truth regression targets, one tensor per image

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    """
    # Merge the per-image lists so the whole batch is handled at once.
    all_labels = torch.cat(labels, dim=0)
    all_targets = torch.cat(regression_targets, dim=0)

    # Classification loss over every sampled proposal.
    classification_loss = F.cross_entropy(class_logits, all_labels)

    # Indices of the proposals whose label is greater than 0, and the
    # labels at those indices; both are used for advanced indexing below.
    fg_idx = torch.where(torch.gt(all_labels, 0))[0]
    fg_labels = all_labels[fg_idx]

    # class_logits is [num_proposals, num_classes]
    num_rois, _ = class_logits.shape
    per_class_deltas = box_regression.reshape(num_rois, -1, 4)

    # Box loss: for each selected proposal take the regression values of its
    # own class, sum the smooth-L1 terms, then divide by the total number of
    # labels (not just the selected ones).
    summed_box_loss = det_utils.smooth_l1_loss(
        per_class_deltas[fg_idx, fg_labels],
        all_targets[fg_idx],
        beta=1 / 9,
        size_average=False,
    )
    box_loss = summed_box_loss / all_labels.numel()

    return classification_loss, box_loss


def maskrcnn_inference(x, labels):
    # type: (Tensor, List[Tensor]) -> List[Tensor]
    """
    Post-process the mask logits: apply a sigmoid, keep for every mask only
    the channel of its predicted class, and split the result back per image.

    Args:
        x (Tensor): the mask logits, indexed as [mask, class, ...]
        labels (list[Tensor]): predicted class labels, one tensor per image

    Returns:
        results: one tensor per image holding the selected mask
            probabilities, with a singleton dimension inserted at dim 1
    """
    # Remember how many boxes/masks belong to each image before merging.
    per_image_counts = [lbl.shape[0] for lbl in labels]
    # Merging all images first lets the selection run as one indexing op.
    flat_labels = torch.cat(labels)

    # Squash the logits into the 0~1 range.
    probs = torch.sigmoid(x)

    # Pick, for mask i, the channel given by its predicted label.
    row_idx = torch.arange(x.shape[0], device=flat_labels.device)
    selected = probs[row_idx, flat_labels].unsqueeze(1)

    # Undo the merge: one chunk per image.
    return selected.split(per_image_counts, dim=0)


def project_masks_on_boxes(gt_masks, boxes, matched_idxs, M):
    # type: (Tensor, Tensor, Tensor, int) -> Tensor
    """
    Build mask training targets by cropping ground-truth masks to boxes.

    Each box is paired with the ground-truth mask selected by
    ``matched_idxs``; ``roi_align`` crops that mask to the box and
    resamples it to ``M x M``.

    Args:
        gt_masks (Tensor): ground-truth masks of one image, one per object.
        boxes (Tensor): boxes to crop with, one row of 4 coordinates each.
        matched_idxs (Tensor): for every box, the index of its mask
            in ``gt_masks``.
        M (int): side length of the output masks.

    Returns:
        Tensor with one ``M x M`` target per box.
    """
    # roi_align expects rois as [batch_index, x1, y1, x2, y2]; here the
    # "batch" is the stack of gt masks, so the matched index plays that role.
    mask_index_column = matched_idxs.to(boxes).unsqueeze(1)
    rois = torch.cat([mask_index_column, boxes], dim=1)

    # Give the masks a channel axis and the same dtype/device as the rois.
    mask_batch = gt_masks.unsqueeze(1).to(rois)

    aligned = roi_align(mask_batch, rois, (M, M), 1.0)
    # Drop the singleton channel axis again.
    return aligned[:, 0]


def maskrcnn_loss(mask_logits, proposals, gt_masks, gt_labels, mask_matched_idxs):
    # type: (Tensor, List[Tensor], List[Tensor], List[Tensor], List[Tensor]) -> Tensor
    """
    Binary cross-entropy loss of the mask branch.

    Args:
        mask_logits (Tensor): predicted mask logits, indexed as
            [proposal, class, height, width].
        proposals (list[Tensor]): per-image boxes the logits were computed for.
        gt_masks (list[Tensor]): per-image ground-truth masks.
        gt_labels (list[Tensor]): per-image ground-truth class indices.
        mask_matched_idxs (list[Tensor]): per-image index of the ground-truth
            object matched to each proposal.

    Returns:
        mask_loss (Tensor): scalar tensor containing the loss
    """
    # Side length of the predicted masks (last dim of the logits).
    mask_side = mask_logits.shape[-1]

    # Class of the ground-truth object matched to every proposal.
    classes_per_image = [
        image_labels[image_idxs]
        for image_labels, image_idxs in zip(gt_labels, mask_matched_idxs)
    ]
    # Crop/resize the matched ground-truth masks to the proposals; these
    # are the actual regression targets for the mask head.
    targets_per_image = [
        project_masks_on_boxes(image_masks, image_boxes, image_idxs, mask_side)
        for image_masks, image_boxes, image_idxs in zip(gt_masks, proposals, mask_matched_idxs)
    ]

    # Flatten the batch so the loss is computed in a single call.
    matched_classes = torch.cat(classes_per_image, dim=0)
    targets = torch.cat(targets_per_image, dim=0)

    # torch.mean (in binary_cross_entropy_with_logits) doesn't accept
    # empty tensors; return a zero that is still connected to the graph.
    if targets.numel() == 0:
        return mask_logits.sum() * 0

    # Only the channel of the matched class contributes to the loss.
    row_index = torch.arange(matched_classes.shape[0], device=matched_classes.device)
    class_logits = mask_logits[row_index, matched_classes]
    return F.binary_cross_entropy_with_logits(class_logits, targets)


class RoIHeads(torch.nn.Module):
    """
    RoI heads: box classification/regression on pooled proposal features,
    plus an optional mask branch.

    In training mode ``forward`` samples proposals, computes the box losses
    (and the mask loss when the mask branch is configured). In evaluation
    mode it post-processes the predictions into per-image detections and,
    when the mask branch is configured, attaches mask probabilities.

    NOTE: the ``# type:`` comments in this class are kept verbatim; they
    look like TorchScript-style signatures, so treat them as part of the
    code rather than free-form comments.
    """
    __annotations__ = {
        'box_coder': det_utils.BoxCoder,
        'proposal_matcher': det_utils.Matcher,
        'fg_bg_sampler': det_utils.BalancedPositiveNegativeSampler,
    }

    def __init__(self,
                 box_roi_pool,   # Multi-scale RoIAlign pooling
                 box_head,       # TwoMLPHead
                 box_predictor,  # FastRCNNPredictor
                 # Faster R-CNN training
                 fg_iou_thresh, bg_iou_thresh,  # default: 0.5, 0.5
                 batch_size_per_image, positive_fraction,  # default: 512, 0.25
                 bbox_reg_weights,  # None
                 # Faster R-CNN inference
                 score_thresh,        # default: 0.05
                 nms_thresh,          # default: 0.5
                 detection_per_img,   # default: 100
                 # Mask
                 mask_roi_pool=None,
                 mask_head=None,
                 mask_predictor=None,
                 ):
        """
        Store the sub-modules and build the matcher, sampler and box coder.

        The "default" values noted next to the parameters are the original
        author's notes; this constructor itself defines no defaults for
        them (presumably they are the values used by the calling model).
        The mask branch is active only when all three ``mask_*`` modules
        are provided (see ``has_mask``).
        """
        super(RoIHeads, self).__init__()

        self.box_similarity = box_ops.box_iou
        # assign ground-truth boxes for each proposal
        self.proposal_matcher = det_utils.Matcher(
            fg_iou_thresh,  # default: 0.5
            bg_iou_thresh,  # default: 0.5
            allow_low_quality_matches=False)

        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image,  # default: 512
            positive_fraction)     # default: 0.25

        # Fall back to fixed regression weights when none are supplied.
        if bbox_reg_weights is None:
            bbox_reg_weights = (10., 10., 5., 5.)
        self.box_coder = det_utils.BoxCoder(bbox_reg_weights)

        self.box_roi_pool = box_roi_pool    # Multi-scale RoIAlign pooling
        self.box_head = box_head            # TwoMLPHead
        self.box_predictor = box_predictor  # FastRCNNPredictor

        self.score_thresh = score_thresh  # default: 0.05
        self.nms_thresh = nms_thresh      # default: 0.5
        self.detection_per_img = detection_per_img  # default: 100

        self.mask_roi_pool = mask_roi_pool
        self.mask_head = mask_head
        self.mask_predictor = mask_predictor

    def has_mask(self):
        """Return True only when all three mask sub-modules are set."""
        if self.mask_roi_pool is None:
            return False
        if self.mask_head is None:
            return False
        if self.mask_predictor is None:
            return False
        return True

    def assign_targets_to_proposals(self, proposals, gt_boxes, gt_labels):
        # type: (List[Tensor], List[Tensor], List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
        """
        Match every proposal to a ground-truth box and derive its label.

        Args:
            proposals: per-image proposal boxes.
            gt_boxes: per-image ground-truth boxes.
            gt_labels: per-image ground-truth class indices.

        Returns:
            (matched_idxs, labels), both lists with one tensor per image.
            ``matched_idxs`` holds the matched ground-truth index clamped
            to >= 0; ``labels`` holds the matched class, with 0 for
            background and -1 for proposals to be ignored.
        """
        matched_idxs = []
        labels = []
        # Walk over the proposals / gt boxes / gt labels of each image.
        for proposals_in_image, gt_boxes_in_image, gt_labels_in_image in zip(proposals, gt_boxes, gt_labels):
            if gt_boxes_in_image.numel() == 0:  # no gt box in this image: everything is background
                # background image
                device = proposals_in_image.device
                clamped_matched_idxs_in_image = torch.zeros(
                    (proposals_in_image.shape[0],), dtype=torch.int64, device=device
                )
                labels_in_image = torch.zeros(
                    (proposals_in_image.shape[0],), dtype=torch.int64, device=device
                )
            else:
                # set to self.box_similarity when https://github.com/pytorch/pytorch/issues/27495 lands
                # IoU between every gt box and every proposal.
                match_quality_matrix = box_ops.box_iou(gt_boxes_in_image, proposals_in_image)

                # For each proposal, the matcher yields the index of the matched gt box,
                # or one of its negative sentinel values (BELOW_LOW_THRESHOLD /
                # BETWEEN_THRESHOLDS; per the original notes these are -1 and -2).
                matched_idxs_in_image = self.proposal_matcher(match_quality_matrix)

                # Clamp to 0 so the sentinel values cannot index out of range below.
                # NOTE: unmatched proposals therefore temporarily pick up the label of
                # gt 0, which is wrong on purpose and corrected right after.
                clamped_matched_idxs_in_image = matched_idxs_in_image.clamp(min=0)
                # Label of the gt box matched to each proposal.
                labels_in_image = gt_labels_in_image[clamped_matched_idxs_in_image]
                labels_in_image = labels_in_image.to(dtype=torch.int64)

                # label background (below the low threshold)
                # Proposals flagged BELOW_LOW_THRESHOLD become class 0 (background / negative).
                bg_inds = matched_idxs_in_image == self.proposal_matcher.BELOW_LOW_THRESHOLD  # -1
                labels_in_image[bg_inds] = 0

                # label ignore proposals (between low and high threshold)
                # Proposals flagged BETWEEN_THRESHOLDS get label -1 (discarded samples).
                ignore_inds = matched_idxs_in_image == self.proposal_matcher.BETWEEN_THRESHOLDS  # -2
                labels_in_image[ignore_inds] = -1  # -1 is ignored by sampler

            matched_idxs.append(clamped_matched_idxs_in_image)
            labels.append(labels_in_image)
        return matched_idxs, labels

    def subsample(self, labels):
        # type: (List[Tensor]) -> List[Tensor]
        """
        Sample positives and negatives per image and return their indices.

        Args:
            labels: per-image labels as produced by
                ``assign_targets_to_proposals``.

        Returns:
            One index tensor per image, selecting every sampled proposal
            (positive or negative).
        """
        # BalancedPositiveNegativeSampler
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_inds = []
        # Walk over the positive / negative sample masks of each image.
        for img_idx, (pos_inds_img, neg_inds_img) in enumerate(zip(sampled_pos_inds, sampled_neg_inds)):
            # Indices of all sampled proposals (positives and negatives together).
            # Older equivalent: torch.nonzero(pos_inds_img | neg_inds_img).squeeze(1)
            img_sampled_inds = torch.where(pos_inds_img | neg_inds_img)[0]
            sampled_inds.append(img_sampled_inds)
        return sampled_inds

    def add_gt_proposals(self, proposals, gt_boxes):
        # type: (List[Tensor], List[Tensor]) -> List[Tensor]
        """
        Append the ground-truth boxes to the proposals of each image.

        Args:
            proposals: per-image boxes predicted by the RPN.
            gt_boxes: per-image ground-truth boxes.

        Returns:
            A new list with, per image, the proposals followed by the
            ground-truth boxes.
        """
        proposals = [
            torch.cat((proposal, gt_box))
            for proposal, gt_box in zip(proposals, gt_boxes)
        ]
        return proposals

    def check_targets(self, targets):
        # type: (Optional[List[Dict[str, Tensor]]]) -> None
        """Assert that targets are given and each has "boxes" and "labels"."""
        assert targets is not None
        assert all(["boxes" in t for t in targets])
        assert all(["labels" in t for t in targets])

    def select_training_samples(self,
                                proposals,  # type: List[Tensor]
                                targets     # type: Optional[List[Dict[str, Tensor]]]
                                ):
        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor], List[Tensor]]
        """
        Sample proposals for training and compute their targets.

        Every returned list has one element per image.

        Args:
            proposals: per-image boxes predicted by the RPN.
            targets: per-image dicts with at least "boxes" and "labels".

        Returns:
            (proposals, matched_idxs, labels, regression_targets) restricted
            to the sampled proposals.

        Raises:
            AssertionError: from ``check_targets`` when targets are missing
                or lack the required keys.
            ValueError: if ``targets`` is None (reachable when asserts are
                disabled).
        """

        # Validate the targets before using them.
        self.check_targets(targets)
        if targets is None:
            raise ValueError("target should not be None.")

        dtype = proposals[0].dtype
        device = proposals[0].device

        # Ground-truth boxes (cast to the proposals' dtype) and labels.
        gt_boxes = [t["boxes"].to(dtype) for t in targets]
        gt_labels = [t["labels"] for t in targets]

        # append ground-truth bboxes to proposal
        proposals = self.add_gt_proposals(proposals, gt_boxes)

        # get matching gt indices for each proposal
        # (also yields the foreground / background / ignore label of each proposal)
        matched_idxs, labels = self.assign_targets_to_proposals(proposals, gt_boxes, gt_labels)
        # sample a fixed proportion of positive-negative proposals
        sampled_inds = self.subsample(labels)
        matched_gt_boxes = []
        num_images = len(proposals)

        # Restrict everything to the sampled proposals, image by image.
        for img_id in range(num_images):
            # Indices of the sampled proposals of this image.
            img_sampled_inds = sampled_inds[img_id]
            # Sampled proposals.
            proposals[img_id] = proposals[img_id][img_sampled_inds]
            # Their class labels.
            labels[img_id] = labels[img_id][img_sampled_inds]
            # Their matched ground-truth indices.
            matched_idxs[img_id] = matched_idxs[img_id][img_sampled_inds]

            gt_boxes_in_image = gt_boxes[img_id]
            # An image without gt boxes gets one dummy all-zero box so the
            # indexing below (all matched indices are 0) stays valid.
            if gt_boxes_in_image.numel() == 0:
                gt_boxes_in_image = torch.zeros((1, 4), dtype=dtype, device=device)
            # Ground-truth box matched to each sampled proposal.
            matched_gt_boxes.append(gt_boxes_in_image[matched_idxs[img_id]])

        # Encode the matched gt boxes relative to the proposals (box regression targets).
        regression_targets = self.box_coder.encode(matched_gt_boxes, proposals)
        return proposals, matched_idxs, labels, regression_targets

    def postprocess_detections(self,
                               class_logits,    # type: Tensor
                               box_regression,  # type: Tensor
                               proposals,       # type: List[Tensor]
                               image_shapes     # type: List[Tuple[int, int]]
                               ):
        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
        """
        Convert raw head outputs into final per-image detections.

        Steps:
            (1) decode the final boxes from proposals and regression output
            (2) apply softmax to the class logits
            (3) clip boxes to the image
            (4) drop the background class (index 0)
            (5) drop low-scoring predictions
            (6) drop small boxes
            (7) run class-wise NMS
            (8) keep at most ``self.detection_per_img`` predictions

        Args:
            class_logits: predicted class logits for all proposals.
            box_regression: predicted box regression parameters.
            proposals: per-image proposals from the RPN.
            image_shapes: per-image size before batching
                (typed as Tuple[int, int] in the signature comment).

        Returns:
            (all_boxes, all_scores, all_labels), each a list with one
            tensor per image.
        """
        device = class_logits.device
        # Number of predicted classes (last dim of the logits).
        num_classes = class_logits.shape[-1]

        # Number of proposals per image, used to split the batched outputs.
        boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]
        # Decode the final box coordinates from proposals + regression output.
        pred_boxes = self.box_coder.decode(box_regression, proposals)

        # Class probabilities.
        pred_scores = F.softmax(class_logits, -1)

        # split boxes and scores per image
        pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
        pred_scores_list = pred_scores.split(boxes_per_image, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        # Process the predictions of each image.
        for boxes, scores, image_shape in zip(pred_boxes_list, pred_scores_list, image_shapes):
            # Clip boxes so that no coordinate lies outside the image.
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # create labels for each prediction
            labels = torch.arange(num_classes, device=device)
            labels = labels.view(1, -1).expand_as(scores)

            # remove prediction with the background label
            # (index 0 along the class dimension is background)
            boxes = boxes[:, 1:]
            scores = scores[:, 1:]
            labels = labels[:, 1:]

            # batch everything, by making every class prediction be a separate instance
            boxes = boxes.reshape(-1, 4)
            scores = scores.reshape(-1)
            labels = labels.reshape(-1)

            # remove low scoring boxes
            # Keep predictions with score strictly greater than self.score_thresh.
            # Older equivalent: torch.nonzero(torch.gt(scores, self.score_thresh)).squeeze(1)
            inds = torch.where(torch.gt(scores, self.score_thresh))[0]
            boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

            # remove empty boxes
            # (drops small boxes; min_size is 1 here)
            keep = box_ops.remove_small_boxes(boxes, min_size=1.)
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            # non-maximun suppression, independently done per class
            # Per the original note, the kept indices come back sorted by
            # descending score -- the top-k slice below relies on that.
            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)

            # keep only topk scoring predictions
            keep = keep[:self.detection_per_img]
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)

        return all_boxes, all_scores, all_labels

    def forward(self,
                features,       # type: Dict[str, Tensor]
                proposals,      # type: List[Tensor]
                image_shapes,   # type: List[Tuple[int, int]]
                targets=None    # type: Optional[List[Dict[str, Tensor]]]
                ):
        # type: (...) -> Tuple[List[Dict[str, Tensor]], Dict[str, Tensor]]
        """
        Run the box head (and the mask head, if configured).

        Arguments:
            features (Dict[str, Tensor]): feature maps passed to the RoI poolers.
            proposals (List[Tensor[N, 4]]): per-image proposal boxes.
            image_shapes (List[Tuple[H, W]]): per-image sizes.
            targets (List[Dict]): ground truth; required in training mode.
                Must contain "boxes" and "labels", and "masks" when the
                mask branch is trained.

        Returns:
            (result, losses). In training mode ``result`` is empty and
            ``losses`` holds "loss_classifier", "loss_box_reg" and, with a
            mask branch, "loss_mask". In evaluation mode ``losses`` is empty
            and ``result`` has one dict per image with "boxes", "labels",
            "scores" and, with a mask branch, "masks".
        """

        # Validate the dtypes of the targets, when targets are given.
        if targets is not None:
            for t in targets:
                floating_point_types = (torch.float, torch.double, torch.half)
                assert t["boxes"].dtype in floating_point_types, "target boxes must of float type"
                assert t["labels"].dtype == torch.int64, "target labels must of int64 type"

        if self.training:
            # Sample proposals and compute their labels and regression targets.
            proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets)
        else:
            labels = None
            regression_targets = None
            matched_idxs = None

        # Pool a feature patch for every proposal (multi-scale RoIAlign).
        # box_features_shape: [num_proposals, channel, height, width]
        box_features = self.box_roi_pool(features, proposals, image_shapes)

        # Box head applied to the pooled features (two fully connected layers).
        # box_features_shape: [num_proposals, representation_size]
        box_features = self.box_head(box_features)

        # Predict class logits and box regression parameters.
        class_logits, box_regression = self.box_predictor(box_features)

        result: List[Dict[str, torch.Tensor]] = []
        losses = {}
        if self.training:
            assert labels is not None and regression_targets is not None
            loss_classifier, loss_box_reg = fastrcnn_loss(
                class_logits, box_regression, labels, regression_targets)
            losses = {
                "loss_classifier": loss_classifier,
                "loss_box_reg": loss_box_reg
            }
        else:
            boxes, scores, labels = self.postprocess_detections(class_logits, box_regression, proposals, image_shapes)
            num_images = len(boxes)
            for i in range(num_images):
                result.append(
                    {
                        "boxes": boxes[i],
                        "labels": labels[i],
                        "scores": scores[i],
                    }
                )

        if self.has_mask():
            # In evaluation mode the masks are predicted for the final detected boxes.
            # (In training mode `result` is empty and this list is rebuilt below.)
            mask_proposals = [p["boxes"] for p in result]
            if self.training:
                # matched_idxs holds the gt index matched to each proposal
                # (background proposals were clamped to index 0 as well).
                if matched_idxs is None:
                    raise ValueError("if in training, matched_idxs should not be None")

                # during training, only focus on positive boxes
                num_images = len(proposals)
                mask_proposals = []
                pos_matched_idxs = []
                for img_id in range(num_images):
                    pos = torch.where(labels[img_id] > 0)[0]  # positives: matched class label > 0
                    mask_proposals.append(proposals[img_id][pos])
                    pos_matched_idxs.append(matched_idxs[img_id][pos])
            else:
                pos_matched_idxs = None

            mask_features = self.mask_roi_pool(features, mask_proposals, image_shapes)
            mask_features = self.mask_head(mask_features)
            mask_logits = self.mask_predictor(mask_features)

            loss_mask = {}
            if self.training:
                if targets is None or pos_matched_idxs is None or mask_logits is None:
                    raise ValueError("targets, pos_matched_idxs, mask_logits cannot be None when training")

                # NOTE: a target without a "masks" entry raises KeyError here.
                gt_masks = [t["masks"] for t in targets]
                gt_labels = [t["labels"] for t in targets]
                rcnn_loss_mask = maskrcnn_loss(mask_logits, mask_proposals, gt_masks, gt_labels, pos_matched_idxs)
                loss_mask = {"loss_mask": rcnn_loss_mask}
            else:
                # Select, for each detection, the mask of its predicted class.
                labels = [r["labels"] for r in result]
                mask_probs = maskrcnn_inference(mask_logits, labels)
                for mask_prob, r in zip(mask_probs, result):
                    r["masks"] = mask_prob

            losses.update(loss_mask)

        return result, losses


================================================
FILE: pytorch_object_detection/mask_rcnn/network_files/rpn_function.py
================================================
from typing import List, Optional, Dict, Tuple

import torch
from torch import nn, Tensor
from torch.nn import functional as F
import torchvision

from . import det_utils
from . import boxes as box_ops
from .image_list import ImageList


@torch.jit.unused
def _onnx_get_num_anchors_and_pre_nms_top_n(ob, orig_pre_nms_top_n):
    # type: (Tensor, int) -> Tuple[int, int]
    """
    Compute the anchor count and the clamped pre-NMS top-n with tensor ops.

    Args:
        ob: tensor whose dimension 1 is read as the number of anchors.
        orig_pre_nms_top_n: requested number of proposals to keep before NMS.

    Returns:
        (num_anchors, pre_nms_top_n) where ``pre_nms_top_n`` is
        ``min(orig_pre_nms_top_n, num_anchors)``.

    NOTE(review): both returned values are tensors even though the type
    comment declares ints -- presumably intentional so the sizes stay
    dynamic during ONNX export; confirm before changing either.
    """
    from torch.onnx import operators
    # Size of dim 1 of `ob`, kept as a 1-element tensor instead of a Python int.
    num_anchors = operators.shape_as_tensor(ob)[1].unsqueeze(0)
    # min(orig_pre_nms_top_n, num_anchors), evaluated on tensors.
    pre_nms_top_n = torch.min(torch.cat(
        (torch.tensor([orig_pre_nms_top_n], dtype=num_anchors.dtype),
         num_anchors), 0))

    return num_anchors, pre_nms_top_n


class AnchorsGenerator(nn.Module):
    """
    Anchor generator.

    Module that generates anchors for a set of feature maps and
    image sizes.

    The module support computing anchors at multiple sizes and aspect ratios
    per feature map.

    sizes and aspect_ratios should have the same number of elements, and it should
    correspond to the number of feature maps.

    sizes[i] and aspect_ratios[i] can have an arbitrary number of elements,
    and AnchorGenerator will output a set of sizes[i] * aspect_ratios[i] anchors
    per spatial location for feature map i.

    Arguments:
        sizes (Tuple[Tuple[int]]): anchor sizes per feature map. A flat
            tuple of ints is accepted and expanded to one size per map.
        aspect_ratios (Tuple[Tuple[float]]): h/w ratios per feature map.
            A flat tuple is accepted and shared by all feature maps.
    """
    # The docstring must be the first statement of the class body; placed
    # after __annotations__ it would be a no-op string and __doc__ stays None.
    __annotations__ = {
        "cell_anchors": Optional[List[torch.Tensor]],
        "_cache": Dict[str, List[torch.Tensor]]
    }

    def __init__(self, sizes=(128, 256, 512), aspect_ratios=(0.5, 1.0, 2.0)):
        super().__init__()

        if not isinstance(sizes[0], (list, tuple)):
            # TODO change this
            # Flat sizes: one single-size tuple per feature map.
            sizes = tuple((s,) for s in sizes)
        if not isinstance(aspect_ratios[0], (list, tuple)):
            # Flat ratios: reuse the same ratios on every feature map.
            aspect_ratios = (aspect_ratios,) * len(sizes)

        assert len(sizes) == len(aspect_ratios)

        self.sizes = sizes
        self.aspect_ratios = aspect_ratios
        # Zero-centred anchor templates, built lazily by set_cell_anchors.
        self.cell_anchors = None
        # Cache of image-level anchors keyed by grid sizes and strides.
        self._cache = {}

    def generate_anchors(self, scales, aspect_ratios, dtype=torch.float32, device=torch.device("cpu")):
        # type: (List[int], List[float], torch.dtype, torch.device) -> Tensor
        """
        Build the zero-centred anchor templates for one feature map.

        Arguments:
            scales: sqrt(anchor_area)
            aspect_ratios: h/w ratios
            dtype: dtype of the returned tensor
            device: device of the returned tensor

        Returns:
            Tensor of shape [len(aspect_ratios) * len(scales), 4] holding
            rounded (xmin, ymin, xmax, ymax) offsets around (0, 0).
        """
        scales = torch.as_tensor(scales, dtype=dtype, device=device)
        aspect_ratios = torch.as_tensor(aspect_ratios, dtype=dtype, device=device)
        # h/w == ratio and h*w == scale**2  =>  h = sqrt(ratio)*scale, w = scale/sqrt(ratio)
        h_ratios = torch.sqrt(aspect_ratios)
        w_ratios = 1.0 / h_ratios

        # [r1, r2, r3]' * [s1, s2, s3]
        # number of elements is len(ratios)*len(scales)
        ws = (w_ratios[:, None] * scales[None, :]).view(-1)
        hs = (h_ratios[:, None] * scales[None, :]).view(-1)

        # left-top, right-bottom coordinate relative to anchor center(0, 0)
        # shape [len(ratios)*len(scales), 4]
        base_anchors = torch.stack([-ws, -hs, ws, hs], dim=1) / 2

        return base_anchors.round()

    def set_cell_anchors(self, dtype, device):
        # type: (torch.dtype, torch.device) -> None
        """
        Create the anchor templates unless they already exist on `device`.

        Note that only the device is compared; existing templates are
        reused even if `dtype` differs from the one they were built with.
        """
        if self.cell_anchors is not None:
            cell_anchors = self.cell_anchors
            assert cell_anchors is not None
            # suppose that all anchors have the same device
            # which is a valid assumption in the current state of the codebase
            if cell_anchors[0].device == device:
                return

        # One set of zero-centred templates per feature map, built from the
        # configured sizes and aspect ratios.
        cell_anchors = [
            self.generate_anchors(sizes, aspect_ratios, dtype, device)
            for sizes, aspect_ratios in zip(self.sizes, self.aspect_ratios)
        ]
        self.cell_anchors = cell_anchors

    def num_anchors_per_location(self):
        """Return, per feature map, the number of anchors at each location."""
        return [len(s) * len(a) for s, a in zip(self.sizes, self.aspect_ratios)]

    # For every combination of (a, (g, s), i) in (self.cell_anchors, zip(grid_sizes, strides), 0:2),
    # output g[i] anchors that are s[i] distance apart in direction i, with the same dimensions as a.
    def grid_anchors(self, grid_sizes, strides):
        # type: (List[List[int]], List[List[Tensor]]) -> List[Tensor]
        """
        Place the anchor templates on every cell of every feature map,
        expressed in input-image coordinates.

        Args:
            grid_sizes: (height, width) of each feature map.
            strides: (stride_height, stride_width) of each feature map,
                i.e. how many image pixels one feature-map step covers.

        Returns:
            One tensor of shape [grid_h * grid_w * num_templates, 4]
            per feature map.
        """
        anchors = []
        cell_anchors = self.cell_anchors
        assert cell_anchors is not None

        # Handle each feature map with its grid size, stride and templates.
        for size, stride, base_anchors in zip(grid_sizes, strides, cell_anchors):
            grid_height, grid_width = size
            stride_height, stride_width = stride
            device = base_anchors.device

            # For output anchor, compute [x_center, y_center, x_center, y_center]
            # shape: [grid_width], x coordinates (columns) in the image
            shifts_x = torch.arange(0, grid_width, dtype=torch.float32, device=device) * stride_width
            # shape: [grid_height], y coordinates (rows) in the image
            shifts_y = torch.arange(0, grid_height, dtype=torch.float32, device=device) * stride_height

            # Image coordinates of every feature-map cell. Rows are passed
            # first, so both outputs have shape [grid_height, grid_width]
            # (this relies on meshgrid's default "ij" indexing).
            shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x)
            shift_x = shift_x.reshape(-1)
            shift_y = shift_y.reshape(-1)

            # Offsets to add to (xmin, ymin, xmax, ymax) of the templates.
            # shape: [grid_width*grid_height, 4]
            shifts = torch.stack([shift_x, shift_y, shift_x, shift_y], dim=1)

            # For every (base anchor, output anchor) pair,
            # offset each zero-centered base anchor by the center of the output anchor.
            # Broadcasting yields every (cell, template) combination.
            shifts_anchor = shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)
            anchors.append(shifts_anchor.reshape(-1, 4))

        return anchors  # List[Tensor(all_num_anchors, 4)]

    def cached_grid_anchors(self, grid_sizes, strides):
        # type: (List[List[int]], List[List[Tensor]]) -> List[Tensor]
        """Return grid_anchors(grid_sizes, strides), memoised in self._cache."""
        key = str(grid_sizes) + str(strides)
        if key in self._cache:
            return self._cache[key]
        anchors = self.grid_anchors(grid_sizes, strides)
        self._cache[key] = anchors
        return anchors

    def forward(self, image_list, feature_maps):
        # type: (ImageList, List[Tensor]) -> List[Tensor]
        """
        Generate the anchors of every image in the batch.

        Args:
            image_list: batched images; `tensors` provides the padded
                batch size and `image_sizes` has one entry per image.
            feature_maps: feature maps the anchors are generated for.

        Returns:
            One tensor per image containing the anchors of all feature
            maps concatenated. Every image gets the same anchors.
        """
        # (height, width) of each feature map.
        grid_sizes = [feature_map.shape[-2:] for feature_map in feature_maps]

        # (height, width) of the batched input tensor.
        image_size = image_list.tensors.shape[-2:]

        dtype, device = feature_maps[0].dtype, feature_maps[0].device

        # one step in feature map equate n pixel stride in origin image
        strides = [[torch.tensor(image_size[0] // g[0], dtype=torch.int64, device=device),
                    torch.tensor(image_size[1] // g[1], dtype=torch.int64, device=device)] for g in grid_sizes]

        # Make sure the zero-centred templates exist on the right device.
        self.set_cell_anchors(dtype, device)

        # Image-level anchors (not templates): one tensor per feature map.
        anchors_over_all_feature_maps = self.cached_grid_anchors(grid_sizes, strides)

        anchors = torch.jit.annotate(List[List[torch.Tensor]], [])
        # The anchors do not depend on the individual image size, so only the
        # number of images matters here.
        for _ in range(len(image_list.image_sizes)):
            anchors_in_image = []
            for anchors_per_feature_map in anchors_over_all_feature_maps:
                anchors_in_image.append(anchors_per_feature_map)
            anchors.append(anchors_in_image)
        # Concatenate the anchors of all feature maps, separately per image.
        anchors = [torch.cat(anchors_per_image) for anchors_per_image in anchors]
        # Clear the cache in case that memory leaks.
        self._cache.clear()
        return anchors


class RPNHead(nn.Module):
    """
    RPN head producing objectness logits and box regression deltas.

    A shared 3x3 convolution slides over each feature map; two sibling
    1x1 convolutions then predict, per anchor, one objectness score
    (foreground vs. background only) and four box regression values.

    Arguments:
        in_channels: number of channels of the input feature
        num_anchors: number of anchors to be predicted
    """

    def __init__(self, in_channels, num_anchors):
        super().__init__()
        # Shared 3x3 sliding window; padding keeps the spatial size.
        self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
        # One objectness logit per anchor.
        self.cls_logits = nn.Conv2d(in_channels, num_anchors, kernel_size=1, stride=1)
        # Four regression values per anchor.
        self.bbox_pred = nn.Conv2d(in_channels, num_anchors * 4, kernel_size=1, stride=1)

        # Re-initialise every convolution: small Gaussian weights, zero bias.
        conv_layers = [module for module in self.children() if isinstance(module, nn.Conv2d)]
        for conv_layer in conv_layers:
            torch.nn.init.normal_(conv_layer.weight, std=0.01)
            torch.nn.init.constant_(conv_layer.bias, 0)

    def forward(self, x):
        # type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
        """Apply the head to each feature map; returns (logits, bbox_reg) lists."""
        objectness_per_level = []
        deltas_per_level = []
        for feature_map in x:
            hidden = F.relu(self.conv(feature_map))
            objectness_per_level.append(self.cls_logits(hidden))
            deltas_per_level.append(self.bbox_pred(hidden))
        return objectness_per_level, deltas_per_level


def permute_and_flatten(layer, N, A, C, H, W):
    # type: (Tensor, int, int, int, int, int) -> Tensor
    """
    Rearrange a prediction map from channel-first layout to [N, -1, C].

    Args:
        layer: predictions of one feature map, laid out as
            [N, anchors * C, H, W].
        N: batch size
        A: anchors per position (not used by the computation itself)
        C: values per anchor (number of classes, or 4 for box coordinates)
        H: height
        W: width

    Returns:
        Tensor of shape [N, H * W * anchors, C].
    """
    # view() needs contiguous memory, which holds for the incoming tensor;
    # after permute() the memory is no longer contiguous, so the final
    # step must be reshape(), which copies when required.
    return (
        layer.view(N, -1, C, H, W)     # split channels into [anchors, C]
        .permute(0, 3, 4, 1, 2)        # -> [N, H, W, anchors, C]
        .reshape(N, -1, C)             # merge H, W, anchors
    )


def concat_box_prediction_layers(box_cls, box_regression):
    # type: (List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]
    """
    Merge the per-level RPN/box outputs into two flat tensors.

    Every level is first brought into [N, -1, C] layout with
    ``permute_and_flatten``; the levels are then concatenated and the
    batch dimension is folded in.

    Args:
        box_cls: per-level class predictions,
            each [N, anchors * classes, H, W].
        box_regression: per-level box regression predictions,
            each [N, anchors * 4, H, W].

    Returns:
        (box_cls, box_regression): class predictions with all leading
        dimensions flattened, and regression values of shape [-1, 4].
    """
    cls_levels = []
    reg_levels = []

    for level_cls, level_reg in zip(box_cls, box_regression):
        # [batch_size, anchors * classes, height, width]
        # (for RPN proposals there is a single class: object vs. background)
        batch_size, cls_channels, height, width = level_cls.shape
        # The regression map carries 4 values per anchor.
        anchors_per_position = level_reg.shape[1] // 4
        classes_per_anchor = cls_channels // anchors_per_position

        # Both become [N, -1, C].
        cls_levels.append(
            permute_and_flatten(level_cls, batch_size, anchors_per_position,
                                classes_per_anchor, height, width)
        )
        reg_levels.append(
            permute_and_flatten(level_reg, batch_size, anchors_per_position,
                                4, height, width)
        )

    # Join the levels along the anchor axis, then fold batch into it.
    merged_cls = torch.cat(cls_levels, dim=1).flatten(0, -2)  # start_dim, end_dim
    merged_reg = torch.cat(reg_levels, dim=1).reshape(-1, 4)
    return merged_cls, merged_reg


class RegionProposalNetwork(torch.nn.Module):
    """
    Implements Region Proposal Network (RPN).

    Arguments:
        anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature
            maps.
        head (nn.Module): module that computes the objectness and regression deltas
        fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be
            considered as positive during training of the RPN.
        bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be
            considered as negative during training of the RPN.
        batch_size_per_image (int): number of anchors that are sampled during training of the RPN
            for computing the loss
        positive_fraction (float): proportion of positive anchors in a mini-batch during training
            of the RPN
        pre_nms_top_n (Dict[str]): number of proposals to keep before applying NMS. It should
            contain two fields: training and testing, to allow for different values depending
            on training or evaluation
        post_nms_top_n (Dict[str]): number of proposals to keep after applying NMS. It should
            contain two fields: training and testing, to allow for different values depending
            on training or evaluation
        nms_thresh (float): NMS threshold used for postprocessing the RPN proposals
        score_thresh (float): proposals whose sigmoid objectness score is below this value
            are dropped in ``filter_proposals`` before NMS (default 0.0)

    """
    __annotations__ = {
        'box_coder': det_utils.BoxCoder,
        'proposal_matcher': det_utils.Matcher,
        'fg_bg_sampler': det_utils.BalancedPositiveNegativeSampler,
        'pre_nms_top_n': Dict[str, int],
        'post_nms_top_n': Dict[str, int],
    }

    def __init__(self, anchor_generator, head,
                 fg_iou_thresh, bg_iou_thresh,
                 batch_size_per_image, positive_fraction,
                 pre_nms_top_n, post_nms_top_n, nms_thresh, score_thresh=0.0):
        super(RegionProposalNetwork, self).__init__()
        self.anchor_generator = anchor_generator
        self.head = head
        self.box_coder = det_utils.BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))

        # use during training
        # IoU function between anchors and ground-truth boxes
        self.box_similarity = box_ops.box_iou

        self.proposal_matcher = det_utils.Matcher(
            fg_iou_thresh,  # IoU above this -> positive sample (original note: e.g. 0.7)
            bg_iou_thresh,  # IoU below this -> negative sample (original note: e.g. 0.3)
            allow_low_quality_matches=True
        )

        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image, positive_fraction  # original note: e.g. 256, 0.5
        )

        # use during testing
        self._pre_nms_top_n = pre_nms_top_n
        self._post_nms_top_n = post_nms_top_n
        self.nms_thresh = nms_thresh
        self.score_thresh = score_thresh
        # minimum box side length passed to remove_small_boxes in filter_proposals
        self.min_size = 1.

    def pre_nms_top_n(self):
        """Return the pre-NMS proposal budget for the current mode ('training' or 'testing' key)."""
        if self.training:
            return self._pre_nms_top_n['training']
        return self._pre_nms_top_n['testing']

    def post_nms_top_n(self):
        """Return the post-NMS proposal budget for the current mode ('training' or 'testing' key)."""
        if self.training:
            return self._post_nms_top_n['training']
        return self._post_nms_top_n['testing']

    def assign_targets_to_anchors(self, anchors, targets):
        # type: (List[Tensor], List[Dict[str, Tensor]]) -> Tuple[List[Tensor], List[Tensor]]
        """
        Match every anchor to a ground-truth box and label it as positive,
        background or discarded.

        Args:
            anchors: (List[Tensor]) anchors, one tensor per image
            targets: (List[Dict[Tensor]) per-image targets; the "boxes" entry is read
        Returns:
            labels: per-image float tensors with 1 for positive, 0 for background and
                    -1 for discarded anchors. The RPN only separates foreground from
                    background, so every positive anchor gets label 1.
            matched_gt_boxes: per-image tensors holding the gt box matched to each anchor
        """
        labels = []
        matched_gt_boxes = []
        # iterate over the anchors and targets of each image
        for anchors_per_image, targets_per_image in zip(anchors, targets):
            gt_boxes = targets_per_image["boxes"]
            if gt_boxes.numel() == 0:
                # No gt boxes in this image: every anchor is background and
                # the matched boxes are all-zero placeholders.
                device = anchors_per_image.device
                matched_gt_boxes_per_image = torch.zeros(anchors_per_image.shape, dtype=torch.float32, device=device)
                labels_per_image = torch.zeros((anchors_per_image.shape[0],), dtype=torch.float32, device=device)
            else:
                # IoU between every gt box and every anchor
                # set to self.box_similarity when https://github.com/pytorch/pytorch/issues/27495 lands
                match_quality_matrix = box_ops.box_iou(gt_boxes, anchors_per_image)
                # Per-anchor index of the matched gt box; the matcher uses negative
                # sentinel values (BELOW_LOW_THRESHOLD / BETWEEN_THRESHOLDS) for
                # anchors that are not positive matches.
                matched_idxs = self.proposal_matcher(match_quality_matrix)
                # get the targets corresponding GT for each proposal
                # NB: need to clamp the indices because we can have a single
                # GT in the image, and matched_idxs can be -2, which goes
                # out of bounds
                matched_gt_boxes_per_image = gt_boxes[matched_idxs.clamp(min=0)]

                labels_per_image = matched_idxs >= 0
                labels_per_image = labels_per_image.to(dtype=torch.float32)

                # background (negative examples)
                bg_indices = matched_idxs == self.proposal_matcher.BELOW_LOW_THRESHOLD  # -1 per original note
                labels_per_image[bg_indices] = 0.0

                # discard indices that are between thresholds
                inds_to_discard = matched_idxs == self.proposal_matcher.BETWEEN_THRESHOLDS  # -2 per original note
                labels_per_image[inds_to_discard] = -1.0

            labels.append(labels_per_image)
            matched_gt_boxes.append(matched_gt_boxes_per_image)
        return labels, matched_gt_boxes

    def _get_top_n_idx(self, objectness, num_anchors_per_level):
        # type: (Tensor, List[int]) -> Tensor
        """
        Select, independently for each feature level, the indices of the
        anchors with the highest objectness (at most pre_nms_top_n per level).

        Args:
            objectness: Tensor of objectness predictions, split along dim 1 by level
            num_anchors_per_level: List with the number of anchors predicted on each level
        Returns:
            Tensor of the selected indices of all levels concatenated along dim 1;
            each level's indices are shifted by the number of anchors on the
            preceding levels so they index into the un-split ``objectness``.
        """
        r = []  # top-k indices collected per feature level
        offset = 0
        # iterate over the objectness chunk of each feature level
        for ob in objectness.split(num_anchors_per_level, 1):
            if torchvision._is_tracing():
                num_anchors, pre_nms_top_n = _onnx_get_num_anchors_and_pre_nms_top_n(ob, self.pre_nms_top_n())
            else:
                num_anchors = ob.shape[1]  # number of anchors on this feature level
                pre_nms_top_n = min(self.pre_nms_top_n(), num_anchors)

            # Returns the k largest elements of the given input tensor along a given dimension
            _, top_n_idx = ob.topk(pre_nms_top_n, dim=1)
            r.append(top_n_idx + offset)
            offset += num_anchors
        return torch.cat(r, dim=1)

    def filter_proposals(self, proposals, objectness, image_shapes, num_anchors_per_level):
        # type: (Tensor, Tensor, List[Tuple[int, int]], List[int]) -> Tuple[List[Tensor], List[Tensor]]
        """
        Clip proposals to the image, drop small and low-scoring boxes, run
        per-level NMS and keep the top post_nms_top_n proposals per image.

        Args:
            proposals: predicted box coordinates, indexed as [image, anchor, 4]
            objectness: predicted objectness logits
            image_shapes: size of every image in the batch
            num_anchors_per_level: number of anchors predicted on each feature level

        Returns:
            final_boxes: list with the kept boxes of each image
            final_scores: list with the sigmoid objectness scores of the kept boxes
        """
        num_images = proposals.shape[0]
        device = proposals.device

        # do not backprop throught objectness
        objectness = objectness.detach()
        objectness = objectness.reshape(num_images, -1)

        # Returns a tensor of size size filled with fill_value
        # levels stores, for every anchor, the index of the feature level it belongs to
        levels = [torch.full((n, ), idx, dtype=torch.int64, device=device)
                  for idx, n in enumerate(num_anchors_per_level)]
        levels = torch.cat(levels, 0)

        # Expand this tensor to the same size as objectness
        levels = levels.reshape(1, -1).expand_as(objectness)

        # select top_n boxes independently per level before applying nms
        top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)

        image_range = torch.arange(num_images, device=device)
        batch_idx = image_range[:, None]  # [batch_size, 1]

        # gather the objectness, level ids and boxes of the selected anchors
        objectness = objectness[batch_idx, top_n_idx]
        levels = levels[batch_idx, top_n_idx]
        proposals = proposals[batch_idx, top_n_idx]

        objectness_prob = torch.sigmoid(objectness)

        final_boxes = []
        final_scores = []
        # process the predictions of each image separately
        for boxes, scores, lvl, img_shape in zip(proposals, objectness_prob, levels, image_shapes):
            # move out-of-bounds coordinates onto the image border
            boxes = box_ops.clip_boxes_to_image(boxes, img_shape)

            # keep only boxes that pass the min_size check
            keep = box_ops.remove_small_boxes(boxes, self.min_size)
            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]

            # drop low-scoring boxes, see
            # https://github.com/pytorch/vision/pull/3205
            keep = torch.where(torch.ge(scores, self.score_thresh))[0]  # ge: >=
            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]

            # non-maximum suppression, independently done per level
            keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)

            # keep only topk scoring predictions
            keep = keep[: self.post_nms_top_n()]
            boxes, scores = boxes[keep], scores[keep]

            final_boxes.append(boxes)
            final_scores.append(scores)
        return final_boxes, final_scores

    def compute_loss(self, objectness, pred_bbox_deltas, labels, regression_targets):
        # type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]
        """
        Compute the RPN losses: the foreground/background classification
        loss and the box-regression loss.

        Arguments:
            objectness (Tensor): predicted foreground logits
            pred_bbox_deltas (Tensor): predicted box-regression parameters
            labels (List[Tensor]): ground-truth labels 1, 0, -1 (one list element per image)
            regression_targets (List[Tensor]): ground-truth box-regression parameters

        Returns:
            objectness_loss (Tensor): classification loss
            box_loss (Tensor): box-regression loss
        """
        # sample positive and negative anchors with the configured sampler
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        # concatenate the per-image masks over the batch and take the indices
        # of the non-zero entries (torch.where(...)[0])
        sampled_pos_inds = torch.where(torch.cat(sampled_pos_inds, dim=0))[0]
        sampled_neg_inds = torch.where(torch.cat(sampled_neg_inds, dim=0))[0]

        # indices of all sampled anchors (positives followed by negatives)
        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)
        objectness = objectness.flatten()

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)

        # box-regression loss: evaluated on positive samples only, but
        # normalised by the total number of sampled anchors
        box_loss = det_utils.smooth_l1_loss(
            pred_bbox_deltas[sampled_pos_inds],
            regression_targets[sampled_pos_inds],
            beta=1 / 9,
            size_average=False,
        ) / (sampled_inds.numel())

        # objectness loss over all sampled anchors
        objectness_loss = F.binary_cross_entropy_with_logits(
            objectness[sampled_inds], labels[sampled_inds]
        )

        return objectness_loss, box_loss

    def forward(self,
                images,        # type: ImageList
                features,      # type: Dict[str, Tensor]
                targets=None   # type: Optional[List[Dict[str, Tensor]]]
                ):
        # type: (...) -> Tuple[List[Tensor], Dict[str, Tensor]]
        """
        Arguments:
            images (ImageList): images for which we want to compute the predictions
            features (Dict[Tensor]): features computed from the images that are
                used for computing the predictions. Each tensor in the list
                correspond to different feature levels
            targets (List[Dict[Tensor]): ground-truth boxes present in the image (optional).
                If provided, each element in the dict should contain a field `boxes`,
                with the locations of the ground-truth boxes.

        Returns:
            boxes (List[Tensor]): the predicted boxes from the RPN, one Tensor per
                image.
            losses (Dict[Tensor]): the losses for the model during training. During
                testing, it is an empty dict.
        """
        # RPN uses all feature maps that are available
        # only the values of the features mapping are used, in iteration order
        features = list(features.values())

        # per-level objectness and box-regression predictions (both are lists)
        objectness, pred_bbox_deltas = self.head(features)

        # anchors for the whole batch; the list has one entry per image
        anchors = self.anchor_generator(images, features)

        # batch_size
        num_images = len(anchors)

        # number of anchors on each feature level, taken from the shape of the
        # first image's objectness map on that level (product of its 3 dims)
        num_anchors_per_level_shape_tensors = [o[0].shape for o in objectness]
        num_anchors_per_level = [s[0] * s[1] * s[2] for s in num_anchors_per_level_shape_tensors]

        # flatten and concatenate the per-level predictions
        objectness, pred_bbox_deltas = concat_box_prediction_layers(objectness,
                                                                    pred_bbox_deltas)

        # apply pred_bbox_deltas to anchors to obtain the decoded proposals
        # note that we detach the deltas because Faster R-CNN do not backprop through
        # the proposals
        proposals = self.box_coder.decode(pred_bbox_deltas.detach(), anchors)
        proposals = proposals.view(num_images, -1, 4)

        # drop small boxes, apply NMS and keep the top post_nms_top_n proposals
        boxes, scores = self.filter_proposals(proposals, objectness, images.image_sizes, num_anchors_per_level)

        losses = {}
        if self.training:
            assert targets is not None
            # label every anchor as foreground, background or discarded and
            # find its matched gt box
            labels, matched_gt_boxes = self.assign_targets_to_anchors(anchors, targets)
            # encode the matched gt boxes relative to the anchors as regression targets
            regression_targets = self.box_coder.encode(matched_gt_boxes, anchors)
            loss_objectness, loss_rpn_box_reg = self.compute_loss(
                objectness, pred_bbox_deltas, labels, regression_targets
            )
            losses = {
                "loss_objectness": loss_objectness,
                "loss_rpn_box_reg": loss_rpn_box_reg
            }
        return boxes, losses


================================================
FILE: pytorch_object_detection/mask_rcnn/network_files/transform.py
================================================
import math
from typing import List, Tuple, Dict, Optional

import torch
from torch import nn, Tensor
import torch.nn.functional as F
import torchvision

from .image_list import ImageList


def _onnx_paste_mask_in_image(mask, box, im_h, im_w):
    """
    Tensor-only variant of ``paste_mask_in_image`` used on the ONNX tracing path.

    The mask is resized to the box extent and then embedded into an
    ``im_h`` x ``im_w`` canvas by concatenating zero blocks around it
    (instead of slice assignment).

    NOTE(review): ``box`` is combined with int64 one/zero tensors via
    ``torch.cat`` and ``im_h`` / ``im_w`` are used with ``.unsqueeze``, so they
    are presumably int64 tensors - confirm against the caller.
    """
    one = torch.ones(1, dtype=torch.int64)
    zero = torch.zeros(1, dtype=torch.int64)

    # Box extent (+1), floored at 1 via a max over the concatenation with `one`.
    w = box[2] - box[0] + one
    h = box[3] - box[1] + one
    w = torch.max(torch.cat((w, one)))
    h = torch.max(torch.cat((h, one)))

    # Set shape to [batchxCxHxW]
    mask = mask.expand((1, 1, mask.size(0), mask.size(1)))

    # Resize mask
    mask = F.interpolate(mask, size=(int(h), int(w)), mode="bilinear", align_corners=False)
    mask = mask[0][0]

    # Clamp the paste region to the image: max(.., 0) / min(.., im_w or im_h),
    # expressed as max/min over concatenated tensors.
    x_0 = torch.max(torch.cat((box[0].unsqueeze(0), zero)))
    x_1 = torch.min(torch.cat((box[2].unsqueeze(0) + one, im_w.unsqueeze(0))))
    y_0 = torch.max(torch.cat((box[1].unsqueeze(0), zero)))
    y_1 = torch.min(torch.cat((box[3].unsqueeze(0) + one, im_h.unsqueeze(0))))

    # Part of the resized mask that falls inside the image.
    unpaded_im_mask = mask[(y_0 - box[1]): (y_1 - box[1]), (x_0 - box[0]): (x_1 - box[0])]

    # TODO : replace below with a dynamic padding when support is added in ONNX

    # pad y
    zeros_y0 = torch.zeros(y_0, unpaded_im_mask.size(1))
    zeros_y1 = torch.zeros(im_h - y_1, unpaded_im_mask.size(1))
    concat_0 = torch.cat((zeros_y0, unpaded_im_mask.to(dtype=torch.float32), zeros_y1), 0)[0:im_h, :]
    # pad x
    zeros_x0 = torch.zeros(concat_0.size(0), x_0)
    zeros_x1 = torch.zeros(concat_0.size(0), im_w - x_1)
    im_mask = torch.cat((zeros_x0, concat_0, zeros_x1), 1)[:, :im_w]
    return im_mask


@torch.jit._script_if_tracing
def _onnx_paste_mask_in_image_loop(masks, boxes, im_h, im_w):
    """
    Paste every mask into its own ``im_h`` x ``im_w`` canvas and stack the
    results along a new leading dimension.

    The accumulator starts as an empty [0, im_h, im_w] tensor and grows by
    one pasted mask per iteration.
    """
    pasted = torch.zeros(0, im_h, im_w)
    for idx in range(masks.size(0)):
        single = _onnx_paste_mask_in_image(masks[idx][0], boxes[idx], im_h, im_w)
        pasted = torch.cat((pasted, single.unsqueeze(0)))

    return pasted


@torch.jit.unused
def _get_shape_onnx(image: Tensor) -> Tensor:
    from torch.onnx import operators

    return operators.shape_as_tensor(image)[-2:]


@torch.jit.unused
def _fake_cast_onnx(v: Tensor) -> float:
    # ONNX requires a tensor but here we fake its type for JIT.
    return v


def _resize_image_and_masks(image: Tensor,
                            self_min_size: float,
                            self_max_size: float,
                            target: Optional[Dict[str, Tensor]] = None,
                            fixed_size: Optional[Tuple[int, int]] = None
                            ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
    """
    Resize an image (and the "masks" entry of its target, if any).

    When ``fixed_size`` is given the output size is ``[fixed_size[1],
    fixed_size[0]]``. Otherwise the image is scaled by
    ``min(self_min_size / shorter_side, self_max_size / longer_side)``.

    Args:
        image: image tensor; its last two dimensions are treated as the
            spatial size.
        self_min_size: desired length of the shorter side.
        self_max_size: upper bound for the longer side.
        target: optional target dict; ``target["masks"]`` is replaced in
            place with the resized masks when present.
        fixed_size: optional explicit output size.

    Returns:
        The resized image and the (possibly updated) target.
    """
    # Spatial size as a tensor; the tracing path goes through the ONNX shape op.
    if torchvision._is_tracing():
        hw = _get_shape_onnx(image)
    else:
        hw = torch.tensor(image.shape[-2:])

    size: Optional[List[int]] = None
    scale_factor: Optional[float] = None
    recompute_scale_factor: Optional[bool] = None
    if fixed_size is None:
        shorter_side = torch.min(hw).to(dtype=torch.float32)
        longer_side = torch.max(hw).to(dtype=torch.float32)
        # Largest scale that satisfies both the min-side and max-side limits.
        scale = torch.min(self_min_size / shorter_side, self_max_size / longer_side)

        if torchvision._is_tracing():
            scale_factor = _fake_cast_onnx(scale)
        else:
            scale_factor = scale.item()
        recompute_scale_factor = True
    else:
        size = [fixed_size[1], fixed_size[0]]

    # Bilinear interpolation needs a 4-D input, so a batch dimension is added
    # with image[None] ([C, H, W] -> [1, C, H, W]) and removed again with [0].
    batched = image[None]
    image = F.interpolate(
        batched,
        size=size,
        scale_factor=scale_factor,
        mode="bilinear",
        recompute_scale_factor=recompute_scale_factor,
        align_corners=False)[0]

    if target is None:
        return image, target

    if "masks" in target:
        # Masks get a channel dimension for interpolation (default mode),
        # then are converted back to uint8.
        float_masks = target["masks"][:, None].float()
        resized_masks = F.interpolate(
            float_masks, size=size, scale_factor=scale_factor, recompute_scale_factor=recompute_scale_factor
        )
        target["masks"] = resized_masks[:, 0].byte()

    return image, target


def _onnx_expand_boxes(boxes, scale):
    # type: (Tensor, float) -> Tensor
    w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5
    h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5
    x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5
    y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5

    w_half = w_half.to(dtype=torch.float32) * scale
    h_half = h_half.to(dtype=torch.float32) * scale

    boxes_exp0 = x_c - w_half
    boxes_exp1 = y_c - h_half
    boxes_exp2 = x_c + w_half
    boxes_exp3 = y_c + h_half
    boxes_exp = torch.stack((boxes_exp0, boxes_exp1, boxes_exp2, boxes_exp3), 1)
    return boxes_exp


# the next two functions should be merged inside Masker
# but are kept here for the moment while we need them
# temporarily for paste_mask_in_image
def expand_boxes(boxes, scale):
    # type: (Tensor, float) -> Tensor
    """
    Scale every box about its centre by ``scale``.

    Boxes are read as columns (x_min, y_min, x_max, y_max). While tracing,
    the work is delegated to ``_onnx_expand_boxes``; otherwise the result is
    written into a tensor created with ``torch.zeros_like(boxes)``.
    """
    if torchvision._is_tracing():
        return _onnx_expand_boxes(boxes, scale)

    center_x = (boxes[:, 2] + boxes[:, 0]) * 0.5
    center_y = (boxes[:, 3] + boxes[:, 1]) * 0.5
    half_w = (boxes[:, 2] - boxes[:, 0]) * 0.5 * scale
    half_h = (boxes[:, 3] - boxes[:, 1]) * 0.5 * scale

    expanded = torch.zeros_like(boxes)
    expanded[:, 0] = center_x - half_w
    expanded[:, 1] = center_y - half_h
    expanded[:, 2] = center_x + half_w
    expanded[:, 3] = center_y + half_h
    return expanded


@torch.jit.unused
def expand_masks_tracing_scale(M, padding):
    # type: (int, int) -> float
    """
    Return ``(M + 2 * padding) / M`` computed with float32 tensors, so the
    ratio stays a tensor on the tracing path.
    """
    padded_side = torch.tensor(M + 2 * padding).to(torch.float32)
    original_side = torch.tensor(M).to(torch.float32)
    return padded_side / original_side


def expand_masks(mask, padding):
    # type: (Tensor, int) -> Tuple[Tensor, float]
    """
    Pad the last two dimensions of ``mask`` with ``padding`` on every side.

    Returns:
        The padded mask and the ratio ``(M + 2 * padding) / M`` where ``M``
        is the size of the mask's last dimension.
    """
    side = mask.shape[-1]
    # The tracing state is queried through torch._C directly; the original
    # author noted that is_tracing() could not be imported here.
    if torch._C._get_tracing_state():
        ratio = expand_masks_tracing_scale(side, padding)
    else:
        ratio = float(side + 2 * padding) / side
    return F.pad(mask, (padding, padding, padding, padding)), ratio


def paste_mask_in_image(mask, box, im_h, im_w):
    # type: (Tensor, Tensor, int, int) -> Tensor
    """
    Resize a single mask to the extent of ``box`` and paste it into an
    all-zero ``im_h`` x ``im_w`` canvas.

    Args:
        mask: 2-D mask tensor.
        box: box as (x_min, y_min, x_max, y_max); used for slicing, so the
            values must be usable as indices.
        im_h: canvas height.
        im_w: canvas width.

    Returns:
        Tensor of shape [im_h, im_w] with the mask's dtype and device.
    """
    # refer to: https://github.com/pytorch/vision/issues/5845
    TO_REMOVE = 1
    # Box extent (+1), never smaller than one pixel.
    box_w = max(int(box[2] - box[0] + TO_REMOVE), 1)
    box_h = max(int(box[3] - box[1] + TO_REMOVE), 1)

    # Bilinear interpolation only supports 4-D input, so two leading
    # dimensions are added (-1 keeps the existing sizes) and stripped after.
    resized = F.interpolate(
        mask.expand((1, 1, -1, -1)), size=(box_h, box_w), mode='bilinear', align_corners=False
    )[0][0]

    canvas = torch.zeros((im_h, im_w), dtype=resized.dtype, device=resized.device)
    # Clamp the paste region so it never leaves the image.
    left = max(box[0], 0)
    right = min(box[2] + 1, im_w)
    top = max(box[1], 0)
    bottom = min(box[3] + 1, im_h)

    # Copy the part of the resized mask that lies inside the image.
    canvas[top:bottom, left:right] = resized[(top - box[1]):(bottom - box[1]),
                                             (left - box[0]):(right - box[0])]
    return canvas


def paste_masks_in_image(masks, boxes, img_shape, padding=1):
    # type: (Tensor, Tensor, Tuple[int, int], int) -> Tensor
    """
    Paste a batch of masks into image-sized canvases.

    Masks are first padded by ``padding`` and the boxes are enlarged by the
    matching ratio (the original author notes that PyTorch reports a slight
    mAP gain from this; refer to: https://github.com/pytorch/vision/issues/5845).

    Returns:
        Tensor of shape [num_obj, 1, im_h, im_w]; an empty
        [0, 1, im_h, im_w] tensor when there is nothing to paste.
    """
    masks, scale = expand_masks(masks, padding=padding)
    boxes = expand_boxes(boxes, scale).to(dtype=torch.int64)
    im_h, im_w = img_shape

    if torchvision._is_tracing():
        traced = _onnx_paste_mask_in_image_loop(
            masks, boxes, torch.scalar_tensor(im_h, dtype=torch.int64), torch.scalar_tensor(im_w, dtype=torch.int64)
        )
        return traced[:, None]

    pasted = [paste_mask_in_image(m[0], b, im_h, im_w) for m, b in zip(masks, boxes)]
    if len(pasted) == 0:
        return masks.new_empty((0, 1, im_h, im_w))
    return torch.stack(pasted, dim=0)[:, None]  # [num_obj, 1, H, W]


class GeneralizedRCNNTransform(nn.Module):
    """
    Performs input / target transformation before feeding the data to a GeneralizedRCNN
    model.

    The transformations it perform are:
        - input normalization (mean subtraction and std division)
        - input / target resizing to match min_size / max_size

    It returns a ImageList for the inputs, and a List[Dict[Tensor]] for the targets
    """

    def __init__(self,
                 min_size: int,
                 max_size: int,
                 image_mean: List[float],
                 image_std: List[float],
                 size_divisible: int = 32,
                 fixed_size: Optional[Tuple[int, int]] = None):
        """
        Args:
            min_size: target length of the shorter image side. A list/tuple of
                candidates is also accepted; a single value is wrapped in a tuple.
            max_size: upper bound for the longer image side.
            image_mean: per-channel mean used for normalization.
            image_std: per-channel standard deviation used for normalization.
            size_divisible: batched height/width are rounded up to a multiple of this.
            fixed_size: optional explicit output size forwarded to the resize helper.
        """
        super().__init__()
        if not isinstance(min_size, (list, tuple)):
            min_size = (min_size,)
        self.min_size = min_size      # candidate lengths for the shorter side
        self.max_size = max_size      # upper bound for the longer side
        self.image_mean = image_mean  # per-channel mean for normalization
        self.image_std = image_std    # per-channel standard deviation for normalization
        self.size_divisible = size_divisible
        self.fixed_size = fixed_size

    def normalize(self, image):
        """
        Subtract the per-channel mean and divide by the per-channel std.

        NOTE: mean and std are converted to the image's dtype, so the image
        should be a floating-point tensor for the statistics to be preserved.
        """
        dtype, device = image.dtype, image.device
        mean = torch.as_tensor(self.image_mean, dtype=dtype, device=device)
        std = torch.as_tensor(self.image_std, dtype=dtype, device=device)
        # [:, None, None]: shape [3] -> [3, 1, 1]
        return (image - mean[:, None, None]) / std[:, None, None]

    def torch_choice(self, k):
        # type: (List[int]) -> int
        """
        Implements `random.choice` via torch ops so it can be compiled with
        TorchScript. Remove if https://github.com/pytorch/pytorch/issues/25803
        is fixed.
        """
        index = int(torch.empty(1).uniform_(0., float(len(k))).item())
        return k[index]

    def resize(self, image, target):
        # type: (Tensor, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]
        """
        Resize the image into the configured size range and rescale the
        target's boxes (and masks) accordingly.

        Args:
            image: input image, shape [channel, height, width]
            target: optional target dict for the image (must contain "boxes");
                its "boxes" (and "masks") entries are replaced in this dict

        Returns:
            image: the resized image
            target: the target with rescaled boxes
        """
        # image shape is [channel, height, width]
        h, w = image.shape[-2:]

        if self.training:
            # pick one of the configured shorter-side lengths at random
            size = float(self.torch_choice(self.min_size))
        else:
            # FIXME assume for now that testing uses the largest scale
            size = float(self.min_size[-1])

        image, target = _resize_image_and_masks(image, size, float(self.max_size), target, self.fixed_size)

        if target is None:
            return image, target

        bbox = target["boxes"]
        # scale the boxes by the same ratio as the image
        bbox = resize_boxes(bbox, [h, w], image.shape[-2:])
        target["boxes"] = bbox

        return image, target

    # _onnx_batch_images() is an implementation of
    # batch_images() that is supported by ONNX tracing.
    @torch.jit.unused
    def _onnx_batch_images(self, images, size_divisible=32):
        # type: (List[Tensor], int) -> Tensor
        """Pad all images to a common size with F.pad and stack them (ONNX-traceable)."""
        max_size = []
        for i in range(images[0].dim()):
            max_size_i = torch.max(torch.stack([img.shape[i] for img in images]).to(torch.float32)).to(torch.int64)
            max_size.append(max_size_i)
        stride = size_divisible
        max_size[1] = (torch.ceil((max_size[1].to(torch.float32)) / stride) * stride).to(torch.int64)
        max_size[2] = (torch.ceil((max_size[2].to(torch.float32)) / stride) * stride).to(torch.int64)
        max_size = tuple(max_size)

        # work around for
        # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
        # which is not yet supported in onnx
        padded_imgs = []
        for img in images:
            padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))]
            padded_img = torch.nn.functional.pad(img, [0, padding[2], 0, padding[1], 0, padding[0]])
            padded_imgs.append(padded_img)

        return torch.stack(padded_imgs)

    def max_by_axis(self, the_list):
        # type: (List[List[int]]) -> List[int]
        """
        Return the element-wise maximum over a list of equally long int lists.

        The result is a new list; the input lists are left untouched.
        """
        # Copy the first entry so the running maximum never aliases (and
        # silently overwrites) the caller's data.
        maxes = list(the_list[0])
        for sublist in the_list[1:]:
            for index, item in enumerate(sublist):
                maxes[index] = max(maxes[index], item)
        return maxes

    def batch_images(self, images, size_divisible=32):
        # type: (List[Tensor], int) -> Tensor
        """
        Pack a list of images into a single batch tensor (every image in the
        batch ends up with the same shape).

        Args:
            images: the input images
            size_divisible: height and width are rounded up to a multiple of this value

        Returns:
            batched_imgs: the zero-padded batch tensor
        """

        if torchvision._is_tracing():
            # batch_images() does not export well to ONNX
            # call _onnx_batch_images() instead
            return self._onnx_batch_images(images, size_divisible)

        # maximum channel, height and width over all images of the batch
        max_size = self.max_by_axis([list(img.shape) for img in images])

        stride = float(size_divisible)
        # round the height up to a multiple of stride
        max_size[1] = int(math.ceil(float(max_size[1]) / stride) * stride)
        # round the width up to a multiple of stride
        max_size[2] = int(math.ceil(float(max_size[2]) / stride) * stride)

        # [batch, channel, height, width]
        batch_shape = [len(images)] + max_size

        # zero-filled tensor of shape batch_shape
        batched_imgs = images[0].new_full(batch_shape, 0)
        for img, pad_img in zip(images, batched_imgs):
            # Copy each image into the top-left corner of its slot so that box
            # coordinates stay valid; the remainder stays zero-padded.
            # copy_: Copies the elements from src into self tensor and returns self
            pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)

        return batched_imgs

    def postprocess(self,
                    result,                # type: List[Dict[str, Tensor]]
                    image_shapes,          # type: List[Tuple[int, int]]
                    original_image_sizes   # type: List[Tuple[int, int]]
                    ):
        # type: (...) -> List[Dict[str, Tensor]]
        """
        Post-process the network predictions: map boxes (and masks) back to
        the original image scale. In training mode the result is returned
        unchanged.

        Args:
            result: list(dict), network predictions, len(result) == batch_size
            image_shapes: list(torch.Size), image sizes after resizing, len(image_shapes) == batch_size
            original_image_sizes: list(torch.Size), original image sizes, len(original_image_sizes) == batch_size

        Returns:
            The same ``result`` list with "boxes" (and "masks") updated.
        """
        if self.training:
            return result

        # rescale the predictions of every image back to its original size
        for i, (pred, im_s, o_im_s) in enumerate(zip(result, image_shapes, original_image_sizes)):
            boxes = pred["boxes"]
            boxes = resize_boxes(boxes, im_s, o_im_s)
            result[i]["boxes"] = boxes
            if "masks" in pred:
                masks = pred["masks"]
                # paste the masks into canvases of the original image size
                masks = paste_masks_in_image(masks, boxes, o_im_s)
                result[i]["masks"] = masks

        return result

    def __repr__(self):
        """Human-readable summary of the normalization and resize settings."""
        format_string = self.__class__.__name__ + '('
        _indent = '\n    '
        format_string += "{0}Normalize(mean={1}, std={2})".format(_indent, self.image_mean, self.image_std)
        format_string += "{0}Resize(min_size={1}, max_size={2}, mode='bilinear')".format(_indent, self.min_size,
                                                                                         self.max_size)
        format_string += '\n)'
        return format_string

    def forward(self,
                images,       # type: List[Tensor]
                targets=None  # type: Optional[List[Dict[str, Tensor]]]
                ):
        # type: (...) -> Tuple[ImageList, Optional[List[Dict[str, Tensor]]]]
        """
        Normalize, resize and batch the images; rescale the targets to match.

        Args:
            images: list of 3-D image tensors of shape [C, H, W]
            targets: optional list with one target dict per image

        Returns:
            An ImageList holding the batched images together with their
            sizes after resizing, and the transformed targets (or None).

        Raises:
            ValueError: if an image is not a 3-D tensor.
        """
        images = [img for img in images]
        if targets is not None:
            # Work on shallow copies of the target dicts (and of the list) so
            # the caller's targets are not modified in place by resize().
            # Written as explicit loops to stay TorchScript compatible.
            targets_copy: List[Dict[str, Tensor]] = []
            for t in targets:
                data: Dict[str, Tensor] = {}
                for k, v in t.items():
                    data[k] = v
                targets_copy.append(data)
            targets = targets_copy
        for i in range(len(images)):
            image = images[i]
            target_index = targets[i] if targets is not None else None

            if image.dim() != 3:
                raise ValueError("images is expected to be a list of 3d tensors "
                                 "of shape [C, H, W], got {}".format(image.shape))
            image = self.normalize(image)
            image, target_index = self.resize(image, target_index)
            images[i] = image
            if targets is not None and target_index is not None:
                targets[i] = target_index

        # remember the image sizes after resizing (before batch padding)
        image_sizes = [img.shape[-2:] for img in images]
        images = self.batch_images(images, self.size_divisible)
        image_sizes_list = torch.jit.annotate(List[Tuple[int, int]], [])

        for image_size in image_sizes:
            assert len(image_size) == 2
            image_sizes_list.append((image_size[0], image_size[1]))

        image_list = ImageList(images, image_sizes_list)
        return image_list, targets


def resize_boxes(boxes, original_size, new_size):
    # type: (Tensor, List[int], List[int]) -> Tensor
    """
    Scale box coordinates to follow an image that has been resized.

    Arguments:
        boxes: tensor of boxes, one row per box as (xmin, ymin, xmax, ymax)
        original_size: image size before resizing, as (height, width)
        new_size: image size after resizing, as (height, width)

    Returns:
        Tensor with the same layout as ``boxes``, x coordinates multiplied by
        the width ratio and y coordinates by the height ratio.
    """
    scale_factors = [
        torch.tensor(new_dim, dtype=torch.float32, device=boxes.device) /
        torch.tensor(old_dim, dtype=torch.float32, device=boxes.device)
        for new_dim, old_dim in zip(new_size, original_size)
    ]
    scale_h, scale_w = scale_factors

    # unbind(1) splits the [N, 4] tensor into four [N] coordinate columns
    left, top, right, bottom = boxes.unbind(1)
    scaled_columns = (left * scale_w, top * scale_h, right * scale_w, bottom * scale_h)
    return torch.stack(scaled_columns, dim=1)


================================================
FILE: pytorch_object_detection/mask_rcnn/pascal_voc_indices.json
================================================
{
    "1": "aeroplane",
    "2": "bicycle",
    "3": "bird",
    "4": "boat",
    "5": "bottle",
    "6": "bus",
    "7": "car",
    "8": "cat",
    "9": "chair",
    "10": "cow",
    "11": "diningtable",
    "12": "dog",
    "13": "horse",
    "14": "motorbike",
    "15": "person",
    "16": "pottedplant",
    "17": "sheep",
    "18": "sofa",
    "19": "train",
    "20": "tvmonitor"
}

================================================
FILE: pytorch_object_detection/mask_rcnn/plot_curve.py
================================================
import datetime
import matplotlib.pyplot as plt


def plot_loss_and_lr(train_loss, learning_rate):
    """Plot training loss and learning rate on twin y-axes and save a PNG.

    The image is written to ``./loss_and_lr<timestamp>.png``. Plotting is
    best-effort: any exception is printed instead of raised so that a
    plotting problem cannot abort the calling script.

    Args:
        train_loss: sequence of loss values, one per x position.
        learning_rate: sequence of learning rates, same length as
            ``train_loss``.
    """
    fig = None
    try:
        x = list(range(len(train_loss)))
        fig, ax1 = plt.subplots(1, 1)
        ax1.plot(x, train_loss, 'r', label='loss')
        ax1.set_xlabel("step")
        ax1.set_ylabel("loss")
        ax1.set_title("Train Loss and lr")

        # second y-axis sharing the same x-axis for the learning rate
        ax2 = ax1.twinx()
        ax2.plot(x, learning_rate, label='lr')
        ax2.set_ylabel("learning rate")
        ax2.set_xlim(0, len(train_loss))  # x range covers every recorded point

        # Draw a single legend that lists the curves of both axes; separate
        # per-axis legends would show the loss entry twice.
        handles1, labels1 = ax1.get_legend_handles_labels()
        handles2, labels2 = ax2.get_legend_handles_labels()
        ax2.legend(handles1 + handles2, labels1 + labels2, loc='upper right')

        fig.subplots_adjust(right=0.8)  # keep the right-hand axis label inside the saved image
        fig.savefig('./loss_and_lr{}.png'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")))
        print("successful save loss curve! ")
    except Exception as e:
        print(e)
    finally:
        # Release the figure even when plotting or saving failed.
        if fig is not None:
            plt.close(fig)


def plot_map(mAP):
    """Plot the per-epoch mAP curve and save it to ``./mAP.png``.

    Plotting is best-effort: any exception is printed instead of raised so
    that a plotting problem cannot abort the calling script.

    Args:
        mAP: sequence of mAP values, one per epoch.
    """
    fig = None
    try:
        x = list(range(len(mAP)))
        # Use a dedicated figure rather than pyplot's implicit "current"
        # one, so the curve can never be drawn on top of a leftover figure.
        fig, ax = plt.subplots(1, 1)
        ax.plot(x, mAP, label='mAp')
        ax.set_xlabel('epoch')
        ax.set_ylabel('mAP')
        ax.set_title('Eval mAP')
        ax.set_xlim(0, len(mAP))
        ax.legend(loc='best')
        fig.savefig('./mAP.png')
        print("successful save mAP curve!")
    except Exception as e:
        print(e)
    finally:
        # Release the figure even when plotting or saving failed.
        if fig is not None:
            plt.close(fig)


================================================
FILE: pytorch_object_detection/mask_rcnn/predict.py
================================================
import os
import time
import json

import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import torch
from torchvision import transforms

from network_files import MaskRCNN
from backbone import resnet50_fpn_backbone
from draw_box_utils import draw_objs


def create_model(num_classes, box_thresh=0.5):
    """Build a Mask R-CNN with a ResNet50-FPN backbone for inference.

    Args:
        num_classes: number of classes passed to ``MaskRCNN``.
        box_thresh: score threshold, used for both ``rpn_score_thresh`` and
            ``box_score_thresh``.

    Returns:
        The constructed ``MaskRCNN`` model (no weights are loaded here).
    """
    return MaskRCNN(resnet50_fpn_backbone(),
                    num_classes=num_classes,
                    rpn_score_thresh=box_thresh,
                    box_score_thresh=box_thresh)


def time_synchronized():
    """Return the current wall-clock time after pending CUDA work finished.

    When CUDA is available, ``torch.cuda.synchronize()`` is called first so
    the timestamp is taken after queued GPU work has completed; without CUDA
    this is just ``time.time()``.
    """
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()


def main():
    """Run a trained Mask R-CNN on one image and save the rendered result.

    Uses the hard-coded paths below for the weights, the test image and the
    class-index JSON. Performs one warm-up forward pass, one timed forward
    pass, then displays the drawn detections and writes ``test_result.jpg``.
    Returns early (after printing a message) when nothing is detected.
    """
    num_classes = 90  # background class not included
    box_thresh = 0.5
    weights_path = "./save_weights/model_25.pth"
    img_path = "./test.jpg"
    label_json_path = './coco91_indices.json'

    # choose the inference device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # build the network; +1 accounts for the background class
    model = create_model(num_classes=num_classes + 1, box_thresh=box_thresh)

    # restore the trained weights; a training checkpoint stores them under "model"
    assert os.path.exists(weights_path), "{} file dose not exist.".format(weights_path)
    checkpoint = torch.load(weights_path, map_location='cpu')
    if "model" in checkpoint:
        checkpoint = checkpoint["model"]
    model.load_state_dict(checkpoint)
    model.to(device)

    # class id -> class name lookup used when drawing
    assert os.path.exists(label_json_path), "json file {} dose not exist.".format(label_json_path)
    with open(label_json_path, 'r') as json_file:
        category_index = json.load(json_file)

    # read the image as RGB
    assert os.path.exists(img_path), f"{img_path} does not exits."
    original_img = Image.open(img_path).convert('RGB')

    # PIL image -> tensor (no normalization here), then add the batch dimension
    to_tensor = transforms.Compose([transforms.ToTensor()])
    img = to_tensor(original_img).unsqueeze(0)

    model.eval()  # switch to evaluation mode
    with torch.no_grad():
        # warm-up pass on a blank image of the same size, excluded from the timing
        img_height, img_width = img.shape[-2:]
        model(torch.zeros((1, 3, img_height, img_width), device=device))

        t_start = time_synchronized()
        predictions = model(img.to(device))[0]
        t_end = time_synchronized()
        print("inference+NMS time: {}".format(t_end - t_start))

        # move every prediction field to the CPU as a numpy array
        as_numpy = {key: predictions[key].to("cpu").numpy()
                    for key in ("boxes", "labels", "scores", "masks")}
        predict_boxes = as_numpy["boxes"]
        predict_classes = as_numpy["labels"]
        predict_scores = as_numpy["scores"]
        predict_mask = as_numpy["masks"].squeeze(axis=1)  # [batch, 1, h, w] -> [batch, h, w]

        if len(predict_boxes) == 0:
            print("没有检测到任何目标!")
            return

        plot_img = draw_objs(original_img,
                             boxes=predict_boxes,
                             classes=predict_classes,
                             scores=predict_scores,
                             masks=predict_mask,
                             category_index=category_index,
                             line_thickness=3,
                             font='arial.ttf',
                             font_size=20)
        plt.imshow(plot_img)
        plt.show()
        # save the rendered prediction
        plot_img.save("test_result.jpg")


# Script entry point: run the single-image prediction demo only when this
# file is executed directly, not when it is imported.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_object_detection/mask_rcnn/requirements.txt
================================================
lxml
matplotlib
numpy
tqdm
pycocotools
Pillow
torch==1.13.1
torchvision==0.14.1


================================================
FILE: pytorch_object_detection/mask_rcnn/seg_results20220406-141544.txt
================================================
epoch:0 0.172  0.321  0.167  0.065  0.195  0.250  0.188  0.307  0.324  0.147  0.366  0.440  1.3826  0.08
epoch:1 0.223  0.395  0.225  0.092  0.249  0.322  0.222  0.354  0.372  0.186  0.413  0.499  1.0356  0.08
epoch:2 0.235  0.408  0.241  0.100  0.258  0.350  0.230  0.372  0.392  0.204  0.429  0.517  0.9718  0.08
epoch:3 0.246  0.426  0.252  0.103  0.267  0.357  0.241  0.386  0.408  0.225  0.448  0.521  0.9363  0.08
epoch:4 0.250  0.424  0.257  0.106  0.272  0.367  0.242  0.381  0.400  0.210  0.438  0.530  0.9145  0.08
epoch:5 0.255  0.434  0.262  0.109  0.279  0.375  0.242  0.379  0.398  0.209  0.433  0.534  0.8982  0.08
epoch:6 0.270  0.456  0.283  0.120  0.293  0.392  0.254  0.403  0.421  0.229  0.462  0.551  0.8859  0.08
epoch:7 0.269  0.455  0.280  0.118  0.296  0.388  0.257  0.402  0.421  0.228  0.454  0.564  0.8771  0.08
epoch:8 0.276  0.465  0.290  0.120  0.301  0.398  0.255  0.401  0.418  0.227  0.461  0.553  0.8685  0.08
epoch:9 0.271  0.458  0.282  0.113  0.297  0.404  0.253  0.398  0.417  0.211  0.460  0.570  0.8612  0.08
epoch:10 0.277  0.463  0.289  0.119  0.299  0.410  0.258  0.405  0.425  0.221  0.466  0.558  0.8547  0.08
epoch:11 0.276  0.463  0.287  0.122  0.304  0.405  0.259  0.406  0.425  0.236  0.466  0.559  0.8498  0.08
epoch:12 0.276  0.464  0.288  0.127  0.294  0.409  0.257  0.406  0.425  0.236  0.459  0.563  0.8461  0.08
epoch:13 0.284  0.477  0.296  0.124  0.311  0.412  0.262  0.407  0.429  0.229  0.474  0.555  0.8409  0.08
epoch:14 0.277  0.464  0.292  0.121  0.304  0.397  0.257  0.410  0.431  0.238  0.473  0.565  0.8355  0.08
epoch:15 0.282  0.474  0.296  0.121  0.308  0.413  0.264  0.411  0.432  0.231  0.473  0.575  0.833  0.08
epoch:16 0.336  0.549  0.356  0.149  0.367  0.491  0.288  0.451  0.473  0.269  0.519  0.620  0.7421  0.008
epoch:17 0.339  0.553  0.360  0.153  0.371  0.496  0.292  0.454  0.475  0.271  0.518  0.624  0.7157  0.008
epoch:18 0.340  0.553  0.361  0.150  0.371  0.494  0.290  0.453  0.473  0.269  0.516  0.620  0.7016  0.008
epoch:19 0.341  0.555  0.363  0.154  0.372  0.500  0.293  0.458  0.478  0.273  0.522  0.630  0.6897  0.008
epoch:20 0.340  0.554  0.361  0.154  0.370  0.496  0.289  0.450  0.471  0.266  0.514  0.622  0.6802  0.008
epoch:21 0.338  0.552  0.358  0.151  0.367  0.500  0.289  0.447  0.467  0.262  0.507  0.622  0.6708  0.008
epoch:22 0.340  0.553  0.360  0.151  0.370  0.500  0.290  0.450  0.470  0.267  0.513  0.623  0.6497  0.0008
epoch:23 0.340  0.552  0.361  0.151  0.369  0.500  0.290  0.449  0.468  0.266  0.509  0.619  0.6447  0.0008
epoch:24 0.339  0.552  0.359  0.150  0.369  0.500  0.290  0.448  0.468  0.264  0.510  0.619  0.6421  0.0008
epoch:25 0.338  0.551  0.359  0.152  0.367  0.500  0.289  0.448  0.467  0.264  0.509  0.618  0.6398  0.0008


================================================
FILE: pytorch_object_detection/mask_rcnn/train.py
================================================
import os
import datetime

import torch
from torchvision.ops.misc import FrozenBatchNorm2d

import transforms
from network_files import MaskRCNN
from backbone import resnet50_fpn_backbone
from my_dataset_coco import CocoDetection
from my_dataset_voc import VOCInstances
from train_utils import train_eval_utils as utils
from train_utils import GroupedBatchSampler, create_aspect_ratio_groups


def create_model(num_classes, load_pretrain_weights=True):
    """Build the Mask R-CNN to train, optionally initialized from COCO weights.

    Args:
        num_classes: number of classes passed to ``MaskRCNN``.
        load_pretrain_weights: when true, load ``./maskrcnn_resnet50_fpn_coco.pth``
            after dropping the class-specific head weights.

    Returns:
        The constructed ``MaskRCNN`` model.
    """
    # With little GPU memory (small batch sizes) consider passing
    # norm_layer=FrozenBatchNorm2d to resnet50_fpn_backbone (the default is
    # nn.BatchNorm2d); it acts like BatchNorm2d but its parameters are frozen.
    # trainable_layers counts from ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'];
    # 5 means everything is trained.
    # resnet50 imagenet weights url: https://download.pytorch.org/models/resnet50-0676ba61.pth
    model = MaskRCNN(resnet50_fpn_backbone(pretrain_path="resnet50.pth", trainable_layers=3),
                     num_classes=num_classes)

    if not load_pretrain_weights:
        return model

    # coco weights url: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth"
    weights_dict = torch.load("./maskrcnn_resnet50_fpn_coco.pth", map_location="cpu")

    # Remove the box-predictor and mask-logits entries in place; the remaining
    # weights are loaded non-strictly so the missing keys are tolerated.
    head_markers = ("box_predictor", "mask_fcn_logits")
    stale_keys = [name for name in weights_dict.keys()
                  if any(marker in name for marker in head_markers)]
    for name in stale_keys:
        del weights_dict[name]

    print(model.load_state_dict(weights_dict, strict=False))
    return model


def main(args):
    """Train Mask R-CNN on a single device, evaluating after every epoch.

    Args:
        args: parsed command-line namespace. Reads ``device``, ``data_path``,
            ``aspect_ratio_group_factor``, ``batch_size``, ``num_classes``,
            ``pretrain``, ``lr``, ``momentum``, ``weight_decay``, ``amp``,
            ``lr_steps``, ``lr_gamma``, ``resume``, ``start_epoch``,
            ``epochs`` and, when present, ``output_dir``.

    Side effects:
        Appends one line per epoch to ``det_results<timestamp>.txt`` and
        ``seg_results<timestamp>.txt``, writes ``model_<epoch>.pth`` into the
        output directory and plots the loss / lr / mAP curves at the end.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    # Checkpoints go to the directory requested by the caller; fall back to
    # the historical "./save_weights" when the attribute is missing or empty.
    output_dir = getattr(args, "output_dir", None) or "./save_weights"
    os.makedirs(output_dir, exist_ok=True)

    # files that collect the COCO metrics of every epoch
    now = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    det_results_file = f"det_results{now}.txt"
    seg_results_file = f"seg_results{now}.txt"

    data_transform = {
        "train": transforms.Compose([transforms.ToTensor(),
                                     transforms.RandomHorizontalFlip(0.5)]),
        "val": transforms.Compose([transforms.ToTensor()])
    }

    data_root = args.data_path

    # load train data set
    # coco2017 -> annotations -> instances_train2017.json
    train_dataset = CocoDetection(data_root, "train", data_transform["train"])
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt
    # train_dataset = VOCInstances(data_root, year="2012", txt_name="train.txt", transforms=data_transform["train"])
    train_sampler = None
    train_batch_sampler = None

    # Optionally build batches from images with similar aspect ratio
    # (enabled by default); this reduces the GPU memory needed for training.
    if args.aspect_ratio_group_factor >= 0:
        train_sampler = torch.utils.data.RandomSampler(train_dataset)
        # bin index of every image according to its aspect ratio
        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)
        # every batch draws its images from a single aspect-ratio bin
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)

    # A custom collate_fn is required: each sample is an (image, target)
    # pair, which the default collation cannot stack into a batch.
    batch_size = args.batch_size
    # os.cpu_count() may return None when the count cannot be determined
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using %g dataloader workers' % nw)

    # Compare against None explicitly: truth-testing a sampler would consult
    # its length instead of telling whether grouping is enabled.
    if train_sampler is not None:
        # aspect-ratio grouping needs the batch_sampler argument
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_sampler=train_batch_sampler,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)
    else:
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_size=batch_size,
                                                        shuffle=True,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)

    # load validation data set
    # coco2017 -> annotations -> instances_val2017.json
    val_dataset = CocoDetection(data_root, "val", data_transform["val"])
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt
    # val_dataset = VOCInstances(data_root, year="2012", txt_name="val.txt", transforms=data_transform["val"])
    val_data_loader = torch.utils.data.DataLoader(val_dataset,
                                                  batch_size=1,
                                                  shuffle=False,
                                                  pin_memory=True,
                                                  num_workers=nw,
                                                  collate_fn=val_dataset.collate_fn)

    # create model; num_classes equals background + classes
    model = create_model(num_classes=args.num_classes + 1, load_pretrain_weights=args.pretrain)
    model.to(device)

    train_loss = []
    learning_rate = []
    val_map = []

    # define optimizer over the trainable parameters only
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        milestones=args.lr_steps,
                                                        gamma=args.lr_gamma)
    # When a checkpoint path is given, continue training from it.
    if args.resume:
        # If map_location is missing, torch.load will first load the module to CPU
        # and then copy each parameter to where it was saved,
        # which would result in all processes on the same machine using the same set of devices.
        # The checkpoint holds the model, optimizer and lr-scheduler states.
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp and "scaler" in checkpoint:
            scaler.load_state_dict(checkpoint["scaler"])

    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch, printing every 50 iterations
        mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader,
                                              device, epoch, print_freq=50,
                                              warmup=True, scaler=scaler)
        loss_value = mean_loss.item()
        train_loss.append(loss_value)
        learning_rate.append(lr)

        # update the learning rate
        lr_scheduler.step()

        # evaluate on the test dataset
        det_info, seg_info = utils.evaluate(model, val_data_loader, device=device)

        # Append this epoch's detection and segmentation metrics to their
        # files; each line holds the COCO metrics, the mean loss and the lr.
        for results_file, coco_info in ((det_results_file, det_info),
                                        (seg_results_file, seg_info)):
            with open(results_file, "a") as f:
                result_info = [f"{i:.4f}" for i in coco_info + [loss_value]] + [f"{lr:.6f}"]
                txt = "epoch:{} {}".format(epoch, '  '.join(result_info))
                f.write(txt + "\n")

        val_map.append(det_info[1])  # pascal mAP

        # save weights
        save_files = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler': lr_scheduler.state_dict(),
            'epoch': epoch}
        if args.amp:
            save_files["scaler"] = scaler.state_dict()
        torch.save(save_files, os.path.join(output_dir, "model_{}.pth".format(epoch)))

    # plot loss and lr curve
    if len(train_loss) != 0 and len(learning_rate) != 0:
        from plot_curve import plot_loss_and_lr
        plot_loss_and_lr(train_loss, learning_rate)

    # plot mAP curve
    if len(val_map) != 0:
        from plot_curve import plot_map
        plot_map(val_map)


if __name__ == "__main__":
    import argparse

    def _str2bool(value):
        """Convert a command-line token such as "True"/"false"/"0" to bool.

        argparse's ``type=bool`` (or no type at all) keeps any non-empty
        string truthy, so ``--flag False`` would still enable the flag.
        """
        if isinstance(value, bool):
            return value
        token = value.strip().lower()
        if token in ("true", "t", "yes", "y", "on", "1"):
            return True
        if token in ("false", "f", "no", "n", "off", "0", ""):
            return False
        raise argparse.ArgumentTypeError("boolean value expected, got {!r}".format(value))

    parser = argparse.ArgumentParser(
        description=__doc__)

    # training device
    parser.add_argument('--device', default='cuda:0', help='device')
    # root directory of the training data set
    parser.add_argument('--data-path', default='/data/coco2017', help='dataset')
    # number of object classes (background not included)
    parser.add_argument('--num-classes', default=90, type=int, help='num_classes')
    # directory where files are saved
    parser.add_argument('--output-dir', default='./save_weights', help='path where to save')
    # checkpoint to continue training from, if any
    parser.add_argument('--resume', default='', type=str, help='resume from checkpoint')
    # epoch index to start training from
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # total number of training epochs
    parser.add_argument('--epochs', default=26, type=int, metavar='N',
                        help='number of total epochs to run')
    # learning rate
    parser.add_argument('--lr', default=0.004, type=float,
                        help='initial learning rate, 0.02 is the default value for training '
                             'on 8 gpus and 2 images_per_gpu')
    # momentum of SGD
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # weight decay of SGD
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # milestones for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int,
                        help='decrease lr every step-size epochs')
    # decay factor for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')
    # training batch size (use a larger value when memory / GPU memory allows)
    parser.add_argument('--batch_size', default=2, type=int, metavar='N',
                        help='batch size when training.')
    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
    parser.add_argument("--pretrain", type=_str2bool, default=True, help="load COCO pretrain weights.")
    # mixed-precision training (requires a GPU that supports it)
    parser.add_argument("--amp", type=_str2bool, default=False,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()
    print(args)

    # make sure the directory for saved weights exists
    os.makedirs(args.output_dir, exist_ok=True)

    main(args)


================================================
FILE: pytorch_object_detection/mask_rcnn/train_multi_GPU.py
================================================
import time
import os
import datetime

import torch
from torchvision.ops.misc import FrozenBatchNorm2d

import transforms
from my_dataset_coco import CocoDetection
from my_dataset_voc import VOCInstances
from backbone import resnet50_fpn_backbone
from network_files import MaskRCNN
import train_utils.train_eval_utils as utils
from train_utils import GroupedBatchSampler, create_aspect_ratio_groups, init_distributed_mode, save_on_master, mkdir


def create_model(num_classes, load_pretrain_weights=True):
    """Build the Mask R-CNN to train, optionally initialized from COCO weights.

    Args:
        num_classes: number of classes passed to ``MaskRCNN``.
        load_pretrain_weights: when true, load ``./maskrcnn_resnet50_fpn_coco.pth``
            after dropping the class-specific head weights.

    Returns:
        The constructed ``MaskRCNN`` model.
    """
    # With little GPU memory (small batch sizes) consider passing
    # norm_layer=FrozenBatchNorm2d to resnet50_fpn_backbone (the default is
    # nn.BatchNorm2d); it acts like BatchNorm2d but its parameters are frozen.
    # trainable_layers counts from ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'];
    # 5 means everything is trained.
    # resnet50 imagenet weights url: https://download.pytorch.org/models/resnet50-0676ba61.pth
    model = MaskRCNN(resnet50_fpn_backbone(pretrain_path="resnet50.pth", trainable_layers=3),
                     num_classes=num_classes)

    if not load_pretrain_weights:
        return model

    # coco weights url: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth"
    weights_dict = torch.load("./maskrcnn_resnet50_fpn_coco.pth", map_location="cpu")

    # Remove the box-predictor and mask-logits entries in place; the remaining
    # weights are loaded non-strictly so the missing keys are tolerated.
    head_markers = ("box_predictor", "mask_fcn_logits")
    stale_keys = [name for name in weights_dict.keys()
                  if any(marker in name for marker in head_markers)]
    for name in stale_keys:
        del weights_dict[name]

    print(model.load_state_dict(weights_dict, strict=False))
    return model


def main(args):
    """Train (or only evaluate) Mask R-CNN, optionally across several processes.

    Builds the COCO train/val loaders, the model, the SGD optimizer and a
    MultiStepLR scheduler, optionally resumes from ``args.resume``, then runs
    train -> lr step -> evaluate -> log -> checkpoint for every epoch. With
    ``args.test_only`` set, a single evaluation is run instead.

    NOTE(review): ``args.distributed``, ``args.gpu`` and ``args.rank`` are not
    defined by the argument parser in this file; presumably
    ``init_distributed_mode`` sets them -- confirm, in particular that
    ``args.rank`` also exists when not running distributed.
    """
    init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)

    # files that collect the COCO metrics of every epoch
    now = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    det_results_file = f"det_results{now}.txt"
    seg_results_file = f"seg_results{now}.txt"

    # Data loading code
    print("Loading data")

    data_transform = {
        "train": transforms.Compose([transforms.ToTensor(),
                                     transforms.RandomHorizontalFlip(0.5)]),
        "val": transforms.Compose([transforms.ToTensor()])
    }

    COCO_root = args.data_path

    # load train data set
    # coco2017 -> annotations -> instances_train2017.json
    train_dataset = CocoDetection(COCO_root, "train", data_transform["train"])
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt
    # train_dataset = VOCInstances(COCO_root, year="2012", txt_name="train.txt")

    # load validation data set
    # coco2017 -> annotations -> instances_val2017.json
    val_dataset = CocoDetection(COCO_root, "val", data_transform["val"])
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt
    # val_dataset = VOCInstances(COCO_root, year="2012", txt_name="val.txt")

    print("Creating data loaders")
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        test_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
    else:
        train_sampler = torch.utils.data.RandomSampler(train_dataset)
        test_sampler = torch.utils.data.SequentialSampler(val_dataset)

    if args.aspect_ratio_group_factor >= 0:
        # bin index of every image according to its aspect ratio
        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)
    else:
        train_batch_sampler = torch.utils.data.BatchSampler(
            train_sampler, args.batch_size, drop_last=True)

    data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_sampler=train_batch_sampler, num_workers=args.workers,
        collate_fn=train_dataset.collate_fn)

    # the validation loader reuses the training dataset's collate_fn
    data_loader_test = torch.utils.data.DataLoader(
        val_dataset, batch_size=1,
        sampler=test_sampler, num_workers=args.workers,
        collate_fn=train_dataset.collate_fn)

    print("Creating model")
    # create model num_classes equal background + classes
    model = create_model(num_classes=args.num_classes + 1, load_pretrain_weights=args.pretrain)
    model.to(device)

    if args.distributed and args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    # keep a handle on the unwrapped model: its state dict is what gets
    # saved and restored, with or without the DDP wrapper
    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    # optimize only the parameters that require gradients
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(
        params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    # gradient scaler is only created when mixed precision is requested
    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)

    # When a checkpoint path is given, continue training from it.
    if args.resume:
        # If map_location is missing, torch.load will first load the module to CPU
        # and then copy each parameter to where it was saved,
        # which would result in all processes on the same machine using the same set of devices.
        checkpoint = torch.load(args.resume, map_location='cpu')  # holds model, optimizer and lr-scheduler states
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp and "scaler" in checkpoint:
            scaler.load_state_dict(checkpoint["scaler"])

    if args.test_only:
        utils.evaluate(model, data_loader_test, device=device)
        return

    train_loss = []
    learning_rate = []
    val_map = []

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            # give the distributed sampler the epoch number before iterating
            train_sampler.set_epoch(epoch)
        mean_loss, lr = utils.train_one_epoch(model, optimizer, data_loader,
                                              device, epoch, args.print_freq,
                                              warmup=True, scaler=scaler)

        # update learning rate
        lr_scheduler.step()

        # evaluate after every epoch
        det_info, seg_info = utils.evaluate(model, data_loader_test, device=device)

        # only write results when args.rank is -1 or 0
        # (presumably the main / non-distributed process -- TODO confirm)
        if args.rank in [-1, 0]:
            train_loss.append(mean_loss.item())
            learning_rate.append(lr)
            val_map.append(det_info[1])  # pascal mAP

            # write into txt
            with open(det_results_file, "a") as f:
                # each line holds the COCO metrics, the mean loss and the learning rate
                result_info = [f"{i:.4f}" for i in det_info + [mean_loss.item()]] + [f"{lr:.6f}"]
                txt = "epoch:{} {}".format(epoch, '  '.join(result_info))
                f.write(txt + "\n")

            with open(seg_results_file, "a") as f:
                # each line holds the COCO metrics, the mean loss and the learning rate
                result_info = [f"{i:.4f}" for i in seg_info + [mean_loss.item()]] + [f"{lr:.6f}"]
                txt = "epoch:{} {}".format(epoch, '  '.join(result_info))
                f.write(txt + "\n")

        if args.output_dir:
            # save_on_master is expected to write only on the main process
            # (judging by its name -- its definition is outside this file)
            save_files = {'model': model_without_ddp.state_dict(),
                          'optimizer': optimizer.state_dict(),
                          'lr_scheduler': lr_scheduler.state_dict(),
                          'args': args,
                          'epoch': epoch}
            if args.amp:
                save_files["scaler"] = scaler.state_dict()
            save_on_master(save_files,
                           os.path.join(args.output_dir, f'model_{epoch}.pth'))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))

    if args.rank in [-1, 0]:
        # plot loss and lr curve
        if len(train_loss) != 0 and len(learning_rate) != 0:
            from plot_curve import plot_loss_and_lr
            plot_loss_and_lr(train_loss, learning_rate)

        # plot mAP curve
        if len(val_map) != 0:
            from plot_curve import plot_map
            plot_map(val_map)


if __name__ == "__main__":
    import argparse

    def _str2bool(value):
        """Convert a command-line token such as "True"/"false"/"0" to bool.

        argparse's ``type=bool`` (or no type at all) keeps any non-empty
        string truthy, so ``--flag False`` would still enable the flag.
        """
        if isinstance(value, bool):
            return value
        token = value.strip().lower()
        if token in ("true", "t", "yes", "y", "on", "1"):
            return True
        if token in ("false", "f", "no", "n", "off", "0", ""):
            return False
        raise argparse.ArgumentTypeError("boolean value expected, got {!r}".format(value))

    parser = argparse.ArgumentParser(
        description=__doc__)

    # root directory of the training files (coco2017)
    parser.add_argument('--data-path', default='/data/coco2017', help='dataset')
    # training device
    parser.add_argument('--device', default='cuda', help='device')
    # number of object classes (background not included)
    parser.add_argument('--num-classes', default=90, type=int, help='num_classes')
    # batch size on each GPU
    parser.add_argument('-b', '--batch-size', default=4, type=int,
                        help='images per gpu, the total batch size is $NGPU x batch_size')
    # epoch index to start training from
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # total number of training epochs
    parser.add_argument('--epochs', default=26, type=int, metavar='N',
                        help='number of total epochs to run')
    # number of data loading / preprocessing workers
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    # learning rate; scale it with the number of GPUs and the batch size
    parser.add_argument('--lr', default=0.005, type=float,
                        help='initial learning rate, 0.02 is the default value for training '
                             'on 8 gpus and 2 images_per_gpu')
    # momentum of SGD
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # weight decay of SGD
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # step size for torch.optim.lr_scheduler.StepLR (only referenced by the
    # commented-out StepLR alternative in main)
    parser.add_argument('--lr-step-size', default=8, type=int, help='decrease lr every step-size epochs')
    # milestones for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int,
                        help='decrease lr every step-size epochs')
    # decay factor for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')
    # how often training progress is printed
    parser.add_argument('--print-freq', default=50, type=int, help='print frequency')
    # directory where files are saved
    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')
    # checkpoint to continue training from, if any
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
    parser.add_argument('--test-only', action="store_true", help="test only")

    # number of processes to launch (processes, not threads)
    parser.add_argument('--world-size', default=4, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
    parser.add_argument("--sync-bn", dest="sync_bn", help="Use sync batch norm", type=_str2bool, default=False)
    parser.add_argument("--pretrain", type=_str2bool, default=True, help="load COCO pretrain weights.")
    # mixed-precision training (requires a GPU that supports it)
    parser.add_argument("--amp", type=_str2bool, default=False,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()

    # when an output directory is given, create it if it does not exist yet
    if args.output_dir:
        mkdir(args.output_dir)

    main(args)


================================================
FILE: pytorch_object_detection/mask_rcnn/train_utils/__init__.py
================================================
from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups
from .distributed_utils import init_distributed_mode, save_on_master, mkdir
from .coco_eval import EvalCOCOMetric
from .coco_utils import coco_remove_images_without_annotations, convert_coco_poly_mask, convert_to_coco_api


================================================
FILE: pytorch_object_detection/mask_rcnn/train_utils/coco_eval.py
================================================
import json
import copy

import numpy as np
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import pycocotools.mask as mask_util
from .distributed_utils import all_gather, is_main_process


def merge(img_ids, eval_results):
    """Combine the image ids and results collected by every process.

    Both inputs are gathered from all processes, flattened, and then
    de-duplicated by image id (an image can show up in more than one process
    when the dataset is padded so that every process gets the same number of
    samples).

    Args:
        img_ids: image ids handled by the current process.
        eval_results: per-image results, aligned with ``img_ids``.

    Returns:
        tuple: ``(unique sorted image ids, results of the first occurrence
        of each id)``.
    """
    gathered_ids = all_gather(img_ids)
    gathered_results = all_gather(eval_results)

    flat_ids = [img_id for chunk in gathered_ids for img_id in chunk]
    flat_results = [result for chunk in gathered_results for result in chunk]

    # np.unique returns the ids sorted, plus the position of the first
    # occurrence of each id, which selects the matching result entry.
    unique_ids, first_positions = np.unique(np.array(flat_ids), return_index=True)
    kept_results = [flat_results[pos] for pos in first_positions]

    return list(unique_ids), kept_results


class EvalCOCOMetric:
    """Collect per-image predictions and score them with pycocotools.

    Predictions are accumulated through :meth:`update`, merged across
    processes and written to a JSON file by :meth:`synchronize_results`, and
    scored against the ground truth by :meth:`evaluate`.

    Args:
        coco: ground-truth COCO object; a deep copy is stored.
        iou_type: one of ``"bbox"``, ``"segm"`` or ``"keypoints"``. Only
            ``"bbox"`` and ``"segm"`` are supported by :meth:`update`.
        results_file_name: JSON file the merged predictions are written to
            and read back from during evaluation.
        classes_mapping: optional dict mapping ``str(predicted label)`` to the
            category id to report.
    """

    def __init__(self,
                 coco: "COCO" = None,
                 iou_type: str = None,
                 results_file_name: str = "predict_results.json",
                 classes_mapping: dict = None):
        self.coco = copy.deepcopy(coco)
        self.img_ids = []  # ids of the images handled by this process, in arrival order
        self._seen_img_ids = set()  # same ids as a set, for O(1) duplicate checks
        self.results = []
        self.aggregation_results = None
        self.classes_mapping = classes_mapping
        self.coco_evaluator = None
        assert iou_type in ["bbox", "segm", "keypoints"], \
            f"unsupported iou_type: {iou_type}"
        self.iou_type = iou_type
        self.results_file_name = results_file_name

    def _register_image(self, img_id):
        """Record ``img_id``; return False when it was already recorded."""
        if img_id in self._seen_img_ids:
            return False
        self._seen_img_ids.add(img_id)
        self.img_ids.append(img_id)
        return True

    def _to_category_id(self, label):
        """Translate a predicted label through ``classes_mapping`` if one is set."""
        class_idx = int(label)
        if self.classes_mapping is not None:
            class_idx = int(self.classes_mapping[str(class_idx)])
        return class_idx

    def prepare_for_coco_detection(self, targets, outputs):
        """Convert detection outputs to the COCO result format and store them.

        Args:
            targets: per-image dicts providing ``"image_id"``.
            outputs: per-image dicts providing ``"boxes"`` (x_min, y_min,
                x_max, y_max), ``"labels"`` and ``"scores"``.
        """
        for target, output in zip(targets, outputs):
            if len(output) == 0:
                continue

            img_id = int(target["image_id"])
            if not self._register_image(img_id):
                # the same image was already processed; skip the duplicate
                continue

            # COCO expects [x_min, y_min, w, h]. Work on a copy so the
            # caller's prediction tensor keeps its corner format.
            boxes_xywh = output["boxes"].clone()
            boxes_xywh[:, 2:] -= boxes_xywh[:, :2]
            per_image_classes = output["labels"].tolist()
            per_image_scores = output["scores"].tolist()

            res_list = []
            for object_score, object_class, object_box in zip(
                    per_image_scores, per_image_classes, boxes_xywh.tolist()):
                res_list.append({
                    "image_id": img_id,
                    "category_id": self._to_category_id(object_class),
                    # rounding keeps the resulting JSON file small
                    "bbox": [round(b, 2) for b in object_box],
                    "score": round(float(object_score), 3),
                })
            self.results.append(res_list)

    def prepare_for_coco_segmentation(self, targets, outputs):
        """Convert segmentation outputs to the COCO result format and store them.

        Args:
            targets: per-image dicts providing ``"image_id"``.
            outputs: per-image dicts providing ``"masks"``, ``"labels"`` and
                ``"scores"``. Masks are binarized at 0.5 and RLE-encoded.
        """
        for target, output in zip(targets, outputs):
            if len(output) == 0:
                continue

            img_id = int(target["image_id"])
            if not self._register_image(img_id):
                # the same image was already processed; skip the duplicate
                continue

            per_image_masks = output["masks"]
            per_image_classes = output["labels"].tolist()
            per_image_scores = output["scores"].tolist()

            masks = per_image_masks > 0.5

            res_list = []
            for mask, label, score in zip(masks, per_image_classes, per_image_scores):
                rle = mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0]
                # the RLE counts are bytes; JSON needs text
                rle["counts"] = rle["counts"].decode("utf-8")

                res_list.append({
                    "image_id": img_id,
                    "category_id": self._to_category_id(label),
                    "segmentation": rle,
                    "score": round(score, 3),
                })
            self.results.append(res_list)

    def update(self, targets, outputs):
        """Store the predictions of one batch according to ``iou_type``.

        Raises:
            KeyError: if ``iou_type`` is neither ``"bbox"`` nor ``"segm"``.
        """
        if self.iou_type == "bbox":
            self.prepare_for_coco_detection(targets, outputs)
        elif self.iou_type == "segm":
            self.prepare_for_coco_segmentation(targets, outputs)
        else:
            raise KeyError(f"not support iou_type: {self.iou_type}")

    def synchronize_results(self):
        """Merge results from all processes; the main process writes them to JSON."""
        eval_ids, eval_results = merge(self.img_ids, self.results)
        self.aggregation_results = {"img_ids": eval_ids, "results": eval_results}

        # only the main process needs to write the file
        if is_main_process():
            results = [res for per_image in eval_results for res in per_image]
            json_str = json.dumps(results, indent=4)
            with open(self.results_file_name, 'w') as json_file:
                json_file.write(json_str)

    def evaluate(self):
        """Run COCOeval on the saved results file.

        Returns:
            list | None: ``COCOeval.stats`` as a list on the main process,
            ``None`` on every other process.
        """
        # evaluation only happens on the main process
        if not is_main_process():
            return None

        coco_true = self.coco
        coco_pre = coco_true.loadRes(self.results_file_name)

        self.coco_evaluator = COCOeval(cocoGt=coco_true, cocoDt=coco_pre, iouType=self.iou_type)

        self.coco_evaluator.evaluate()
        self.coco_evaluator.accumulate()
        print(f"IoU metric: {self.iou_type}")
        self.coco_evaluator.summarize()

        return self.coco_evaluator.stats.tolist()  # numpy to list


================================================
FILE: pytorch_object_detection/mask_rcnn/train_utils/coco_utils.py
================================================
import torch
import torch.utils.data
from pycocotools import mask as coco_mask
from pycocotools.coco import COCO


def coco_remove_images_without_annotations(dataset, ids):
    """Keep only the image ids that have at least one usable annotation.

    An image is dropped when it has no annotations at all, or when every one
    of its boxes has a width or height of at most 1.

    Adapted from:
    https://github.com/pytorch/vision/blob/master/references/detection/coco_utils.py

    :param dataset: object exposing ``getAnnIds(imgIds=..., iscrowd=...)`` and
        ``loadAnns(ann_ids)``
    :param ids: iterable of image ids to filter
    :return: list of the ids that passed the filter, in their original order
    """
    def _box_is_degenerate(obj):
        # bbox is laid out so that the entries from index 2 on are the sizes
        return any(side <= 1 for side in obj["bbox"][2:])

    def _has_usable_box(anno):
        # False for an empty annotation list and when every box is degenerate
        return any(not _box_is_degenerate(obj) for obj in anno)

    kept = []
    for img_id in ids:
        anno = dataset.loadAnns(dataset.getAnnIds(imgIds=img_id, iscrowd=None))
        if _has_usable_box(anno):
            kept.append(img_id)
    return kept


def convert_coco_poly_mask(segmentations, height, width):
    """Rasterize COCO polygon segmentations into a stack of masks.

    Args:
        segmentations: iterable with one polygon collection per object.
        height: mask height passed to pycocotools.
        width: mask width passed to pycocotools.

    Returns:
        Tensor: one mask per object stacked along dim 0, or an empty
        ``(0, height, width)`` uint8 tensor when there are no objects.
    """
    def _rasterize(polygons):
        rles = coco_mask.frPyObjects(polygons, height, width)
        decoded = coco_mask.decode(rles)
        if len(decoded.shape) < 3:
            # add a trailing axis so the reduction over dim 2 below is valid
            decoded = decoded[..., None]
        # merge all decoded parts of the object into a single mask
        return torch.as_tensor(decoded, dtype=torch.uint8).any(dim=2)

    per_object = [_rasterize(polygons) for polygons in segmentations]
    if not per_object:
        # no objects: return an all-zero, zero-length mask stack
        return torch.zeros((0, height, width), dtype=torch.uint8)
    return torch.stack(per_object, dim=0)


def convert_to_coco_api(self):
    """Build a pycocotools ``COCO`` object from a dataset's annotations.

    ``self`` is the dataset: it must support ``len()`` and provide
    ``get_annotations(idx)`` returning ``(targets, height, width)``, where
    ``targets`` holds ``image_id``, ``boxes``, ``labels``, ``area``,
    ``iscrowd`` and optionally ``masks``.

    Returns:
        COCO: object whose ``dataset`` dict is filled in and indexed.
    """
    images = []
    annotations = []
    seen_categories = set()
    # annotation IDs need to start at 1, not 0, see torchvision issue #1530
    next_ann_id = 1

    for img_idx in range(len(self)):
        targets, h, w = self.get_annotations(img_idx)
        img_id = targets["image_id"].item()
        images.append({"id": img_id,
                       "height": h,
                       "width": w})

        # convert (xmin, ymin, xmax, ymax) to (xmin, ymin, w, h) on a copy
        xywh = targets["boxes"].clone()
        xywh[:, 2:] -= xywh[:, :2]
        xywh = xywh.tolist()
        labels = targets["labels"].tolist()
        areas = targets["area"].tolist()
        iscrowd = targets["iscrowd"].tolist()

        has_masks = "masks" in targets
        if has_masks:
            # make masks Fortran contiguous for coco_mask
            masks = targets["masks"].permute(0, 2, 1).contiguous().permute(0, 2, 1)

        for obj_idx, box in enumerate(xywh):
            ann = {"image_id": img_id,
                   "bbox": box,
                   "category_id": labels[obj_idx],
                   "area": areas[obj_idx],
                   "iscrowd": iscrowd[obj_idx],
                   "id": next_ann_id}
            seen_categories.add(labels[obj_idx])
            if has_masks:
                ann["segmentation"] = coco_mask.encode(masks[obj_idx].numpy())
            annotations.append(ann)
            next_ann_id += 1

    coco_ds = COCO()
    coco_ds.dataset = {"images": images,
                       "categories": [{"id": cat} for cat in sorted(seen_categories)],
                       "annotations": annotations}
    coco_ds.createIndex()
    return coco_ds


================================================
FILE: pytorch_object_detection/mask_rcnn/train_utils/distributed_utils.py
================================================
from collections import defaultdict, deque
import datetime
import pickle
import time
import errno
import os

import torch
import torch.distributed as dist


class SmoothedValue(object):
    """Running statistic over a sliding window plus the whole series.

    The last ``window_size`` values are kept for the windowed statistics
    (median, avg, max, value) while ``total`` / ``count`` track the global
    average.
    """
    def __init__(self, window_size=20, fmt=None):
        self.fmt = "{value:.4f} ({global_avg:.4f})" if fmt is None else fmt
        self.deque = deque(maxlen=window_size)  # bounded buffer of recent values
        self.total = 0.0
        self.count = 0

    def update(self, value, n=1):
        """Record ``value``, counting it ``n`` times in the global average."""
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """Sum ``count`` and ``total`` over all processes.

        Warning: does not synchronize the deque!
        """
        if is_dist_avail_and_initialized():
            stats = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda")
            dist.barrier()
            dist.all_reduce(stats)
            synced_count, synced_total = stats.tolist()
            self.count = int(synced_count)
            self.total = synced_total

    @property
    def median(self):
        """Median of the values currently in the window."""
        window = torch.tensor(list(self.deque))
        return window.median().item()

    @property
    def avg(self):
        """Mean of the values currently in the window."""
        window = torch.tensor(list(self.deque), dtype=torch.float32)
        return window.mean().item()

    @property
    def global_avg(self):
        """Weighted mean of every value ever recorded."""
        return self.total / self.count

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    @property
    def value(self):
        """Most recently recorded value."""
        return self.deque[-1]

    def __str__(self):
        stats = {
            "median": self.median,
            "avg": self.avg,
            "global_avg": self.global_avg,
            "max": self.max,
            "value": self.value,
        }
        return self.fmt.format(**stats)


def all_gather(data):
    """Collect ``data`` from every process.

    Run all_gather on arbitrary picklable data (not necessarily tensors).

    Args:
        data: any picklable object
    Returns:
        list[data]: list of data gathered from each rank; a one-element list
        when only a single process is running
    """
    num_procs = get_world_size()
    if num_procs != 1:
        gathered = [None] * num_procs
        dist.all_gather_object(gathered, data)
        return gathered

    # single process: nothing to exchange
    return [data]


def reduce_dict(input_dict, average=True):
    """Reduce the values of a dict across all processes.

    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum

    Returns:
        dict: same keys as ``input_dict`` with the reduced values. With fewer
        than two processes ``input_dict`` itself is returned unchanged.
    """
    num_procs = get_world_size()
    if num_procs < 2:  # single-process case, nothing to reduce
        return input_dict

    with torch.no_grad():
        # sort the keys so that they are consistent across processes
        ordered_keys = sorted(input_dict.keys())
        stacked = torch.stack([input_dict[key] for key in ordered_keys], dim=0)
        dist.all_reduce(stacked)
        if average:
            stacked /= num_procs

        return dict(zip(ordered_keys, stacked))


class MetricLogger(object):
    """Named collection of ``SmoothedValue`` meters with progress logging.

    Meters are created on first use by :meth:`update` and can be read back as
    attributes (``logger.loss``). :meth:`log_every` wraps an iterable and
    prints progress, ETA, meter values and timing while it is consumed.
    """
    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Feed one value per keyword into the meter of the same name."""
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        # __getattr__ only runs after normal lookup failed. Read ``meters``
        # straight from the instance dict: going through ``self.meters`` would
        # re-enter __getattr__ forever when the attribute is not set yet
        # (e.g. on an instance created without running __init__).
        instance_dict = self.__dict__
        meters = instance_dict.get("meters")
        if meters is not None and attr in meters:
            return meters[attr]
        if attr in instance_dict:
            return instance_dict[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        loss_str = ["{}: {}".format(name, str(meter))
                    for name, meter in self.meters.items()]
        return self.delimiter.join(loss_str)

    def synchronize_between_processes(self):
        """Synchronize every meter across processes."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register (or replace) the meter stored under ``name``."""
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield the items of ``iterable`` while printing progress.

        A line is printed every ``print_freq`` items and for the last item;
        a total-time summary is printed once the iterable is exhausted.

        Args:
            iterable: sized iterable (``len()`` is required).
            print_freq: number of items between two progress lines.
            header: optional prefix for every printed line.
        """
        i = 0
        if not header:
            header = ""
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        num_items = len(iterable)
        use_cuda = torch.cuda.is_available()
        # pad the running index to the width of the total count
        space_fmt = ":" + str(len(str(num_items))) + "d"
        fields = [header,
                  '[{0' + space_fmt + '}/{1}]',
                  'eta: {eta}',
                  '{meters}',
                  'time: {time}',
                  'data: {data}']
        if use_cuda:
            # peak GPU memory is only reported when CUDA is available
            fields.append('max mem: {memory:.0f}')
        log_msg = self.delimiter.join(fields)
        MB = 1024.0 * 1024.0
        for obj in iterable:
            data_time.update(time.time() - end)
            yield obj
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == num_items - 1:
                eta_second = iter_time.global_avg * (num_items - i)
                eta_string = str(datetime.timedelta(seconds=eta_second))
                fmt_kwargs = {"eta": eta_string,
                              "meters": str(self),
                              "time": str(iter_time),
                              "data": str(data_time)}
                if use_cuda:
                    fmt_kwargs["memory"] = torch.cuda.max_memory_allocated() / MB
                print(log_msg.format(i, num_items, **fmt_kwargs))
            i += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        # max(..., 1) avoids a ZeroDivisionError when the iterable is empty
        print('{} Total time: {} ({:.4f} s / it)'.format(header,
                                                         total_time_str,
                                                         total_time / max(num_items, 1)))


def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
    """Create a ``LambdaLR`` that linearly warms the learning rate up.

    The multiplier starts at ``warmup_factor`` for step 0, rises linearly,
    and is 1 from step ``warmup_iters`` onwards.
    """

    def lr_multiplier(step):
        """Return the learning-rate multiplier for the given step count."""
        warmed_up = step >= warmup_iters
        if warmed_up:
            return 1
        progress = float(step) / warmup_iters
        # blend from warmup_factor (progress = 0) towards 1 (progress = 1)
        return warmup_factor * (1 - progress) + progress

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_multiplier)


def mkdir(path):
    """Create ``path`` (including parents), tolerating an existing path.

    Any ``OSError`` other than "already exists" is re-raised.
    """
    try:
        os.makedirs(path)
    except OSError as exc:
        already_exists = exc.errno == errno.EEXIST
        if not already_exists:
            raise


def setup_for_distributed(is_master):
    """
    This function disables when not in master process
    """
    import builtins as __builtin__
    builtin_print = __builtin__.print

    def print(*args, **kwargs):
        force = kwargs.pop('force', False)
        if is_master or force:
            builtin_print(*args, **kwargs)

    __builtin__.print = print


def is_dist_avail_and_initialized():
    """Return True only when torch.distributed is available and initialized."""
    return bool(dist.is_available() and dist.is_initialized())


def get_world_size():
    """Return the number of processes, or 1 when not running distributed."""
    return dist.get_world_size() if is_dist_avail_and_initialized() else 1


def get_rank():
    """Return the rank of this process, or 0 when not running distributed."""
    return dist.get_rank() if is_dist_avail_and_initialized() else 0


def is_main_process():
    """Return True when the current process has rank 0."""
    rank = get_rank()
    return rank == 0


def save_on_master(*args, **kwargs):
    """Forward the arguments to ``torch.save``, but only on the main process."""
    if not is_main_process():
        return
    torch.save(*args, **kwargs)


def init_distributed_mode(args):
    """Initialise torch.distributed from environment variables.

    ``RANK`` / ``WORLD_SIZE`` / ``LOCAL_RANK`` are used when the first two are
    present, otherwise ``SLURM_PROCID``. When neither is set,
    ``args.distributed`` is set to False and nothing else happens.

    On the distributed path ``args`` gains ``rank``, ``gpu``,
    ``distributed`` and ``dist_backend``; ``args.dist_url`` and
    ``args.world_size`` are read when creating the process group.
    """
    env = os.environ
    if 'RANK' in env and 'WORLD_SIZE' in env:
        args.rank = int(env["RANK"])
        args.world_size = int(env['WORLD_SIZE'])
        args.gpu = int(env['LOCAL_RANK'])
    elif 'SLURM_PROCID' in env:
        args.rank = int(env['SLURM_PROCID'])
        # map the global rank onto the GPUs visible to this process
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print('Not using distributed mode')
        args.distributed = False
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print(f'| distributed init (rank {args.rank}): {args.dist_url}', flush=True)
    torch.distributed.init_process_group(backend=args.dist_backend,
                                         init_method=args.dist_url,
                                         world_size=args.world_size,
                                         rank=args.rank)
    torch.distributed.barrier()
    # silence print() on every process except rank 0
    setup_for_distributed(args.rank == 0)


================================================
FILE: pytorch_object_detection/mask_rcnn/train_utils/group_by_aspect_ratio.py
================================================
import bisect
from collections import defaultdict
import copy
from itertools import repeat, chain
import math
import numpy as np

import torch
import torch.utils.data
from torch.utils.data.sampler import BatchSampler, Sampler
from torch.utils.model_zoo import tqdm
import torchvision

from PIL import Image


def _repeat_to_at_least(iterable, n):
    repeat_times = math.ceil(n / len(iterable))
    repeated = chain.from_iterable(repeat(iterable, repeat_times))
    return list(repeated)


class GroupedBatchSampler(BatchSampler):
    """
    Batch sampler that only puts indices of the same group into a batch.

    Indices are drawn from ``sampler`` in order and collected per group; a
    batch is emitted as soon as a group has ``batch_size`` indices, so the
    output follows the ordering of the wrapped sampler as closely as possible.
    Arguments:
        sampler (Sampler): Base sampler.
        group_ids (list[int]): If the sampler produces indices in range [0, N),
            `group_ids` must be a list of `N` ints which contains the group id of each sample.
            The group ids must be a continuous set of integers starting from
            0, i.e. they must be in the range [0, num_groups).
        batch_size (int): Size of mini-batch.
    """
    def __init__(self, sampler, group_ids, batch_size):
        if not isinstance(sampler, Sampler):
            raise ValueError(
                "sampler should be an instance of "
                "torch.utils.data.Sampler, but got sampler={}".format(sampler)
            )
        self.sampler = sampler
        self.group_ids = group_ids
        self.batch_size = batch_size

    def __iter__(self):
        pending = defaultdict(list)  # partially filled batch of each group
        history = defaultdict(list)  # every index seen so far, per group

        emitted = 0
        for sample_idx in self.sampler:
            gid = self.group_ids[sample_idx]
            pending[gid].append(sample_idx)
            history[gid].append(sample_idx)
            if len(pending[gid]) == self.batch_size:
                yield pending.pop(gid)
                emitted += 1
            assert len(pending[gid]) < self.batch_size

        # The sampler is exhausted. Top up leftover partial batches so that
        # the number of batches matches len(self) and stays deterministic.
        shortfall = len(self) - emitted
        if shortfall > 0:
            # fullest leftover buffers are completed first
            fullest_first = sorted(pending.items(),
                                   key=lambda entry: len(entry[1]), reverse=True)
            for gid, batch in fullest_first:
                missing = self.batch_size - len(batch)
                # pad with indices this group has already produced
                filler = _repeat_to_at_least(history[gid], missing)
                batch.extend(filler[:missing])
                assert len(batch) == self.batch_size
                yield batch
                shortfall -= 1
                if shortfall == 0:
                    break
        assert shortfall == 0

    def __len__(self):
        return len(self.sampler) // self.batch_size


def _compute_aspect_ratios_slow(dataset, indices=None):
    print("Your dataset doesn't support the fast path for "
          "computing the aspect ratios, so will iterate over "
          "the full dataset and load every image instead. "
          "This might take some time...")
    if indices is None:
        indices = range(len(dataset))

    class SubsetSampler(Sampler):
        def __init__(self, indices):
            self.indices = indices

        def __iter__(self):
            return iter(self.indices)

        def __len__(self):
            return len(self.indices)

    sampler = SubsetSampler(indices)
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, sampler=sampler,
        num_workers=14,  # you might want to increase it for faster processing
        collate_fn=lambda x: x[0])
    aspect_ratios = []
    with tqdm(total=len(dataset)) as pbar:
        for _i, (img, _) in enumerate(data_loader):
            pbar.update(1)
            height, width = img.shape[-2:]
            aspect_ratio = float(width) / float(height)
            aspect_ratios.append(aspect_ratio)
    return aspect_ratios


def _compute_aspect_ratios_custom_dataset(dataset, indices=None):
    if indices is None:
        indices = range(len(dataset))
    aspect_ratios = []
    for i in indices:
        height, width = dataset.get_height_and_width(i)
        aspect_ratio = float(width) / float(height)
        aspect_ratios.append(aspect_ratio)
    return aspect_ratios


def _compute_aspect_ratios_coco_dataset(dataset, indices=None):
    if indices is None:
        indices = range(len(dataset))
    aspect_ratios = []
    for i in indices:
        img_info = dataset.coco.imgs[dataset.ids[i]]
        aspect_ratio = float(img_info["width"]) / float(img_info["height"])
        aspect_ratios.append(aspect_ratio)
    return aspect_ratios


def _compute_aspect_ratios_voc_dataset(dataset, indices=None):
    """Compute width/height ratios by opening the files in ``dataset.images``.

    Args:
        dataset: object providing ``images``, indexable by sample index and
            yielding something ``PIL.Image.open`` accepts.
        indices: dataset indices to visit; all of them when None.

    Returns:
        list[float]: one aspect ratio per visited index.
    """
    if indices is None:
        indices = range(len(dataset))
    aspect_ratios = []
    for i in indices:
        # this doesn't load the data into memory, because PIL loads it lazily;
        # the context manager closes the file handle right after reading the size
        with Image.open(dataset.images[i]) as img:
            width, height = img.size
        aspect_ratios.append(float(width) / float(height))
    return aspect_ratios


def _compute_aspect_ratios_subset_dataset(dataset, indices=None):
    """Compute aspect ratios for a subset by delegating to its parent dataset.

    The subset-relative ``indices`` are translated through ``dataset.indices``
    and ``compute_aspect_ratios`` is then run on ``dataset.dataset``.
    """
    positions = range(len(dataset)) if indices is None else indices
    parent_indices = [dataset.indices[pos] for pos in positions]
    return compute_aspect_ratios(dataset.dataset, parent_indices)


def compute_aspect_ratios(dataset, indices=None):
    """Compute width/height ratios using the cheapest strategy available.

    Strategies are tried in this order: a ``get_height_and_width`` method on
    the dataset, torchvision ``CocoDetection``, torchvision ``VOCDetection``,
    ``torch.utils.data.Subset``, and finally loading every sample.
    """
    if hasattr(dataset, "get_height_and_width"):
        strategy = _compute_aspect_ratios_custom_dataset
    elif isinstance(dataset, torchvision.datasets.CocoDetection):
        strategy = _compute_aspect_ratios_coco_dataset
    elif isinstance(dataset, torchvision.datasets.VOCDetection):
        strategy = _compute_aspect_ratios_voc_dataset
    elif isinstance(dataset, torch.utils.data.Subset):
        strategy = _compute_aspect_ratios_subset_dataset
    else:
        # slow path
        strategy = _compute_aspect_ratios_slow

    return strategy(dataset, indices)


def _quantize(x, bins):
    bins = copy.deepcopy(bins)
    bins = sorted(bins)
    # bisect_right：寻找y元素按顺序应该排在bins中哪个元素的右边，返回的是索引
    quantized = list(map(lambda y: bisect.bisect_right(bins, y), x))
    return quantized


def create_aspect_ratio_groups(dataset, k=0):
    """Assign every sample of ``dataset`` to an aspect-ratio group.

    Args:
        dataset: dataset passed to ``compute_aspect_ratios``.
        k: for ``k > 0`` the bin edges are ``2 * k + 1`` points spaced evenly
            in log2 between 0.5 and 2; otherwise a single edge at 1.0 is used.

    Returns:
        list[int]: group id of each sample.
    """
    # width / height ratio of every image in the dataset
    aspect_ratios = compute_aspect_ratios(dataset)

    if k > 0:
        bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist()
    else:
        bins = [1.0]

    # index of the bin each ratio falls into
    groups = _quantize(aspect_ratios, bins)

    # count number of elements per group
    _, counts = np.unique(groups, return_counts=True)
    fbins = [0, *bins, np.inf]
    print(f"Using {fbins} as bins for aspect ratio quantization")
    print(f"Count of instances per bin: {counts}")
    return groups


================================================
FILE: pytorch_object_detection/mask_rcnn/train_utils/train_eval_utils.py
================================================
import math
import sys
import time

import torch

import train_utils.distributed_utils as utils
from .coco_eval import EvalCOCOMetric


def train_one_epoch(model, optimizer, data_loader, device, epoch,
                    print_freq=50, warmup=False, scaler=None):
    """Train ``model`` for one epoch.

    Args:
        model: called as ``model(images, targets)`` and expected to return a
            dict of loss tensors.
        optimizer: optimizer updating the model parameters.
        data_loader: yields ``(images, targets)`` batches.
        device: device the images and targets are moved to.
        epoch: epoch number; warmup is only applied when it is 0.
        print_freq: number of iterations between two log lines.
        warmup: enable linear learning-rate warmup during epoch 0.
        scaler: gradient scaler; mixed precision is used when it is not None.

    Returns:
        tuple: ``(mean loss as a 1-element tensor, last learning rate)``.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if epoch == 0 and warmup is True:  # warm the learning rate up during the first epoch only
        warmup_factor = 1.0 / 1000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    mloss = torch.zeros(1).to(device)  # mean losses
    # defined up front so the return value exists even if the loader yields nothing
    now_lr = optimizer.param_groups[0]["lr"]
    for i, [images, targets] in enumerate(metric_logger.log_every(data_loader, print_freq, header)):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # mixed-precision context; disabled when no scaler is given
        with torch.cuda.amp.autocast(enabled=scaler is not None):
            loss_dict = model(images, targets)

            losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purpose
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()
        # running mean of the reduced loss
        mloss = (mloss * i + loss_value) / (i + 1)  # update mean losses

        if not math.isfinite(loss_value):  # abort training on a NaN / infinite loss
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        if scaler is not None:
            scaler.scale(losses).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            losses.backward()
            optimizer.step()

        if lr_scheduler is not None:  # only set while warming up in the first epoch
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        now_lr = optimizer.param_groups[0]["lr"]
        metric_logger.update(lr=now_lr)

    return mloss, now_lr


@torch.no_grad()
def evaluate(model, data_loader, device):
    """Run the model over ``data_loader`` and compute COCO bbox and segm metrics.

    Predictions are moved to the CPU, collected by one ``EvalCOCOMetric`` per
    IoU type, synchronized across processes and evaluated on the main process.

    Returns:
        tuple: ``(bbox stats, segm stats)`` on the main process and
        ``(None, None)`` on every other process.
    """
    model.eval()
    cpu_device = torch.device("cpu")
    metric_logger = utils.MetricLogger(delimiter="  ")

    det_metric = EvalCOCOMetric(data_loader.dataset.coco, iou_type="bbox", results_file_name="det_results.json")
    seg_metric = EvalCOCOMetric(data_loader.dataset.coco, iou_type="segm", results_file_name="seg_results.json")

    for image, targets in metric_logger.log_every(data_loader, 100, "Test: "):
        image = [img.to(device) for img in image]

        # skip the GPU-only synchronization when running on the CPU
        if device != cpu_device:
            torch.cuda.synchronize(device)

        tic = time.time()
        outputs = model(image)

        outputs = [{key: value.to(cpu_device) for key, value in prediction.items()}
                   for prediction in outputs]
        model_time = time.time() - tic

        det_metric.update(targets, outputs)
        seg_metric.update(targets, outputs)
        metric_logger.update(model_time=model_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)

    # merge the predictions collected by every process
    det_metric.synchronize_results()
    seg_metric.synchronize_results()

    coco_info = None
    seg_info = None
    if utils.is_main_process():
        coco_info = det_metric.evaluate()
        seg_info = seg_metric.evaluate()

    return coco_info, seg_info


================================================
FILE: pytorch_object_detection/mask_rcnn/transforms.py
================================================
import random
from torchvision.transforms import functional as F


class Compose(object):
    """Chain several transforms that each take and return (image, target)."""
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        pair = (image, target)
        for transform in self.transforms:
            # each transform receives the output of the previous one
            pair = transform(*pair)
        image, target = pair
        return image, target


class ToTensor(object):
    """Convert the image with ``F.to_tensor``; the target is passed through."""
    def __call__(self, image, target):
        return F.to_tensor(image), target


class RandomHorizontalFlip(object):
    """Randomly mirror the image left-right together with its boxes (and masks, if present)."""
    def __init__(self, prob=0.5):
        # Threshold compared against random.random() on every call.
        self.prob = prob

    def __call__(self, image, target):
        """Flip when a random draw falls below ``prob``; otherwise return the inputs untouched.

        Note: ``target["boxes"]`` is modified in place when a flip happens.
        """
        if not (random.random() < self.prob):
            return image, target

        _, width = image.shape[-2:]
        flipped = image.flip(-1)  # reverse the last (width) axis

        boxes = target["boxes"]
        # Boxes are (xmin, ymin, xmax, ymax): after mirroring,
        # new xmin = width - old xmax and new xmax = width - old xmin.
        boxes[:, [0, 2]] = width - boxes[:, [2, 0]]
        target["boxes"] = boxes

        if "masks" in target:
            target["masks"] = target["masks"].flip(-1)
        return flipped, target


================================================
FILE: pytorch_object_detection/mask_rcnn/validation.py
================================================
"""
该脚本用于调用训练好的模型权重去计算验证集/测试集的COCO指标
以及每个类别的mAP(IoU=0.5)
"""

import os
import json

import torch
from tqdm import tqdm
import numpy as np

import transforms
from backbone import resnet50_fpn_backbone
from network_files import MaskRCNN
from my_dataset_coco import CocoDetection
from my_dataset_voc import VOCInstances
from train_utils import EvalCOCOMetric


def summarize(self, catId=None):
    """
    Compute summary metrics for evaluation results and build their report text.
    Note this function can *only* be applied on the default parameter setting.

    Args:
        self: an evaluator object exposing ``params`` (with ``iouThrs``,
            ``areaRngLbl`` and ``maxDets``) and ``eval`` (a dict holding the
            ``'precision'`` and ``'recall'`` arrays).
        catId (int, optional): index along the category axis of the
            precision/recall arrays. When it is an int, only that category is
            summarized; otherwise all categories are averaged together.

    Returns:
        tuple: ``(stats, print_info)`` where ``stats`` is a list of 12 values
        (6 AP entries followed by 6 AR entries) and ``print_info`` is the
        matching 12-line, newline-joined report string.

    Raises:
        Exception: if ``self.eval`` is empty, i.e. nothing has been accumulated.
    """
    # Validate up front: without accumulated results the lookups below would
    # fail with an uninformative KeyError before this message could be shown.
    if not self.eval:
        raise Exception('Please run accumulate() first')

    def _summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):
        """Return (mean value, formatted line) for one AP (ap=1) or AR (ap=0) setting."""
        p = self.params
        iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
        titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
        typeStr = '(AP)' if ap == 1 else '(AR)'
        iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
            if iouThr is None else '{:0.2f}'.format(iouThr)

        # Positions of the requested area range / detection limit in the params lists.
        aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
        mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]

        if ap == 1:
            # dimension of precision: [TxRxKxAxM]
            s = self.eval['precision']
            # keep only the requested IoU threshold, if one was given
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]

            if isinstance(catId, int):
                s = s[:, :, catId, aind, mind]
            else:
                s = s[:, :, :, aind, mind]

        else:
            # dimension of recall: [TxKxAxM]
            s = self.eval['recall']
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]

            if isinstance(catId, int):
                s = s[:, catId, aind, mind]
            else:
                s = s[:, :, aind, mind]

        # Entries <= -1 are excluded from the mean; -1 is reported when nothing is left.
        valid = s[s > -1]
        mean_s = -1 if len(valid) == 0 else np.mean(valid)

        print_string = iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)
        return mean_s, print_string

    max_dets = self.params.maxDets
    # The 12 settings, in output order. The first entry deliberately relies on
    # _summarize's own default maxDets=100.
    settings = [
        dict(ap=1),
        dict(ap=1, iouThr=.5, maxDets=max_dets[2]),
        dict(ap=1, iouThr=.75, maxDets=max_dets[2]),
        dict(ap=1, areaRng='small', maxDets=max_dets[2]),
        dict(ap=1, areaRng='medium', maxDets=max_dets[2]),
        dict(ap=1, areaRng='large', maxDets=max_dets[2]),
        dict(ap=0, maxDets=max_dets[0]),
        dict(ap=0, maxDets=max_dets[1]),
        dict(ap=0, maxDets=max_dets[2]),
        dict(ap=0, areaRng='small', maxDets=max_dets[2]),
        dict(ap=0, areaRng='medium', maxDets=max_dets[2]),
        dict(ap=0, areaRng='large', maxDets=max_dets[2]),
    ]

    stats, print_list = [], []
    for kwargs in settings:
        value, line = _summarize(**kwargs)
        stats.append(value)
        print_list.append(line)

    print_info = "\n".join(print_list)

    return stats, print_info


def save_info(coco_evaluator,
              category_index: dict,
              save_name: str = "record_mAP.txt"):
    """Print per-category AP (IoU=0.5) and write the full report to ``save_name``.

    Args:
        coco_evaluator: evaluator object handed to ``summarize``; its
            ``params.iouType`` is printed as a header.
        category_index: mapping whose values are class names; values equal
            to ``"N/A"`` are dropped before enumeration.
        save_name: path of the text file to (over)write.
    """
    print(f"IoU metric: {coco_evaluator.params.iouType}")

    # Overall COCO summary across all categories.
    _, print_coco = summarize(coco_evaluator)

    # Per-category value: entry 1 of the stats is AP at IoU=0.5.
    class_names = [name for name in category_index.values() if name != "N/A"]
    per_class_lines = []
    for cat_idx, class_name in enumerate(class_names):
        cat_stats, _ = summarize(coco_evaluator, catId=cat_idx)
        per_class_lines.append(" {:15}: {}".format(class_name, cat_stats[1]))

    print_voc = "\n".join(per_class_lines)
    print(print_voc)

    # Persist the validation results to a text file.
    with open(save_name, "w") as f:
        f.write("\n".join(["COCO results:",
                           print_coco,
                           "",
                           "mAP(IoU=0.5) for each category:",
                           print_voc]))


def main(parser_data):
    """Evaluate trained Mask R-CNN weights on the validation set and save the metrics.

    Args:
        parser_data: namespace-like object providing ``device``,
            ``label_json_path``, ``data_path``, ``batch_size``,
            ``num_classes`` (background excluded) and ``weights_path``.

    Side effects:
        Writes ``det_record_mAP.txt`` and ``seg_record_mAP.txt`` through
        ``save_info`` (bbox and segm results respectively).
    """
    device = torch.device(parser_data.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    data_transform = {
        "val": transforms.Compose([transforms.ToTensor()])
    }

    # read class_indict
    label_json_path = parser_data.label_json_path
    assert os.path.exists(label_json_path), "json file {} dose not exist.".format(label_json_path)
    with open(label_json_path, 'r') as f:
        category_index = json.load(f)

    data_root = parser_data.data_path

    # The dataset supplies its own collate_fn: each sample is an (image, target)
    # pair, so the default batching cannot be used.
    batch_size = parser_data.batch_size
    # os.cpu_count() may return None; fall back to 1 so min() never compares None with ints.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using %g dataloader workers' % nw)

    # load validation data set
    val_dataset = CocoDetection(data_root, "val", data_transform["val"])
    # Alternative for Pascal VOC (VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt):
    # val_dataset = VOCInstances(data_root, year="2012", txt_name="val.txt", transforms=data_transform["val"])
    val_dataset_loader = torch.utils.data.DataLoader(val_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     pin_memory=True,
                                                     num_workers=nw,
                                                     collate_fn=val_dataset.collate_fn)

    # create model
    backbone = resnet50_fpn_backbone()
    # Read the class count from the argument passed in, not from a module-level
    # ``args`` global, so main() also works when called from another module.
    # +1 accounts for the background class.
    model = MaskRCNN(backbone, num_classes=parser_data.num_classes + 1)

    # Load your own trained weights; the checkpoint stores them under the 'model' key.
    weights_path = parser_data.weights_path
    assert os.path.exists(weights_path), "not found {} file.".format(weights_path)
    model.load_state_dict(torch.load(weights_path, map_location='cpu')['model'])
    # print(model)

    model.to(device)

    # evaluate on the val dataset
    cpu_device = torch.device("cpu")

    det_metric = EvalCOCOMetric(val_dataset.coco, "bbox", "det_results.json")
    seg_metric = EvalCOCOMetric(val_dataset.coco, "segm", "seg_results.json")
    model.eval()
    with torch.no_grad():
        for image, targets in tqdm(val_dataset_loader, desc="validation..."):
            # move the images to the selected device
            image = list(img.to(device) for img in image)

            # inference
            outputs = model(image)

            # bring the predictions back to the CPU before updating the metrics
            outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
            det_metric.update(targets, outputs)
            seg_metric.update(targets, outputs)

    det_metric.synchronize_results()
    seg_metric.synchronize_results()
    det_metric.evaluate()
    seg_metric.evaluate()

    save_info(det_metric.coco_evaluator, category_index, "det_record_mAP.txt")
    save_info(seg_metric.coco_evaluator, category_index, "seg_record_mAP.txt")


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description=__doc__)

    # Command-line options, registered in this order.
    cli_options = (
        # device type to use
        (("--device",), {"default": "cuda", "help": "device"}),
        # number of object classes (background not included)
        (("--num-classes",), {"type": int, "default": 90, "help": "number of classes"}),
        # root directory of the dataset
        (("--data-path",), {"default": "/data/coco2017", "help": "dataset root"}),
        # trained weights file
        (("--weights-path",), {"default": "./save_weights/model_25.pth", "type": str,
                               "help": "training weights"}),
        # batch size(set to 1, don't change)
        (("--batch-size",), {"default": 1, "type": int, "metavar": "N",
                             "help": "batch size when validation."}),
        # mapping between class indices and class names
        (("--label-json-path",), {"type": str, "default": "coco91_indices.json"}),
    )
    for flags, settings in cli_options:
        parser.add_argument(*flags, **settings)

    args = parser.parse_args()

    main(args)


================================================
FILE: pytorch_object_detection/retinaNet/README.md
================================================
# RetinaNet

## 该项目主要是来自pytorch官方torchvision模块中的源码
* https://github.com/pytorch/vision/tree/master/torchvision/models/detection

## 环境配置：
* Python3.6/3.7/3.8
* Pytorch1.7.1(注意：必须是1.6.0或以上，因为使用官方提供的混合精度训练1.6.0后才支持)
* pycocotools(Linux:`pip install pycocotools`; Windows:`pip install pycocotools-windows`(不需要额外安装vs))
* Ubuntu或Centos(不建议Windows)
* 最好使用GPU训练
* 详细环境配置见`requirements.txt`

## 文件结构：
```
  ├── backbone: 特征提取网络(ResNet50+FPN)
  ├── network_files: RetinaNet网络
  ├── train_utils: 训练验证相关模块（包括cocotools）
  ├── my_dataset.py: 自定义dataset用于读取VOC数据集
  ├── train.py: 以resnet50+FPN做为backbone进行训练
  ├── train_multi_GPU.py: 供多GPU用户使用的训练脚本
  ├── predict.py: 简易的预测脚本，使用训练好的权重进行预测测试
  ├── validation.py: 利用训练好的权重验证/测试数据的COCO指标，并生成record_mAP.txt文件
  └── pascal_voc_classes.json: pascal_voc标签文件(注意索引从0开始，不包括背景)
```

## 预训练权重下载地址（下载后放入backbone文件夹中）：
* ResNet50+FPN backbone: https://download.pytorch.org/models/retinanet_resnet50_fpn_coco-eeacb38b.pth
* 注意，下载的预训练权重记得要重命名，比如在train.py中读取的是`retinanet_resnet50_fpn_coco.pth`文件，
  不是`retinanet_resnet50_fpn_coco-eeacb38b.pth`


## 数据集，本例程使用的是PASCAL VOC2012数据集
* Pascal VOC2012 train/val数据集下载地址：http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
* 如果不了解数据集或者想使用自己的数据集进行训练，请参考我的bilibili：https://b23.tv/F1kSCK
* 基于迁移学习在PASCAL VOC2012训练集训练得到的权重： 链接: https://pan.baidu.com/s/1mqrBFWuJ_lfDloCfVjWqaA  密码: sw0t
* 在PASCAL VOC2012验证集上结果：
```
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.563
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.798
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.616
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.236
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.434
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.626
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.486
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.688
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.707
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.421
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.604
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.758
```

## 训练方法
* 确保提前准备好数据集
* 确保提前下载好对应预训练模型权重
* 若要单GPU训练，直接使用train.py训练脚本
* 若要使用多GPU训练，使用`python -m torch.distributed.launch --nproc_per_node=8 --use_env train_multi_GPU.py`指令,`nproc_per_node`参数为使用GPU数量
* 如果想指定使用哪些GPU设备可在指令前加上`CUDA_VISIBLE_DEVICES=0,3`(例如我只要使用设备中的第1块和第4块GPU设备)
* `CUDA_VISIBLE_DEVICES=0,3 python -m torch.distributed.launch --nproc_per_node=2 --use_env train_multi_GPU.py`

## 注意事项
* 在使用训练脚本时，注意要将`--data-path`(VOC_root)设置为自己存放`VOCdevkit`文件夹所在的**根目录**
* 由于带有FPN结构的RetinaNet很吃显存，如果GPU的显存不够(如果batch_size小于8的话)建议在create_model函数中使用默认的norm_layer，
  即不传递norm_layer变量，默认去使用FrozenBatchNorm2d(即不会去更新参数的bn层),使用中发现效果也很好。
* 训练过程中保存的`results.txt`是每个epoch在验证集上的COCO指标，前12个值是COCO指标，后面两个值是训练平均损失以及学习率
* 在使用预测脚本时，要将`weights_path`设置为你自己生成的权重路径。
* 使用validation文件时，注意确保你的验证集或者测试集中必须包含每个类别的目标，并且使用时只需要修改`--num-classes`、`--data-path`和`--weights-path`即可，其他代码尽量不要改动


================================================
FILE: pytorch_object_detection/retinaNet/backbone/__init__.py
================================================
from .feature_pyramid_network import FeaturePyramidNetwork, LastLevelP6P7, LastLevelMaxPool
from .resnet50_fpn_model import resnet50_fpn_backbone


================================================
FILE: pytorch_object_detection/retinaNet/backbone/feature_pyramid_network.py
================================================
from collections import OrderedDict

import torch.nn as nn
import torch
from torch import Tensor
import torch.nn.functional as F

from torch.jit.annotations import Tuple, List, Dict


class IntermediateLayerGetter(nn.ModuleDict):
    """
    Module wrapper that exposes the outputs of selected child modules.

    It assumes the children were registered on ``model`` in the same order
    they are executed, so a module must **not** be reused twice in the
    forward pass. Only direct children can be queried: ``model.feature1``
    is reachable, ``model.feature1.layer2`` is not.

    Arguments:
        model (nn.Module): model on which we will extract the features
        return_layers (Dict[name, new_name]): maps the name of each child
            whose activation should be returned to the key under which that
            activation is stored in the output.
    """
    __annotations__ = {
        "return_layers": Dict[str, str],
    }

    def __init__(self, model, return_layers):
        child_names = [child_name for child_name, _ in model.named_children()]
        if not set(return_layers).issubset(child_names):
            raise ValueError("return_layers are not present in model")

        # Requested layer names that have not been reached yet.
        pending = {str(key) for key, _ in return_layers.items()}

        # Copy children in registration order and stop right after the last
        # requested layer, dropping everything that comes after it.
        kept = OrderedDict()
        for child_name, child in model.named_children():
            kept[child_name] = child
            pending.discard(child_name)
            if not pending:
                break

        super().__init__(kept)
        self.return_layers = return_layers

    def forward(self, x):
        """Run the kept children sequentially and collect the requested outputs."""
        collected = OrderedDict()
        for child_name, child in self.items():
            x = child(x)
            if child_name in self.return_layers:
                collected[self.return_layers[child_name]] = x
        return collected


class BackboneWithFPN(nn.Module):
    """
    Wrap a backbone and put a FeaturePyramidNetwork on top of it.

    When ``re_getter`` is true the backbone is wrapped in an
    IntermediateLayerGetter so that it returns the feature maps named in
    ``return_layers`` (the limitations of IntermediateLayerGetter apply);
    otherwise the backbone is used as-is.

    Arguments:
        backbone (nn.Module)
        return_layers (Dict[name, new_name]): maps the name of each module
            whose activation should be returned to the key under which that
            activation is exposed. Required when ``re_getter`` is true.
        in_channels_list (List[int]): number of channels for each feature map
            that is returned, in the order they are present in the OrderedDict
        out_channels (int): number of channels in the FPN.
        extra_blocks: ExtraFPNBlock; defaults to LastLevelMaxPool() when None
        re_getter (bool): whether to wrap ``backbone`` in an IntermediateLayerGetter
    Attributes:
        out_channels (int): the number of channels in the FPN
    """

    def __init__(self,
                 backbone: nn.Module,
                 return_layers=None,
                 in_channels_list=None,
                 out_channels=256,
                 extra_blocks=None,
                 re_getter=True):
        super().__init__()

        top_block = LastLevelMaxPool() if extra_blocks is None else extra_blocks

        if not re_getter:
            self.body = backbone
        else:
            assert return_layers is not None
            self.body = IntermediateLayerGetter(backbone, return_layers=return_layers)

        self.fpn = FeaturePyramidNetwork(
            in_channels_list=in_channels_list,
            out_channels=out_channels,
            extra_blocks=top_block,
        )

        self.out_channels = out_channels

    def forward(self, x):
        """Run the backbone body, then the FPN, and return the FPN output."""
        features = self.body(x)
        return self.fpn(features)


class ExtraFPNBlock(nn.Module):
    """
    Base class for the extra block in the FPN.

    It only fixes the ``forward`` signature; the implementation here is an
    empty stub, so concrete blocks are expected to override ``forward``.

    Args:
        results (List[Tensor]): the result of the FPN
        x (List[Tensor]): the original feature maps
        names (List[str]): the names for each one of the
            original feature maps

    Returns:
        results (List[Tensor]): the extended set of results
            of the FPN
        names (List[str]): the extended set of names for the results
    """
    def forward(self,
                results: List[Tensor],
                x: List[Tensor],
                names: List[str]) -> Tuple[List[Tensor], List[str]]:
        # Placeholder body: does nothing and therefore returns None.
        pass


class LastLevelMaxPool(torch.nn.Module):
    """
    Appends one extra level obtained by max-pooling the last feature map
    (kernel size 1, stride 2, no padding).
    """

    def forward(self, x: List[Tensor], y: List[Tensor], names: List[str]) -> Tuple[List[Tensor], List[str]]:
        # Both input lists are extended in place and then returned; ``y`` is unused.
        names.append("pool")
        last_level = x[-1]
        pooled = F.max_pool2d(last_level, kernel_size=1, stride=2, padding=0)
        x.append(pooled)
        return x, names


class LastLevelP6P7(ExtraFPNBlock):
    """
    Extra FPN block used in RetinaNet: produces two additional levels,
    P6 and P7, each from a 3x3 convolution with stride 2.
    """
    def __init__(self, in_channels: int, out_channels: int):
        super().__init__()
        self.p6 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=2, padding=1)
        self.p7 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=2, padding=1)
        for conv in (self.p6, self.p7):
            nn.init.kaiming_uniform_(conv.weight, a=1)
            nn.init.constant_(conv.bias, 0)
        # P6 is computed from the last FPN output only when the channel counts
        # match; otherwise it is computed from the last backbone feature map.
        self.use_P5 = in_channels == out_channels

    def forward(self,
                p: List[Tensor],
                c: List[Tensor],
                names: List[str]) -> Tuple[List[Tensor], List[str]]:
        # ``p`` holds the FPN outputs and ``c`` the original feature maps;
        # ``p`` and ``names`` are extended in place.
        last_p, last_c = p[-1], c[-1]
        source = last_p if self.use_P5 else last_c
        level6 = self.p6(source)
        level7 = self.p7(F.relu(level6))
        p.extend([level6, level7])
        names.extend(["p6", "p7"])
        return p, names


class FeaturePyramidNetwork(nn.Module):
    """
    Module that adds a FPN from on top of a set of feature maps. This is based on
    `"Feature Pyramid Network for Object Detection" <https://arxiv.org/abs/1612.03144>`_.
    The feature maps are currently supposed to be in increasing depth
    order.
    The input to the model is expected to be an OrderedDict[Tensor], containing
    the feature maps on top of which the FPN will be added.
    Arguments:
        in_channels_list (list[int]): number of channels for each feature map that
            is passed to the module; entries equal to 0 are skipped
        out_channels (int): number of channels of the FPN representation
        extra_blocks (ExtraFPNBlock or None): if provided, extra operations will
            be performed. It is expected to take the fpn features, the original
            features and the names of the original features as input, and returns
            a new list of feature maps and their corresponding names
    """

    def __init__(self, in_channels_list, out_channels, extra_blocks=None):
        super().__init__()
        # 1x1 convs that bring each input feature map to ``out_channels`` channels
        self.inner_blocks = nn.ModuleList()
        # 3x3 convs applied to the merged maps to produce the output feature maps
        self.layer_blocks = nn.ModuleList()
        for in_channels in in_channels_list:
            if in_channels == 0:
                continue
            inner_block_module = nn.Conv2d(in_channels, out_channels, 1)
            layer_block_module = nn.Conv2d(out_channels, out_channels, 3, padding=1)
            self.inner_blocks.append(inner_block_module)
            self.layer_blocks.append(layer_block_module)

        # initialize parameters now to avoid modifying the initialization of top_blocks
        # (extra_blocks is attached only afterwards).
        # modules() is required here: children() yields only the two ModuleLists,
        # never the Conv2d layers inside them, so the init would silently be skipped.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_uniform_(m.weight, a=1)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

        self.extra_blocks = extra_blocks

    def get_result_from_inner_blocks(self, x: Tensor, idx: int) -> Tensor:
        """
        This is equivalent to self.inner_blocks[idx](x),
        but torchscript doesn't support this yet
        """
        num_blocks = len(self.inner_blocks)
        if idx < 0:
            idx += num_blocks
        i = 0
        out = x
        for module in self.inner_blocks:
            if i == idx:
                out = module(x)
            i += 1
        return out

    def get_result_from_layer_blocks(self, x: Tensor, idx: int) -> Tensor:
        """
        This is equivalent to self.layer_blocks[idx](x),
        but torchscript doesn't support this yet
        """
        num_blocks = len(self.layer_blocks)
        if idx < 0:
            idx += num_blocks
        i = 0
        out = x
        for module in self.layer_blocks:
            if i == idx:
                out = module(x)
            i += 1
        return out

    def forward(self, x: Dict[str, Tensor]) -> Dict[str, Tensor]:
        """
        Computes the FPN for a set of feature maps.
        Arguments:
            x (OrderedDict[Tensor]): feature maps for each feature level.
        Returns:
            results (OrderedDict[Tensor]): feature maps after FPN layers.
                They are ordered from highest resolution first.
        """
        # unpack OrderedDict into two lists for easier handling
        names = list(x.keys())
        x = list(x.values())

        # Start from the last (deepest) feature map: adjust its channels to out_channels.
        last_inner = self.get_result_from_inner_blocks(x[-1], -1)
        # ``results`` collects the output feature map of every level.
        results = []
        # The deepest level's output is its channel-adjusted map passed through the 3x3 conv.
        results.append(self.get_result_from_layer_blocks(last_inner, -1))

        # Walk the remaining levels from deep to shallow, merging top-down.
        for idx in range(len(x) - 2, -1, -1):
            inner_lateral = self.get_result_from_inner_blocks(x[idx], idx)
            feat_shape = inner_lateral.shape[-2:]
            # resize the coarser map to this level's spatial size before adding
            inner_top_down = F.interpolate(last_inner, size=feat_shape, mode="nearest")
            last_inner = inner_lateral + inner_top_down
            results.insert(0, self.get_result_from_layer_blocks(last_inner, idx))

        # Optionally append extra levels derived from the outputs / original feature maps.
        if self.extra_blocks is not None:
            results, names = self.extra_blocks(results, x, names)

        # make it back an OrderedDict
        out = OrderedDict([(k, v) for k, v in zip(names, results)])

        return out


================================================
FILE: pytorch_object_detection/retinaNet/backbone/resnet50_fpn_model.py
================================================
import os

import torch.nn as nn
import torch
from torchvision.ops.misc import FrozenBatchNorm2d

from .feature_pyramid_network import LastLevelMaxPool, BackboneWithFPN


class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 conv -> 3x3 conv -> 1x1 conv, plus a shortcut.

    The block outputs ``out_channel * expansion`` channels. ``downsample``,
    when given, is applied to the input to produce the shortcut branch.
    """
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, norm_layer=None):
        super().__init__()
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        expanded_channel = out_channel * self.expansion

        # 1x1 conv: squeeze channels
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=1, bias=False)
        self.bn1 = make_norm(out_channel)
        # 3x3 conv: the only layer that applies ``stride``
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = make_norm(out_channel)
        # 1x1 conv: unsqueeze channels
        self.conv3 = nn.Conv2d(out_channel, expanded_channel, kernel_size=1, stride=1, bias=False)
        self.bn3 = make_norm(expanded_channel)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # residual addition, followed by the final activation
        out += shortcut
        return self.relu(out)


class ResNet(nn.Module):
    """ResNet built from ``block``: a stem, four stages and an optional classifier head.

    Args:
        block: residual block class exposing an ``expansion`` attribute.
        blocks_num: number of blocks in each of the four stages.
        num_classes: output size of the final linear layer (used only with ``include_top``).
        include_top: whether to add the average-pool + linear head.
        norm_layer: normalization layer factory; ``nn.BatchNorm2d`` when None.
    """

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True, norm_layer=None):
        super().__init__()
        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        self._norm_layer = norm_layer

        self.include_top = include_top
        # Running channel count; updated by _make_layer as stages are built.
        self.in_channel = 64

        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = norm_layer(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # (base channels, stride) of layer1..layer4
        stage_plan = ((64, 1), (128, 2), (256, 2), (512, 2))
        for stage_idx, (base_channel, stage_stride) in enumerate(stage_plan):
            stage = self._make_layer(block, base_channel, blocks_num[stage_idx], stride=stage_stride)
            setattr(self, "layer{}".format(stage_idx + 1), stage)

        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        conv_layers = (m for m in self.modules() if isinstance(m, nn.Conv2d))
        for conv in conv_layers:
            nn.init.kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` blocks and advance ``self.in_channel``."""
        norm_layer = self._norm_layer
        stage_out_channel = channel * block.expansion

        # A projection shortcut is needed whenever the first block changes the
        # spatial size or the channel count.
        shortcut = None
        if stride != 1 or self.in_channel != stage_out_channel:
            shortcut = nn.Sequential(
                nn.Conv2d(self.in_channel, stage_out_channel, kernel_size=1, stride=stride, bias=False),
                norm_layer(stage_out_channel))

        first_block = block(self.in_channel, channel, downsample=shortcut,
                            stride=stride, norm_layer=norm_layer)
        self.in_channel = stage_out_channel

        remaining_blocks = [block(self.in_channel, channel, norm_layer=norm_layer)
                            for _ in range(1, block_num)]

        return nn.Sequential(first_block, *remaining_blocks)

    def forward(self, x):
        pipeline = (self.conv1, self.bn1, self.relu, self.maxpool,
                    self.layer1, self.layer2, self.layer3, self.layer4)
        for stage in pipeline:
            x = stage(x)

        if self.include_top:
            x = self.avgpool(x)
            x = torch.flatten(x, 1)
            x = self.fc(x)

        return x


def overwrite_eps(model, eps):
    """
    Set ``eps`` on every FrozenBatchNorm2d layer found in ``model``.

    This exists because of the BC-breaking change introduced by the bug-fix
    at pytorch/vision#2933: overwriting the default eps keeps pretrained
    weights compatible with previous versions.

    Args:
        model (nn.Module): The model on which we perform the overwrite.
        eps (float): The new value of eps.
    """
    frozen_bn_layers = (m for m in model.modules() if isinstance(m, FrozenBatchNorm2d))
    for layer in frozen_bn_layers:
        layer.eps = eps


def resnet50_fpn_backbone(pretrain_path="",
                          norm_layer=FrozenBatchNorm2d,   # like BatchNorm2d, but its parameters are never updated
                          trainable_layers=3,
                          returned_layers=None,
                          extra_blocks=None):
    """
    Build a ResNet50 + FPN backbone.

    Args:
        pretrain_path: path to pretrained ResNet50 weights; leave empty to skip loading
        norm_layer: FrozenBatchNorm2d by default, i.e. a BN layer whose parameters are
                    not updated (with a very small batch size, training BN tends to hurt).
                    If your GPU memory allows a large batch size you may pass a regular
                    BatchNorm2d instead.
                    (https://github.com/facebookresearch/maskrcnn-benchmark/issues/267)
        trainable_layers: how many layers to leave trainable (0-5), counted from
                          layer4 backwards: layer4, layer3, layer2, layer1, conv1
        returned_layers: which ResNet stages (1-4) feed the FPN; defaults to [1, 2, 3, 4]
        extra_blocks: extra block appended on top of the FPN outputs;
                      defaults to LastLevelMaxPool()

    Returns:
        BackboneWithFPN: the ResNet50 body wrapped with an FPN (256 output channels).
    """
    resnet_backbone = ResNet(Bottleneck, [3, 4, 6, 3],
                             include_top=False,
                             norm_layer=norm_layer)

    # ``norm_layer`` is a class (layer factory), not an instance, so it has to be
    # compared with issubclass; an isinstance() check here is always False and
    # would leave the eps overwrite as dead code.
    if isinstance(norm_layer, type) and issubclass(norm_layer, FrozenBatchNorm2d):
        overwrite_eps(resnet_backbone, 0.0)

    if pretrain_path != "":
        assert os.path.exists(pretrain_path), "{} is not exist.".format(pretrain_path)
        # Load the pretrained weights onto the CPU first so that checkpoints saved
        # on a GPU can also be read on CPU-only machines.
        print(resnet_backbone.load_state_dict(torch.load(pretrain_path, map_location="cpu"),
                                              strict=False))

    # select layers that wont be frozen
    assert 0 <= trainable_layers <= 5
    layers_to_train = ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'][:trainable_layers]

    # When training every layer, remember that conv1 is followed by bn1.
    if trainable_layers == 5:
        layers_to_train.append("bn1")

    # freeze layers: every parameter that does NOT belong to layers_to_train
    for name, parameter in resnet_backbone.named_parameters():
        if not any(name.startswith(layer) for layer in layers_to_train):
            parameter.requires_grad_(False)

    if extra_blocks is None:
        extra_blocks = LastLevelMaxPool()

    if returned_layers is None:
        returned_layers = [1, 2, 3, 4]
    # returned stage indices must lie in 1..4
    assert min(returned_layers) > 0 and max(returned_layers) < 5

    # e.g. return_layers = {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'}
    return_layers = {f'layer{k}': str(v) for v, k in enumerate(returned_layers)}

    # resnet_backbone.in_channel is the channel count of layer4's output (2048)
    in_channels_stage2 = resnet_backbone.in_channel // 8  # 256
    # channels of each ResNet feature map handed to the FPN
    in_channels_list = [in_channels_stage2 * 2 ** (i - 1) for i in returned_layers]
    # channels of every feature map produced by the FPN
    out_channels = 256
    return BackboneWithFPN(resnet_backbone, return_layers, in_channels_list, out_channels, extra_blocks=extra_blocks)


================================================
FILE: pytorch_object_detection/retinaNet/draw_box_utils.py
================================================
from PIL.Image import Image, fromarray
import PIL.ImageDraw as ImageDraw
import PIL.ImageFont as ImageFont
from PIL import ImageColor
import numpy as np

# Palette of color names available to the drawing helpers in this module.
# NOTE(review): presumably these names are resolved through PIL.ImageColor
# where they are consumed — the consuming code is outside this block, confirm there.
STANDARD_COLORS = [
    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',
    'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',
    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',
    'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
    'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',
    'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
    'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
    'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
    'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
    'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
    'WhiteSmoke', 'Yellow', 'YellowGreen'
]


def _text_size(font, text: str):
    """Return the rendered ``(width, height)`` of ``text`` for ``font``.

    ``font.getsize`` was removed in Pillow 10, so ``font.getbbox`` is preferred
    whenever the font object provides it; ``getsize`` is only used as a
    fallback for old Pillow releases.
    """
    if hasattr(font, "getbbox"):
        # Use the right/bottom edges (measured from the drawing origin) rather
        # than right-left / bottom-top so the label layout stays the same as
        # with the legacy getsize() result.
        _, _, right, bottom = font.getbbox(text)
        return right, bottom
    return font.getsize(text)


def draw_text(draw,
              box: list,
              cls: int,
              score: float,
              category_index: dict,
              color: str,
              font: str = 'arial.ttf',
              font_size: int = 24):
    """
    Draw the "<class name>: <score>%" label of one detection next to its box.

    Args:
        draw: drawing context whose ``rectangle`` and ``text`` methods are used
            (draw_objs passes an ``ImageDraw.Draw`` object).
        box: ``[left, top, right, bottom]`` of the bounding box.
        cls: class id; ``category_index[str(cls)]`` supplies the display name.
        score: confidence, rendered as ``int(100 * score)`` percent.
        category_index: mapping from class-id string to class name.
        color: fill color of the label background.
        font: font file handed to ``ImageFont.truetype``.
        font_size: font size handed to ``ImageFont.truetype``.
    """
    try:
        font = ImageFont.truetype(font, font_size)
    except IOError:
        # The requested font file could not be loaded: use PIL's default font.
        font = ImageFont.load_default()

    left, top, right, bottom = box
    display_str = f"{category_index[str(cls)]}: {int(100 * score)}%"
    # Measure every character once; the sizes drive both the label height and
    # the character-by-character layout below.
    char_sizes = [_text_size(font, ch) for ch in display_str]
    # The label has a top and bottom margin of 0.05x the tallest character.
    display_str_height = (1 + 2 * 0.05) * max(height for _, height in char_sizes)

    # If the label would stick out above the image, place it below the
    # bounding box instead of above it.
    if top > display_str_height:
        text_top = top - display_str_height
        text_bottom = top
    else:
        text_top = bottom
        text_bottom = bottom + display_str_height

    # Characters are drawn one at a time, each on its own background patch,
    # advancing the cursor by the width of the character just drawn.
    for ch, (text_width, _) in zip(display_str, char_sizes):
        margin = np.ceil(0.05 * text_width)
        draw.rectangle([(left, text_top),
                        (left + text_width + 2 * margin, text_bottom)], fill=color)
        draw.text((left + margin, text_top),
                  ch,
                  fill='black',
                  font=font)
        left += text_width


def draw_masks(image, masks, colors, thresh: float = 0.7, alpha: float = 0.5):
    """
    Blend per-object masks onto an image and return the result as a new image.

    Args:
        image: source image; it is converted with ``np.array`` and not modified.
        masks: per-object mask scores; values above ``thresh`` count as foreground.
        colors: one color per mask, paired with ``masks`` by position.
        thresh: score threshold that turns a mask into a boolean mask.
        alpha: weight of the colored overlay in the final blend.

    Returns:
        The blended picture built with ``fromarray`` from a uint8 array.
    """
    base = np.array(image)
    binary_masks = np.where(masks > thresh, True, False)

    # Paint every foreground region with its color on a copy of the picture.
    overlay = np.copy(base)
    for region, rgb in zip(binary_masks, colors):
        overlay[region] = rgb

    # Alpha-blend the untouched picture with the painted copy.
    blended = base * (1 - alpha) + overlay * alpha
    return fromarray(blended.astype(np.uint8))


def draw_objs(image: Image,
              boxes: np.ndarray = None,
              classes: np.ndarray = None,
              scores: np.ndarray = None,
              masks: np.ndarray = None,
              category_index: dict = None,
              box_thresh: float = 0.1,
              mask_thresh: float = 0.5,
              line_thickness: int = 8,
              font: str = 'arial.ttf',
              font_size: int = 24,
              draw_boxes_on_image: bool = True,
              draw_masks_on_image: bool = False):
    """
    Draw bounding boxes, class labels and (optionally) masks on an image.

    Args:
        image: picture to draw on; boxes and labels are drawn onto it in place.
        boxes: bounding boxes, one ``(left, top, right, bottom)`` row per object.
        classes: class id of every object.
        scores: confidence of every object.
        masks: optional mask of every object.
        category_index: mapping from class-id string to class name.
        box_thresh: objects whose score is not greater than this are dropped.
        mask_thresh: threshold forwarded to ``draw_masks``.
        line_thickness: line width of the box outline.
        font: font file used for the labels.
        font_size: font size used for the labels.
        draw_boxes_on_image: whether boxes and labels are drawn.
        draw_masks_on_image: whether masks are drawn (requires ``masks``).

    Returns:
        The input image when nothing passes the threshold or only boxes are
        drawn; otherwise the image returned by ``draw_masks``.
    """

    # Keep only the detections whose score exceeds the threshold.
    keep = np.greater(scores, box_thresh)
    boxes, classes, scores = boxes[keep], classes[keep], scores[keep]
    if masks is not None:
        masks = masks[keep]
    if len(boxes) == 0:
        return image

    palette_size = len(STANDARD_COLORS)
    colors = [ImageColor.getrgb(STANDARD_COLORS[cls % palette_size]) for cls in classes]

    if draw_boxes_on_image:
        canvas = ImageDraw.Draw(image)
        for box, cls, score, color in zip(boxes, classes, scores, colors):
            left, top, right, bottom = box
            # Closed polyline around the box: start and end at the top-left corner.
            outline = [(left, top), (left, bottom), (right, bottom),
                       (right, top), (left, top)]
            canvas.line(outline, width=line_thickness, fill=color)
            # Class name and confidence label.
            draw_text(canvas, box.tolist(), int(cls), float(score), category_index, color, font, font_size)

    if draw_masks_on_image and (masks is not None):
        image = draw_masks(image, masks, colors, mask_thresh)

    return image


================================================
FILE: pytorch_object_detection/retinaNet/my_dataset.py
================================================
from torch.utils.data import Dataset
import os
import torch
import json
from PIL import Image
from lxml import etree


class VOCDataSet(Dataset):
    """Read and parse the PASCAL VOC2007/2012 dataset."""

    def __init__(self, voc_root, year="2012", transforms=None, txt_name: str = "train.txt"):
        """
        Args:
            voc_root: directory containing ``VOCdevkit`` (or a path that
                already includes ``VOCdevkit``).
            year: dataset year, "2007" or "2012".
            transforms: optional callable applied as ``transforms(image, target)``.
            txt_name: split file under ``ImageSets/Main`` listing the sample names.
        """
        assert year in ["2007", "2012"], "year must be in ['2007', '2012']"
        # Be tolerant: accept the root with or without the "VOCdevkit" component.
        if "VOCdevkit" in voc_root:
            self.root = os.path.join(voc_root, f"VOC{year}")
        else:
            self.root = os.path.join(voc_root, "VOCdevkit", f"VOC{year}")
        self.img_root = os.path.join(self.root, "JPEGImages")
        self.annotations_root = os.path.join(self.root, "Annotations")

        # read train.txt or val.txt file
        txt_path = os.path.join(self.root, "ImageSets", "Main", txt_name)
        assert os.path.exists(txt_path), "not found {} file.".format(txt_name)

        with open(txt_path) as read:
            self.xml_list = [os.path.join(self.annotations_root, line.strip() + ".xml")
                             for line in read.readlines() if len(line.strip()) > 0]

        # check file
        assert len(self.xml_list) > 0, "in '{}' file does not find any information.".format(txt_path)
        for xml_path in self.xml_list:
            assert os.path.exists(xml_path), "not found '{}' file.".format(xml_path)

        # read class_indict (class name -> class id), relative to the working directory
        json_file = './pascal_voc_classes.json'
        assert os.path.exists(json_file), "{} file not exist.".format(json_file)
        with open(json_file, 'r') as f:
            self.class_dict = json.load(f)

        self.transforms = transforms

    def __len__(self):
        return len(self.xml_list)

    def _load_annotation(self, idx):
        """Parse the annotation file of sample ``idx``.

        Returns:
            ``(xml_path, data)`` where ``data`` is the dictionary stored under
            the root "annotation" tag.
        """
        xml_path = self.xml_list[idx]
        # Hand raw bytes to the parser so that a file starting with an XML
        # encoding declaration is decoded by the parser itself.
        with open(xml_path, "rb") as fid:
            xml = etree.fromstring(fid.read())
        return xml_path, self.parse_xml_to_dict(xml)["annotation"]

    def _build_target(self, idx, objects, xml_path):
        """Convert the parsed ``object`` entries of one image into a target dict.

        Args:
            idx: sample index, stored as ``image_id``.
            objects: list of parsed ``object`` dictionaries.
            xml_path: annotation path, only used in the warning message.

        Returns:
            dict with "boxes" (float32, [N, 4]), "labels", "image_id", "area"
            and "iscrowd" tensors.
        """
        boxes = []
        labels = []
        iscrowd = []
        for obj in objects:
            bndbox = obj["bndbox"]
            xmin = float(bndbox["xmin"])
            xmax = float(bndbox["xmax"])
            ymin = float(bndbox["ymin"])
            ymax = float(bndbox["ymax"])

            # Some annotations contain boxes with zero width or height; such
            # boxes make the regression loss NaN, so they are skipped.
            if xmax <= xmin or ymax <= ymin:
                print("Warning: in '{}' xml, there are some bbox w/h <=0".format(xml_path))
                continue

            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(self.class_dict[obj["name"]])
            # "difficult" is optional; treat a missing flag as 0.
            iscrowd.append(int(obj.get("difficult", 0)))

        # convert everything into a torch.Tensor
        # reshape keeps the [N, 4] layout even when no box survived (N == 0),
        # so the column indexing below stays valid.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        iscrowd = torch.as_tensor(iscrowd, dtype=torch.int64)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        return target

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample ``idx``.

        Raises:
            ValueError: if the image file is not a JPEG.
            AssertionError: if the annotation has no "object" entry.
        """
        xml_path, data = self._load_annotation(idx)
        img_path = os.path.join(self.img_root, data["filename"])
        image = Image.open(img_path)
        if image.format != "JPEG":
            raise ValueError("Image '{}' format not JPEG".format(img_path))

        assert "object" in data, "{} lack of object information.".format(xml_path)
        target = self._build_target(idx, data["object"], xml_path)

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        return image, target

    def get_height_and_width(self, idx):
        """Return ``(height, width)`` of sample ``idx`` as recorded in its XML file."""
        _, data = self._load_annotation(idx)
        data_height = int(data["size"]["height"])
        data_width = int(data["size"]["width"])
        return data_height, data_width

    def parse_xml_to_dict(self, xml):
        """
        Recursively convert an XML tree into nested dictionaries
        (modelled on tensorflow's recursive_parse_xml_to_dict).

        Args:
            xml: xml tree obtained by parsing XML file contents using lxml.etree

        Returns:
            Python dictionary holding XML contents.
        """

        if len(xml) == 0:  # leaf node: return the text stored under its tag
            return {xml.tag: xml.text}

        result = {}
        for child in xml:
            child_result = self.parse_xml_to_dict(child)  # recurse into the child tag
            if child.tag != 'object':
                result[child.tag] = child_result[child.tag]
            else:
                # an image may contain several objects, so they are collected in a list
                if child.tag not in result:
                    result[child.tag] = []
                result[child.tag].append(child_result[child.tag])
        return {xml.tag: result}

    def coco_index(self, idx):
        """
        Return the annotation of sample ``idx`` without opening the image.

        This method exists for collecting label statistics with pycocotools;
        skipping the image read makes that much faster.

        Args:
            idx: index of the sample

        Returns:
            ``((height, width), target)`` with the size taken from the XML file.
        """
        xml_path, data = self._load_annotation(idx)
        data_height = int(data["size"]["height"])
        data_width = int(data["size"]["width"])
        target = self._build_target(idx, data["object"], xml_path)

        return (data_height, data_width), target

    @staticmethod
    def collate_fn(batch):
        """Regroup a batch of ``(image, target)`` pairs into ``(images, targets)`` tuples."""
        return tuple(zip(*batch))

# import transforms
# from draw_box_utils import draw_objs
# from PIL import Image
# import json
# import matplotlib.pyplot as plt
# import torchvision.transforms as ts
# import random
# import numpy as np
#
# # read class_indict
# category_index = {}
# try:
#     json_file = open('./pascal_voc_classes.json', 'r')
#     class_dict = json.load(json_file)
#     category_index = {str(v): str(k) for k, v in class_dict.items()}
# except Exception as e:
#     print(e)
#     exit(-1)
#
# data_transform = {
#     "train": transforms.Compose([transforms.ToTensor(),
#                                  transforms.RandomHorizontalFlip(0.5)]),
#     "val": transforms.Compose([transforms.ToTensor()])
# }
#
# # load train data set
# train_data_set = VOCDataSet(os.getcwd(), "2012", data_transform["train"], "train.txt")
# print(len(train_data_set))
# for index in random.sample(range(0, len(train_data_set)), k=5):
#     img, target = train_data_set[index]
#     img = ts.ToPILImage()(img)
#     plot_img = draw_objs(img,
#                          target["boxes"].numpy(),
#                          target["labels"].numpy(),
#                          np.ones(target["labels"].shape[0]),
#                          category_index=category_index,
#                          box_thresh=0.5,
#                          line_thickness=3,
#                          font='arial.ttf',
#                          font_size=20)
#     plt.imshow(plot_img)
#     plt.show()


================================================
FILE: pytorch_object_detection/retinaNet/network_files/__init__.py
================================================
from .retinanet import RetinaNet


================================================
FILE: pytorch_object_detection/retinaNet/network_files/anchor_utils.py
================================================
from typing import List, Optional, Dict

import torch
from torch import nn, Tensor

from .image_list import ImageList


class AnchorsGenerator(nn.Module):
    """
    Anchor generator.
    Module that generates anchors for a set of feature maps and
    image sizes.

    The module supports computing anchors at multiple sizes and aspect ratios
    per feature map.

    sizes and aspect_ratios should have the same number of elements, and it should
    correspond to the number of feature maps.

    sizes[i] and aspect_ratios[i] can have an arbitrary number of elements,
    and AnchorGenerator will output a set of sizes[i] * aspect_ratios[i] anchors
    per spatial location for feature map i.

    Arguments:
        sizes (Tuple[Tuple[int]]):
        aspect_ratios (Tuple[Tuple[float]]):
    """

    __annotations__ = {
        "cell_anchors": Optional[List[torch.Tensor]],
        "_cache": Dict[str, List[torch.Tensor]]
    }

    def __init__(self, sizes=(128, 256, 512), aspect_ratios=(0.5, 1.0, 2.0)):
        super(AnchorsGenerator, self).__init__()

        # A flat tuple of sizes means "one size per feature map".
        if not isinstance(sizes[0], (list, tuple)):
            # TODO change this
            sizes = tuple((size,) for size in sizes)
        # A flat tuple of ratios is shared by every feature map.
        if not isinstance(aspect_ratios[0], (list, tuple)):
            aspect_ratios = (aspect_ratios,) * len(sizes)

        assert len(sizes) == len(aspect_ratios)

        self.sizes = sizes
        self.aspect_ratios = aspect_ratios
        self.cell_anchors = None
        self._cache = {}

    def generate_anchors(self, scales, aspect_ratios, dtype=torch.float32, device=torch.device("cpu")):
        # type: (List[int], List[float], torch.dtype, torch.device) -> Tensor
        """
        Build the zero-centered anchor templates for one feature map.
        Arguments:
            scales: sqrt(anchor_area)
            aspect_ratios: h/w ratios
            dtype: dtype of the returned tensor
            device: device of the returned tensor

        Returns:
            Tensor of shape [len(aspect_ratios) * len(scales), 4] holding
            rounded (xmin, ymin, xmax, ymax) offsets around the center (0, 0).
        """
        scale_t = torch.as_tensor(scales, dtype=dtype, device=device)
        ratio_t = torch.as_tensor(aspect_ratios, dtype=dtype, device=device)
        h_ratios = torch.sqrt(ratio_t)
        w_ratios = 1.0 / h_ratios

        # Outer product ratios x scales, flattened:
        # the number of elements is len(ratios) * len(scales).
        widths = (w_ratios[:, None] * scale_t[None, :]).view(-1)
        heights = (h_ratios[:, None] * scale_t[None, :]).view(-1)

        # Left-top and right-bottom corners relative to the anchor center (0, 0).
        templates = torch.stack([-widths, -heights, widths, heights], dim=1) / 2

        return templates.round()

    def set_cell_anchors(self, dtype, device):
        # type: (torch.dtype, torch.device) -> None
        """Create the anchor templates unless they already live on ``device``."""
        existing = self.cell_anchors
        if existing is not None:
            # suppose that all anchors have the same device
            # which is a valid assumption in the current state of the codebase
            if existing[0].device == device:
                return

        # One set of zero-centered templates per (sizes, aspect_ratios) pair.
        self.cell_anchors = [
            self.generate_anchors(level_sizes, level_ratios, dtype, device)
            for level_sizes, level_ratios in zip(self.sizes, self.aspect_ratios)
        ]

    def num_anchors_per_location(self):
        """Return, per feature map, the number of anchors at each sliding-window position."""
        return [len(level_sizes) * len(level_ratios)
                for level_sizes, level_ratios in zip(self.sizes, self.aspect_ratios)]

    # For every combination of (a, (g, s), i) in (self.cell_anchors, zip(grid_sizes, strides), 0:2),
    # output g[i] anchors that are s[i] distance apart in direction i, with the same dimensions as a.
    def grid_anchors(self, grid_sizes, strides):
        # type: (List[List[int]], List[List[Tensor]]) -> List[Tensor]
        """
        Map the anchor templates of every feature map onto the original image.
        Args:
            grid_sizes: height and width of every feature map
            strides: step on the original image that corresponds to one step
                on each feature map

        Returns:
            One tensor of shape [grid_height * grid_width * num_templates, 4] per feature map.
        """
        all_level_anchors = []
        cell_anchors = self.cell_anchors
        assert cell_anchors is not None

        # Walk through the grid size, stride and templates of every feature map.
        for size, stride, templates in zip(grid_sizes, strides, cell_anchors):
            grid_height, grid_width = size
            stride_height, stride_width = stride
            device = templates.device

            # x coordinates (columns) on the original image, shape [grid_width]
            xs = torch.arange(0, grid_width, dtype=torch.float32, device=device) * stride_width
            # y coordinates (rows) on the original image, shape [grid_height]
            ys = torch.arange(0, grid_height, dtype=torch.float32, device=device) * stride_height

            # meshgrid is given the row coordinates first and the column
            # coordinates second; both outputs have shape [grid_height, grid_width].
            grid_y, grid_x = torch.meshgrid(ys, xs)
            flat_x = grid_x.reshape(-1)
            flat_y = grid_y.reshape(-1)

            # Offset of (xmin, ymin, xmax, ymax) for every grid cell,
            # shape [grid_width * grid_height, 4].
            offsets = torch.stack([flat_x, flat_y, flat_x, flat_y], dim=1)

            # Broadcast-add every offset to every zero-centered template.
            shifted = offsets.view(-1, 1, 4) + templates.view(1, -1, 4)
            all_level_anchors.append(shifted.reshape(-1, 4))

        return all_level_anchors  # List[Tensor(all_num_anchors, 4)]

    def cached_grid_anchors(self, grid_sizes, strides):
        # type: (List[List[int]], List[List[Tensor]]) -> List[Tensor]
        """Return the anchors for ``grid_sizes``/``strides``, computing them only on a cache miss."""
        key = str(grid_sizes) + str(strides)
        if key not in self._cache:
            self._cache[key] = self.grid_anchors(grid_sizes, strides)
        return self._cache[key]

    def forward(self, image_list, feature_maps):
        # type: (ImageList, List[Tensor]) -> List[Tensor]
        """Return, for every image in the batch, all anchors of all feature maps concatenated."""
        # (height, width) of every feature map
        grid_sizes = [fmap.shape[-2:] for fmap in feature_maps]

        # height and width of the batched input tensor
        image_size = image_list.tensors.shape[-2:]

        dtype, device = feature_maps[0].dtype, feature_maps[0].device

        # one step in feature map equate n pixel stride in origin image
        strides = [[torch.tensor(image_size[0] // grid[0], dtype=torch.int64, device=device),
                    torch.tensor(image_size[1] // grid[1], dtype=torch.int64, device=device)]
                   for grid in grid_sizes]

        # Make sure the zero-centered templates exist on the right device.
        self.set_cell_anchors(dtype, device)

        # Anchors mapped onto the image (not templates), one tensor per feature map.
        anchors_over_all_feature_maps = self.cached_grid_anchors(grid_sizes, strides)

        anchors = torch.jit.annotate(List[List[torch.Tensor]], [])
        # Every image in the batch receives the same per-level anchors.
        for _image_height, _image_width in image_list.image_sizes:
            anchors.append([level_anchors for level_anchors in anchors_over_all_feature_maps])
        # Concatenate the levels so that each list element holds all anchors of one image.
        anchors = [torch.cat(anchors_per_image) for anchors_per_image in anchors]
        # Clear the cache in case that memory leaks.
        self._cache.clear()
        return anchors


================================================
FILE: pytorch_object_detection/retinaNet/network_files/boxes.py
================================================
import torch
from typing import Tuple
from torch import Tensor
import torchvision


def nms(boxes, scores, iou_threshold):
    # type: (Tensor, Tensor, float) -> Tensor
    """
    Run non-maximum suppression (NMS) on a set of boxes based on their
    intersection-over-union (IoU).

    NMS iteratively drops lower scoring boxes whose IoU with another
    (higher scoring) box is greater than iou_threshold.

    Parameters
    ----------
    boxes : Tensor[N, 4]
        boxes to perform NMS on, expected in (x1, y1, x2, y2) format
    scores : Tensor[N]
        score of each box
    iou_threshold : float
        overlapping boxes with IoU > iou_threshold are discarded

    Returns
    -------
    keep : Tensor
        int64 tensor with the indices of the boxes kept by NMS,
        sorted in decreasing order of scores
    """
    # Thin wrapper: the work is done by the torchvision NMS operator.
    keep = torch.ops.torchvision.nms(boxes, scores, iou_threshold)
    return keep


def batched_nms(boxes, scores, idxs, iou_threshold):
    # type: (Tensor, Tensor, Tensor, float) -> Tensor
    """
    Run non-maximum suppression independently for every category.

    Each value in ``idxs`` identifies a category, and boxes that belong to
    different categories never suppress each other.

    Parameters
    ----------
    boxes : Tensor[N, 4]
        boxes to perform NMS on, expected in (x1, y1, x2, y2) format
    scores : Tensor[N]
        score of each box
    idxs : Tensor[N]
        category index of each box
    iou_threshold : float
        overlapping boxes of the same category with IoU > iou_threshold
        are discarded

    Returns
    -------
    keep : Tensor
        int64 tensor with the indices of the boxes kept by NMS,
        sorted in decreasing order of scores
    """
    # Nothing to suppress: return an empty index tensor on the same device.
    if boxes.numel() == 0:
        return torch.empty((0,), dtype=torch.int64, device=boxes.device)

    # Trick for per-category NMS with a single call: shift the boxes of each
    # category by an offset that depends only on the category index and is
    # larger than any coordinate, so boxes of different categories cannot overlap.
    shift_step = boxes.max() + 1

    # .to(boxes) only aligns dtype and device of the indices with the boxes.
    category_offsets = idxs.to(boxes) * shift_step
    shifted_boxes = boxes + category_offsets[:, None]
    return nms(shifted_boxes, scores, iou_threshold)


def remove_small_boxes(boxes, min_size):
    # type: (Tensor, float) -> Tensor
    """
    Find the boxes whose width and height both reach ``min_size``.

    Arguments:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format
        min_size (float): minimum size

    Returns:
        keep (Tensor[K]): indices of the boxes whose two sides are both
            greater than or equal to min_size
    """
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    wide_enough = torch.ge(widths, min_size)
    tall_enough = torch.ge(heights, min_size)
    # True only where both sides pass the size check.
    large_enough = torch.logical_and(wide_enough, tall_enough)
    # torch.where on a boolean mask yields a tuple of index tensors; take dim 0.
    return torch.where(large_enough)[0]


def clip_boxes_to_image(boxes, size):
    # type: (Tensor, Tuple[int, int]) -> Tensor
    """
    Clamp box coordinates so that every box lies inside an image of size `size`;
    coordinates outside the image are moved onto the image border.

    Arguments:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format
        size (Tuple[height, width]): size of the image

    Returns:
        clipped_boxes (Tensor[N, 4])
    """
    ndim = boxes.dim()
    xs = boxes[..., 0::2]  # x1, x2
    ys = boxes[..., 1::2]  # y1, y2
    height, width = size

    if torchvision._is_tracing():
        # While tracing, clamp through torch.max/torch.min with tensor bounds.
        lower = torch.tensor(0, dtype=boxes.dtype, device=boxes.device)
        x_upper = torch.tensor(width, dtype=boxes.dtype, device=boxes.device)
        y_upper = torch.tensor(height, dtype=boxes.dtype, device=boxes.device)
        xs = torch.min(torch.max(xs, lower), x_upper)
        ys = torch.min(torch.max(ys, lower), y_upper)
    else:
        xs = xs.clamp(min=0, max=width)    # keep x inside [0, width]
        ys = ys.clamp(min=0, max=height)   # keep y inside [0, height]

    # Interleave x and y again so the result has the layout of the input.
    clipped_boxes = torch.stack((xs, ys), dim=ndim)
    return clipped_boxes.reshape(boxes.shape)


def box_area(boxes):
    """
    Compute the area of each bounding box given by its
    (x1, y1, x2, y2) coordinates.

    Arguments:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format

    Returns:
        area (Tensor[N]): area for each box
    """
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    return widths * heights


def box_iou(boxes1, boxes2):
    """
    Compute the pairwise intersection-over-union (Jaccard index) of two box sets.

    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.

    Arguments:
        boxes1 (Tensor[N, 4])
        boxes2 (Tensor[M, 4])

    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in boxes1 and boxes2
    """
    areas1 = box_area(boxes1)
    areas2 = box_area(boxes2)

    # boxes1 gets an extra axis so that broadcasting pairs every box of
    # boxes1 with every box of boxes2.
    top_left = torch.max(boxes1[:, None, :2], boxes2[:, :2])  # [N,M,2]
    bottom_right = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # [N,M,2]

    # Negative extents mean the boxes do not overlap; clamp them to zero.
    overlap_wh = (bottom_right - top_left).clamp(min=0)  # [N,M,2]
    intersection = overlap_wh[:, :, 0] * overlap_wh[:, :, 1]  # [N,M]

    union = areas1[:, None] + areas2 - intersection
    return intersection / union


================================================
FILE: pytorch_object_detection/retinaNet/network_files/det_utils.py
================================================
import torch
import math
from typing import List, Tuple
from torch import Tensor


class BalancedPositiveNegativeSampler(object):
    """
    Sampler that draws a batch per image with a fixed proportion of positives.
    """

    def __init__(self, batch_size_per_image, positive_fraction):
        # type: (int, float) -> None
        """
        Arguments:
            batch_size_per_image (int): number of elements to be selected per image
            positive_fraction (float): percentage of positive elements per batch
        """
        self.batch_size_per_image = batch_size_per_image
        self.positive_fraction = positive_fraction

    def __call__(self, matched_idxs):
        # type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
        """
        Arguments:
            matched idxs: list of tensors containing -1, 0 or positive values.
                Each tensor corresponds to a specific image.
                -1 values are ignored, 0 are considered as negatives and > 0 as
                positives.

        Returns:
            pos_idx (list[tensor])
            neg_idx (list[tensor])

        Two lists of uint8 masks, one entry per image: the first marks the
        sampled positive elements, the second the sampled negative elements.
        """
        pos_idx = []
        neg_idx = []
        for labels in matched_idxs:
            # Candidates: values >= 1 are positives, values == 0 are negatives.
            pos_candidates = torch.where(torch.ge(labels, 1))[0]
            neg_candidates = torch.where(torch.eq(labels, 0))[0]

            # Positive quota, capped by the number of available positives.
            pos_quota = int(self.batch_size_per_image * self.positive_fraction)
            pos_quota = min(pos_candidates.numel(), pos_quota)
            # The negatives fill the rest of the batch, capped the same way.
            neg_quota = self.batch_size_per_image - pos_quota
            neg_quota = min(neg_candidates.numel(), neg_quota)

            # Pick the samples at random: shuffle the candidates and keep
            # the first `quota` entries.
            pos_pick = torch.randperm(pos_candidates.numel(), device=pos_candidates.device)[:pos_quota]
            neg_pick = torch.randperm(neg_candidates.numel(), device=neg_candidates.device)[:neg_quota]

            chosen_pos = pos_candidates[pos_pick]
            chosen_neg = neg_candidates[neg_pick]

            # Turn the chosen indices into binary masks over all elements.
            pos_mask = torch.zeros_like(labels, dtype=torch.uint8)
            neg_mask = torch.zeros_like(labels, dtype=torch.uint8)
            pos_mask[chosen_pos] = 1
            neg_mask[chosen_neg] = 1

            pos_idx.append(pos_mask)
            neg_idx.append(neg_mask)

        return pos_idx, neg_idx


@torch.jit._script_if_tracing
def encode_boxes(reference_boxes, proposals, weights):
    # type: (torch.Tensor, torch.Tensor, torch.Tensor) -> torch.Tensor
    """
    Compute the regression targets that map proposals onto reference boxes.

    Arguments:
        reference_boxes (Tensor): reference boxes (gt), (x1, y1, x2, y2) rows
        proposals (Tensor): boxes to be encoded (anchors), (x1, y1, x2, y2) rows
        weights: four scaling factors applied to (dx, dy, dw, dh)

    Returns:
        Tensor with one (dx, dy, dw, dh) row per box pair.
    """

    # perform some unpacking to make it JIT-fusion friendly
    wx = weights[0]
    wy = weights[1]
    ww = weights[2]
    wh = weights[3]

    # Every coordinate is taken as a column vector of shape [N, 1]
    # (unsqueeze inserts the size-one dimension).
    anchor_x1 = proposals[:, 0].unsqueeze(1)
    anchor_y1 = proposals[:, 1].unsqueeze(1)
    anchor_x2 = proposals[:, 2].unsqueeze(1)
    anchor_y2 = proposals[:, 3].unsqueeze(1)

    gt_x1 = reference_boxes[:, 0].unsqueeze(1)
    gt_y1 = reference_boxes[:, 1].unsqueeze(1)
    gt_x2 = reference_boxes[:, 2].unsqueeze(1)
    gt_y2 = reference_boxes[:, 3].unsqueeze(1)

    # Width, height and center of the boxes being encoded.
    anchor_w = anchor_x2 - anchor_x1
    anchor_h = anchor_y2 - anchor_y1
    anchor_cx = anchor_x1 + 0.5 * anchor_w
    anchor_cy = anchor_y1 + 0.5 * anchor_h

    # Width, height and center of the reference boxes.
    gt_w = gt_x2 - gt_x1
    gt_h = gt_y2 - gt_y1
    gt_cx = gt_x1 + 0.5 * gt_w
    gt_cy = gt_y1 + 0.5 * gt_h

    # Center shifts are normalised by the anchor size; size changes are log ratios.
    dx = wx * (gt_cx - anchor_cx) / anchor_w
    dy = wy * (gt_cy - anchor_cy) / anchor_h
    dw = ww * torch.log(gt_w / anchor_w)
    dh = wh * torch.log(gt_h / anchor_h)

    return torch.cat((dx, dy, dw, dh), dim=1)


class BoxCoder(object):
    """
    Converts between absolute box coordinates and the (dx, dy, dw, dh)
    representation used for training the box regressors.
    """

    def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)):
        # type: (Tuple[float, float, float, float], float) -> None
        """
        Arguments:
            weights (4-element tuple): scaling factors for (dx, dy, dw, dh)
            bbox_xform_clip (float): upper bound applied to dw/dh in
                ``decode_single`` before they are fed to ``torch.exp``
        """
        self.weights = weights
        self.bbox_xform_clip = bbox_xform_clip

    def encode(self, reference_boxes, proposals):
        # type: (List[Tensor], List[Tensor]) -> List[Tensor]
        """
        Compute the regression parameters from anchors and their matched gt boxes.

        Args:
            reference_boxes: List[Tensor], the gt box matched to each proposal/anchor
            proposals: List[Tensor], anchors/proposals

        Returns: regression parameters, split back per image

        """
        # Remember how many boxes each image contributes so that the batch can be
        # processed as one tensor and split again afterwards.
        # reference_boxes and proposals share the same structure.
        split_sizes = [len(per_image) for per_image in reference_boxes]
        all_reference = torch.cat(reference_boxes, dim=0)
        all_proposals = torch.cat(proposals, dim=0)

        # columns: targets_dx, targets_dy, targets_dw, targets_dh
        encoded = self.encode_single(all_reference, all_proposals)
        return encoded.split(split_sizes, 0)

    def encode_single(self, reference_boxes, proposals):
        """
        Encode a single tensor of proposals with respect to the reference boxes.

        Arguments:
            reference_boxes (Tensor): reference boxes
            proposals (Tensor): boxes to be encoded
        """
        # The weights are materialised with the dtype/device of the reference boxes.
        weight_tensor = torch.as_tensor(
            self.weights,
            dtype=reference_boxes.dtype,
            device=reference_boxes.device,
        )
        return encode_boxes(reference_boxes, proposals, weight_tensor)

    def decode(self, rel_codes, boxes):
        # type: (Tensor, List[Tensor]) -> Tensor
        """
        Apply predicted regression parameters to a list of per-image boxes.

        Args:
            rel_codes: bbox regression parameters
            boxes: anchors/proposals, one tensor per image

        Returns:
            the decoded boxes; reshaped to (total number of boxes, -1, 4) when
            at least one box is present

        """
        assert isinstance(boxes, (list, tuple))
        assert isinstance(rel_codes, torch.Tensor)

        total_boxes = 0
        for per_image in boxes:
            total_boxes += per_image.size(0)

        # Apply the predicted regression parameters to the matching anchors
        # to obtain the predicted box coordinates.
        decoded = self.decode_single(rel_codes, torch.cat(boxes, dim=0))

        if total_boxes > 0:
            decoded = decoded.reshape(total_boxes, -1, 4)
        return decoded

    def decode_single(self, rel_codes, boxes):
        """
        From a set of original boxes and encoded relative box offsets,
        get the decoded boxes.

        Arguments:
            rel_codes (Tensor): encoded boxes (bbox regression parameters)
            boxes (Tensor): reference boxes (anchors/proposals)
        """
        boxes = boxes.to(rel_codes.dtype)

        # boxes are laid out as xmin, ymin, xmax, ymax
        x_min = boxes[:, 0]
        y_min = boxes[:, 1]
        widths = boxes[:, 2] - x_min      # anchor/proposal width
        heights = boxes[:, 3] - y_min     # anchor/proposal height
        ctr_x = x_min + 0.5 * widths      # anchor/proposal centre x
        ctr_y = y_min + 0.5 * heights     # anchor/proposal centre y

        # original note: RPN uses [1,1,1,1], Fast R-CNN uses [10,10,5,5]
        wx, wy, ww, wh = self.weights
        dx = rel_codes[:, 0::4] / wx   # predicted centre-x regression parameter
        dy = rel_codes[:, 1::4] / wy   # predicted centre-y regression parameter

        # Limit the width/height parameters so torch.exp() is never fed very
        # large values (the default clip is math.log(1000. / 16), about 4.135).
        dw = torch.clamp(rel_codes[:, 2::4] / ww, max=self.bbox_xform_clip)
        dh = torch.clamp(rel_codes[:, 3::4] / wh, max=self.bbox_xform_clip)

        pred_ctr_x = dx * widths[:, None] + ctr_x[:, None]
        pred_ctr_y = dy * heights[:, None] + ctr_y[:, None]
        pred_w = torch.exp(dw) * widths[:, None]
        pred_h = torch.exp(dh) * heights[:, None]

        # Half extents; 0.5 is wrapped in a tensor of matching dtype/device.
        half_w = torch.tensor(0.5, dtype=pred_ctr_x.dtype, device=pred_w.device) * pred_w
        half_h = torch.tensor(0.5, dtype=pred_ctr_y.dtype, device=pred_h.device) * pred_h

        corners = (
            pred_ctr_x - half_w,   # xmin
            pred_ctr_y - half_h,   # ymin
            pred_ctr_x + half_w,   # xmax
            pred_ctr_y + half_h,   # ymax
        )
        return torch.stack(corners, dim=2).flatten(1)


class Matcher(object):
    """
    Assigns to every prediction (anchor) the index of its best ground-truth
    box, or a negative marker when the match quality is too low.
    """
    BELOW_LOW_THRESHOLD = -1
    BETWEEN_THRESHOLDS = -2

    __annotations__ = {
        'BELOW_LOW_THRESHOLD': int,
        'BETWEEN_THRESHOLDS': int,
    }

    def __init__(self, high_threshold, low_threshold, allow_low_quality_matches=False):
        # type: (float, float, bool) -> None
        """
        Args:
            high_threshold (float): quality values greater than or equal to
                this value are candidate matches.
            low_threshold (float): a lower quality threshold used to stratify
                matches into three levels:
                1) matches >= high_threshold
                2) BETWEEN_THRESHOLDS matches in [low_threshold, high_threshold)
                3) BELOW_LOW_THRESHOLD matches in [0, low_threshold)
            allow_low_quality_matches (bool): if True, produce additional matches
                for predictions that have only low-quality match candidates. See
                set_low_quality_matches_ for more details.
        """
        # The markers are mirrored onto the instance as well.
        self.BELOW_LOW_THRESHOLD = -1
        self.BETWEEN_THRESHOLDS = -2
        assert low_threshold <= high_threshold
        self.high_threshold = high_threshold
        self.low_threshold = low_threshold
        self.allow_low_quality_matches = allow_low_quality_matches

    def __call__(self, match_quality_matrix):
        """
        For every prediction find the gt box with the highest quality (IoU) and
        record its index. Predictions whose best quality is below low_threshold
        get -1; those in [low_threshold, high_threshold) get -2.

        Args:
            match_quality_matrix (Tensor[float]): an MxN tensor, containing the
            pairwise quality between M ground-truth elements and N predicted elements.

        Returns:
            matches (Tensor[int64]): an N tensor where N[i] is a matched gt in
            [0, M - 1] or a negative value indicating that prediction i could not
            be matched.
        """
        if match_quality_matrix.numel() == 0:
            # empty targets or proposals not supported during training
            if match_quality_matrix.shape[0] != 0:
                raise ValueError(
                    "No proposal boxes available for one of the images "
                    "during training")
            raise ValueError(
                "No ground-truth boxes available for one of the images "
                "during training")

        # The matrix is M (gt) x N (predicted): each column holds the quality of
        # one prediction against every gt box. Reducing over dim 0 yields, per
        # prediction, the best quality and the index of the gt that achieved it.
        best_quality, matches = match_quality_matrix.max(dim=0)

        # Keep an untouched copy of the raw assignment when low-quality matches
        # may need to be restored later.
        raw_matches = matches.clone() if self.allow_low_quality_matches else None

        # Predictions whose best quality is below the low threshold.
        too_low = best_quality < self.low_threshold
        # Predictions whose best quality lies in [low_threshold, high_threshold).
        in_between = (best_quality >= self.low_threshold) & (
            best_quality < self.high_threshold
        )

        matches[too_low] = self.BELOW_LOW_THRESHOLD      # -1
        matches[in_between] = self.BETWEEN_THRESHOLDS    # -2

        if self.allow_low_quality_matches:
            assert raw_matches is not None
            self.set_low_quality_matches_(matches, raw_matches, match_quality_matrix)

        return matches

    def set_low_quality_matches_(self, matches, all_matches, match_quality_matrix):
        """
        Produce additional matches for predictions that have only low-quality matches.
        Specifically, for each ground-truth find the set of predictions that have
        maximum overlap with it (including ties); for each prediction in that set, if
        it is unmatched, then match it to the ground-truth with which it has the highest
        quality value.

        ``matches`` is modified in place.
        """
        # Best quality reached by each gt box over all predictions.
        best_per_gt, _ = match_quality_matrix.max(dim=1)

        # Locate every (gt, prediction) position that attains that best value.
        # Ties are kept, so a gt may contribute several predictions.
        # torch.where on the boolean matrix returns a tuple
        # (gt indices, prediction indices); only the prediction indices are used.
        is_best_for_gt = torch.eq(match_quality_matrix, best_per_gt[:, None])
        preds_to_restore = torch.where(is_best_for_gt)[1]

        # Restore the raw best-gt index for those predictions, even if their
        # quality is below the configured thresholds.
        matches[preds_to_restore] = all_matches[preds_to_restore]


def smooth_l1_loss(input, target, beta: float = 1. / 9, size_average: bool = True):
    """
    Smooth L1 loss, very similar to the smooth_l1_loss from pytorch, but with
    the extra beta parameter.

    Elements whose absolute error is below ``beta`` use the quadratic term
    ``0.5 * n ** 2 / beta``; the others use the linear term ``n - 0.5 * beta``.

    Args:
        input: predictions
        target: regression targets, broadcastable against ``input``
        beta: transition point between the quadratic and the linear part.
            Values below 1e-5 select the plain L1 loss.
        size_average: return the mean of the element-wise loss when True,
            the sum otherwise

    Returns:
        the reduced loss
    """
    abs_err = torch.abs(input - target)
    if beta < 1e-5:
        # With beta == 0 the quadratic branch computes 0 / 0. torch.where would
        # discard the value in the forward pass, but the NaN still reaches the
        # gradient, so fall back to the pure L1 loss in this regime.
        loss = abs_err
    else:
        in_quadratic_zone = torch.lt(abs_err, beta)
        loss = torch.where(in_quadratic_zone, 0.5 * abs_err ** 2 / beta, abs_err - 0.5 * beta)
    if size_average:
        return loss.mean()
    return loss.sum()


================================================
FILE: pytorch_object_detection/retinaNet/network_files/image_list.py
================================================
from typing import List, Tuple
from torch import Tensor


class ImageList(object):
    """
    Container for a batch of images of possibly different sizes.

    The images are stored as one padded tensor; the sizes the images had
    before padding are kept next to it.
    """

    def __init__(self, tensors, image_sizes):
        # type: (Tensor, List[Tuple[int, int]]) -> None
        """
        Arguments:
            tensors (tensor): image data after padding
            image_sizes (list[tuple[int, int]]): image sizes before padding
        """
        self.image_sizes = image_sizes
        self.tensors = tensors

    def to(self, device):
        # type: (Device) -> ImageList # noqa
        """Return a new ImageList whose tensor was moved with ``Tensor.to(device)``."""
        return ImageList(self.tensors.to(device), self.image_sizes)


================================================
FILE: pytorch_object_detection/retinaNet/network_files/losses.py
================================================
import torch
import torch.nn.functional as F


def sigmoid_focal_loss(
    inputs: torch.Tensor,
    targets: torch.Tensor,
    alpha: float = 0.25,
    gamma: float = 2,
    reduction: str = "none",
):
    """
    Focal loss on sigmoid outputs, as used by RetinaNet for dense detection
    (https://arxiv.org/abs/1708.02002).
    Original implementation from https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/focal_loss.py .

    Args:
        inputs: A float tensor of arbitrary shape holding the logits
                predicted for each example.
        targets: A float tensor with the same shape as inputs holding the
                binary classification label of each element
                (0 for the negative class and 1 for the positive class).
        alpha: (optional) Weighting factor in range (0,1) to balance
                positive vs negative examples; a negative value disables
                the weighting. Default = 0.25
        gamma: Exponent of the modulating factor (1 - p_t) to
               balance easy vs hard examples.
        reduction: 'none' | 'mean' | 'sum'
                 'none': No reduction will be applied to the output.
                 'mean': The output will be averaged.
                 'sum': The output will be summed.
                 Any other value behaves like 'none'.
    Returns:
        Loss tensor with the reduction option applied.
    """
    prob = torch.sigmoid(inputs)
    bce = F.binary_cross_entropy_with_logits(
        inputs, targets, reduction="none"
    )

    # Probability the model assigns to the true class of each element.
    prob_true = prob * targets + (1 - prob) * (1 - targets)
    focal = bce * ((1 - prob_true) ** gamma)

    if alpha >= 0:
        # alpha for positives, (1 - alpha) for negatives
        class_weight = alpha * targets + (1 - alpha) * (1 - targets)
        focal = class_weight * focal

    if reduction == "mean":
        return focal.mean()
    if reduction == "sum":
        return focal.sum()
    return focal


================================================
FILE: pytorch_object_detection/retinaNet/network_files/retinanet.py
================================================
import math
import warnings
from collections import OrderedDict
from typing import Dict, List, Tuple, Optional, Union

import torch
from torch import nn, Tensor

from . import det_utils
from .anchor_utils import AnchorsGenerator
from . import boxes as box_ops
from .losses import sigmoid_focal_loss
from .transform import GeneralizedRCNNTransform


def _sum(x: List[Tensor]) -> Tensor:
    """Add up the elements of ``x`` with ``+``, starting from its first element."""
    running = x[0]
    for position in range(1, len(x)):
        running = running + x[position]
    return running


class RetinaNetClassificationHead(nn.Module):
    """
    A classification head for use in RetinaNet.

    Args:
        in_channels (int): number of channels of the input feature
        num_anchors (int): number of anchors to be predicted
        num_classes (int): number of classes to be predicted
        prior_probability (float): used to initialise the bias of the final
            classifier to ``-log((1 - p) / p)``
    """

    def __init__(self, in_channels, num_anchors, num_classes, prior_probability=0.01):
        super(RetinaNetClassificationHead, self).__init__()

        # The class subnet consists of four 3x3 conv layers (each followed by ReLU)
        # plus one 3x3 conv layer acting as the classifier.
        conv = []
        for _ in range(4):
            conv.append(nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1))
            conv.append(nn.ReLU(inplace=True))
        self.conv = nn.Sequential(*conv)

        self.cls_logits = nn.Conv2d(in_channels, num_anchors * num_classes, kernel_size=3, stride=1, padding=1)

        # initial weights
        for layer in self.conv.children():
            if isinstance(layer, nn.Conv2d):
                torch.nn.init.normal_(layer.weight, std=0.01)
                torch.nn.init.constant_(layer.bias, 0)

        torch.nn.init.normal_(self.cls_logits.weight, std=0.01)
        torch.nn.init.constant_(self.cls_logits.bias, -math.log((1 - prior_probability) / prior_probability))

        self.num_classes = num_classes
        self.num_anchors = num_anchors

        # Marker the matcher assigns to anchors that must be ignored by the loss.
        self.BETWEEN_THRESHOLDS = det_utils.Matcher.BETWEEN_THRESHOLDS

    def compute_loss(self,
                     targets: List[Dict[str, Tensor]],
                     head_outputs: Dict[str, Tensor],
                     matched_idxs: List[Tensor]) -> Tensor:
        """
        Focal classification loss averaged over the images of the batch.

        Args:
            targets: per-image dicts; the "labels" entry is read here
            head_outputs: head predictions; the "cls_logits" entry is read here
            matched_idxs: per-image matcher output (gt index >= 0 for foreground,
                negative markers otherwise)

        Returns:
            the summed per-image losses divided by ``len(targets)``
        """
        losses = []
        cls_logits = head_outputs["cls_logits"]
        for targets_per_img, cls_logits_per_img, matched_idxs_per_img in zip(targets, cls_logits, matched_idxs):
            # determine only the foreground: anchors matched to a gt box
            foreground_idxs_per_img = torch.ge(matched_idxs_per_img, 0)  # ge: >=
            num_foreground = foreground_idxs_per_img.sum()

            # create the target classification: one-hot on the class of the
            # matched gt box for foreground anchors, all zeros elsewhere
            gt_classes_target = torch.zeros_like(cls_logits_per_img)
            gt_classes_target[
                foreground_idxs_per_img,
                targets_per_img["labels"][matched_idxs_per_img[foreground_idxs_per_img]]
            ] = 1.0

            # find indices for which anchors should be ignored: those marked
            # BETWEEN_THRESHOLDS by the matcher
            valid_idxs_per_img = torch.ne(matched_idxs_per_img, self.BETWEEN_THRESHOLDS)  # ne: !=

            # compute the classification loss
            # note: the sum is normalised by the number of positive anchors
            losses.append(sigmoid_focal_loss(
                cls_logits_per_img[valid_idxs_per_img],
                gt_classes_target[valid_idxs_per_img],
                reduction="sum"
            ) / max(1, num_foreground))

        # len(targets): batch_size
        return _sum(losses) / len(targets)

    def forward(self, x: List[Tensor]) -> Tensor:
        """
        Run the class subnet on every feature map and concatenate the results.

        Args:
            x: feature maps, one tensor of shape (N, C, H, W) per prediction level

        Returns:
            class logits of shape (N, sum over levels of H*W*A, K)
        """
        all_cls_logits = []

        # iterate over the prediction feature levels
        for features in x:
            cls_logits = self.conv(features)
            cls_logits = self.cls_logits(cls_logits)

            # Permute classification output from (N, A * K, H, W) to (N, HWA, K).
            N, _, H, W = cls_logits.shape
            cls_logits = cls_logits.view(N, -1, self.num_classes, H, W)
            # [N, A, K, H, W] -> [N, H, W, A, K]
            cls_logits = cls_logits.permute(0, 3, 4, 1, 2)
            # [N, H, W, A, K] -> [N, HWA, K]
            cls_logits = cls_logits.reshape(N, -1, self.num_classes)

            all_cls_logits.append(cls_logits)

        return torch.cat(all_cls_logits, dim=1)


class RetinaNetRegressionHead(nn.Module):
    """
    A regression head for use in RetinaNet.

    Args:
        in_channels (int): number of channels of the input feature
        num_anchors (int): number of anchors to be predicted
    """

    __annotations__ = {
        'box_coder': det_utils.BoxCoder,
    }

    def __init__(self, in_channels, num_anchors):
        super(RetinaNetRegressionHead, self).__init__()

        # The box subnet consists of four 3x3 conv layers (each followed by ReLU)
        # plus one 3x3 conv layer acting as the bounding-box regressor.
        tower = []
        for _ in range(4):
            tower.extend([
                nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1),
                nn.ReLU(inplace=True),
            ])
        self.conv = nn.Sequential(*tower)

        self.bbox_reg = nn.Conv2d(in_channels, num_anchors * 4, kernel_size=3, stride=1, padding=1)

        # initial weights of the tower convolutions
        for module in self.conv.children():
            if not isinstance(module, nn.Conv2d):
                continue
            torch.nn.init.normal_(module.weight, std=0.01)
            torch.nn.init.zeros_(module.bias)

        self.bbox_coder = det_utils.BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))

    def compute_loss(self,
                     targets: List[Dict[str, Tensor]],
                     head_outputs: Dict[str, Tensor],
                     anchors: List[Tensor],
                     matched_idxs: List[Tensor]) -> Tensor:
        """
        L1 box-regression loss over the foreground anchors, averaged over the batch.

        Args:
            targets: per-image dicts; the "boxes" entry is read here
            head_outputs: head predictions; the "bbox_regression" entry is read here
            anchors: per-image anchors
            matched_idxs: per-image matcher output (gt index >= 0 for foreground)

        Returns:
            the summed per-image losses divided by ``max(1, len(targets))``
        """
        per_image_losses = []

        all_regression = head_outputs["bbox_regression"]
        for img_targets, img_regression, img_anchors, img_matches in \
                zip(targets, all_regression, anchors, matched_idxs):
            # only anchors matched to a gt box (index >= 0) take part in the loss
            fg_inds = torch.where(torch.ge(img_matches, 0))[0]  # ge: >=
            fg_count = fg_inds.numel()

            # gather, for the foreground anchors only, the matched gt box,
            # the prediction and the anchor itself
            fg_gt_boxes = img_targets["boxes"][img_matches[fg_inds]]
            fg_regression = img_regression[fg_inds, :]
            fg_anchors = img_anchors[fg_inds, :]

            # regression targets of the matched gt boxes relative to the anchors
            regression_targets = self.bbox_coder.encode_single(fg_gt_boxes, fg_anchors)

            # summed L1 loss, normalised by the number of foreground anchors
            l1_total = torch.nn.functional.l1_loss(
                fg_regression,
                regression_targets,
                reduction="sum"
            )
            per_image_losses.append(l1_total / max(1, fg_count))

        return _sum(per_image_losses) / max(1, len(targets))

    def forward(self, x: List[Tensor]) -> Tensor:
        """
        Run the box subnet on every feature map and concatenate the results.

        Args:
            x: feature maps, one tensor of shape (N, C, H, W) per prediction level

        Returns:
            box regression parameters of shape (N, sum over levels of H*W*A, 4)
        """
        per_level_outputs = []

        # iterate over the prediction feature levels
        for feature_map in x:
            deltas = self.bbox_reg(self.conv(feature_map))

            # Permute bbox regression output from (N, 4 * A, H, W) to (N, HWA, 4).
            batch, _, height, width = deltas.shape
            # [N, 4 * A, H, W] -> [N, A, 4, H, W]
            deltas = deltas.view(batch, -1, 4, height, width)
            # [N, A, 4, H, W] -> [N, H, W, A, 4]
            deltas = deltas.permute(0, 3, 4, 1, 2)
            # [N, H, W, A, 4] -> [N, HWA, 4]
            deltas = deltas.reshape(batch, -1, 4)

            per_level_outputs.append(deltas)

        return torch.cat(per_level_outputs, dim=1)


class RetinaNetHead(nn.Module):
    """
    Bundles the classification head and the regression head used in RetinaNet.

    Args:
        in_channels (int): number of channels of the input feature
        num_anchors (int): number of anchors to be predicted
        num_classes (int): number of classes to be predicted
    """

    def __init__(self, in_channels, num_anchors, num_classes):
        super(RetinaNetHead, self).__init__()
        self.classification_head = RetinaNetClassificationHead(in_channels, num_anchors, num_classes)
        self.regression_head = RetinaNetRegressionHead(in_channels, num_anchors)

    def compute_loss(self,
                     targets: List[Dict[str, Tensor]],
                     head_outputs: Dict[str, Tensor],
                     anchors: List[Tensor],
                     matched_idxs: List[Tensor]) -> Dict[str, Tensor]:
        """Return the losses of both sub-heads under the keys "classification" and "bbox_regression"."""
        cls_loss = self.classification_head.compute_loss(targets, head_outputs, matched_idxs)
        box_loss = self.regression_head.compute_loss(targets, head_outputs, anchors, matched_idxs)
        return {
            "classification": cls_loss,
            "bbox_regression": box_loss,
        }

    def forward(self, x: List[Tensor]) -> Dict[str, Tensor]:
        """Return the outputs of both sub-heads under the keys "cls_logits" and "bbox_regression"."""
        logits = self.classification_head(x)
        box_deltas = self.regression_head(x)
        return {
            "cls_logits": logits,
            "bbox_regression": box_deltas,
        }


class RetinaNet(nn.Module):
    """
    Implements RetinaNet.

    The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
    image, and should be in 0-1 range. Different images can have different sizes.

    The behavior of the model changes depending if it is in training or evaluation mode.

    During training, the model expects both the input tensors, as well as a targets (list of dictionary),
    containing:
        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
        - labels (Int64Tensor[N]): the class label for each ground-truth box

    The model returns a Dict[Tensor] during training, containing the classification and regression
    losses.

    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as
    follows:
        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with
          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
        - labels (Int64Tensor[N]): the predicted labels for each image
        - scores (Tensor[N]): the scores for each prediction

    Args:
        backbone (nn.Module): the network used to compute the features for the model.
            It should contain an out_channels attribute, which indicates the number of output
            channels that each feature map has (and it should be the same for all feature maps).
            The backbone should return a single Tensor or an OrderedDict[Tensor].
        num_classes (int): number of output classes of the model (excluding the background).
        min_size (int): minimum size of the image to be rescaled before feeding it to the backbone
        max_size (int): maximum size of the image to be rescaled before feeding it to the backbone
        image_mean (Tuple[float, float, float]): mean values used for input normalization.
            They are generally the mean values of the dataset on which the backbone has been trained
            on
        image_std (Tuple[float, float, float]): std values used for input normalization.
            They are generally the std values of the dataset on which the backbone has been trained on
        anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature
            maps.
        head (nn.Module): Module run on top of the feature pyramid.
            Defaults to a module containing a classification and regression module.
        score_thresh (float): Score threshold used for postprocessing the detections.
        nms_thresh (float): NMS threshold used for postprocessing the detections.
        detections_per_img (int): Number of best detections to keep after NMS.
        fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be
            considered as positive during training.
        bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be
            considered as negative during training.
        topk_candidates (int): Number of best detections to keep before NMS.
    """

    __annotations__ = {
        'box_coder': det_utils.BoxCoder,
        'proposal_matcher': det_utils.Matcher,
    }

    def __init__(self, backbone, num_classes,
                 # transform parameters
                 min_size=800, max_size=1333,
                 image_mean=None, image_std=None,
                 # Anchor parameters
                 anchor_generator=None, head=None,
                 proposal_matcher=None,
                 score_thresh=0.05,
                 nms_thresh=0.5,
                 detections_per_img=100,
                 fg_iou_thresh=0.5, bg_iou_thresh=0.4,
                 topk_candidates=1000):
        """
        Assemble the detector; every component passed as None is replaced by
        the default built below.
        """
        super(RetinaNet, self).__init__()

        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)"
            )
        self.backbone = backbone

        assert isinstance(anchor_generator, (AnchorsGenerator, type(None)))
        if anchor_generator is None:
            # According to the paper, every prediction level uses its base scale x
            # plus the two extra scales x*2^(1/3) and x*2^(2/3).
            # The base scales of the five levels are 32, 64, 128, 256 and 512.
            # Mind the relation between scale and area: area = scale^2.
            base_scales = [32, 64, 128, 256, 512]
            anchor_sizes = tuple(
                (scale, int(scale * 2 ** (1.0 / 3)), int(scale * 2 ** (2.0 / 3)))
                for scale in base_scales
            )
            # Every level uses the same three aspect ratios.
            aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
            anchor_generator = AnchorsGenerator(anchor_sizes, aspect_ratios)
        self.anchor_generator = anchor_generator

        if head is None:
            in_channels = backbone.out_channels
            # The anchor count of the first level is used for the whole head.
            num_anchors = anchor_generator.num_anchors_per_location()[0]
            head = RetinaNetHead(in_channels, num_anchors, num_classes)
        self.head = head

        if proposal_matcher is None:
            proposal_matcher = det_utils.Matcher(
                fg_iou_thresh,
                bg_iou_thresh,
                allow_low_quality_matches=True
            )
        self.proposal_matcher = proposal_matcher

        self.box_coder = det_utils.BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))

        # Default normalisation statistics when none are supplied.
        mean = [0.485, 0.456, 0.406] if image_mean is None else image_mean
        std = [0.229, 0.224, 0.225] if image_std is None else image_std
        self.transform = GeneralizedRCNNTransform(min_size, max_size, mean, std)

        # Post-processing settings.
        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.detections_per_img = detections_per_img
        self.topk_candidates = topk_candidates

        # used only on torchscript mode
        self._has_warned = False

    @torch.jit.unused
    def eager_outputs(self, losses, detections):
        # type: (Dict[str, Tensor], List[Dict[str, Tensor]]) -> Union[Dict[str, Tensor], List[Dict[str, Tensor]]]
        """Return ``losses`` in training mode and ``detections`` otherwise."""
        return losses if self.training else detections

    def compute_loss(self, targets, head_outputs, anchors):
        # type: (List[Dict[str, Tensor]], Dict[str, Tensor], List[Tensor]) -> Dict[str, Tensor]
        """
        Match the anchors of every image to its ground-truth boxes and let the
        head compute the losses.

        Args:
            targets: per-image dicts; the "boxes" entry is read here
            head_outputs: predictions of the head, passed through unchanged
            anchors: per-image anchors

        Returns:
            the loss dict produced by ``self.head.compute_loss``
        """
        matched_idxs = []
        for anchors_per_img, targets_per_img in zip(anchors, targets):
            if targets_per_img["boxes"].numel() == 0:
                # No ground truth in this image: mark every anchor with -1.
                # The tensor is created on the anchors' device so that it can
                # later index tensors living on that device.
                matched_idxs.append(torch.full((anchors_per_img.size(0),), -1, dtype=torch.int64,
                                               device=anchors_per_img.device))
                continue

            # IoU between every gt box (rows) and every anchor (columns)
            match_quality_matrix = box_ops.box_iou(targets_per_img["boxes"], anchors_per_img)
            matched_idxs.append(self.proposal_matcher(match_quality_matrix))

        return self.head.compute_loss(targets, head_outputs, anchors, matched_idxs)

    def postprocess_detections(self, head_output, anchors, image_shapes):
        # type: (Dict[str, List[Tensor]], List[List[Tensor]], List[Tuple[int, int]]) -> List[Dict[str, Tensor]]
        """
        Turn the raw head outputs into final detections for every image.

        Per prediction level, low-scoring candidates are dropped, the top-k
        remaining ones are decoded and clipped to the image; the candidates of
        all levels are then merged and filtered with batched NMS.

        Returns:
            one dict per image with the keys "boxes", "scores" and "labels"
        """
        logits_by_level = head_output["cls_logits"]
        regression_by_level = head_output["bbox_regression"]

        detections: List[Dict[str, Tensor]] = []

        for img_idx in range(len(image_shapes)):
            # pick the slices belonging to this image on every level
            img_regression = [level[img_idx] for level in regression_by_level]
            img_logits = [level[img_idx] for level in logits_by_level]
            img_anchors = anchors[img_idx]
            img_shape = image_shapes[img_idx]

            boxes_all_levels = []
            scores_all_levels = []
            labels_all_levels = []

            for level_regression, level_logits, level_anchors in \
                    zip(img_regression, img_logits, img_anchors):
                num_classes = level_logits.shape[-1]

                # remove low scoring candidates
                level_scores = torch.sigmoid(level_logits).flatten()
                above_thresh = torch.gt(level_scores, self.score_thresh)  # gt: >
                level_scores = level_scores[above_thresh]
                candidate_idxs = torch.where(above_thresh)[0]

                # keep only the topk scoring candidates of this level
                k = min(self.topk_candidates, candidate_idxs.size(0))
                level_scores, order = level_scores.topk(k)
                candidate_idxs = candidate_idxs[order]

                # a flat index encodes (anchor, class) as anchor * num_classes + class
                anchor_idxs = candidate_idxs // num_classes
                level_labels = candidate_idxs % num_classes

                level_boxes = self.box_coder.decode_single(level_regression[anchor_idxs],
                                                           level_anchors[anchor_idxs])
                level_boxes = box_ops.clip_boxes_to_image(level_boxes, img_shape)

                boxes_all_levels.append(level_boxes)
                scores_all_levels.append(level_scores)
                labels_all_levels.append(level_labels)

            merged_boxes = torch.cat(boxes_all_levels, dim=0)
            merged_scores = torch.cat(scores_all_levels, dim=0)
            merged_labels = torch.cat(labels_all_levels, dim=0)

            # non-maximum suppression, then cap the number of detections
            keep = box_ops.batched_nms(merged_boxes, merged_scores, merged_labels, self.nms_thresh)
            keep = keep[:self.detections_per_img]

            detections.append({
                "boxes": merged_boxes[keep],
                "scores": merged_scores[keep],
                "labels": merged_labels[keep]
            })

        return detections

    def forward(self, images, targets=None):
        # type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]
        """
        Run the detector on a batch of images.

        Args:
            images (list[Tensor]): images to be processed. The last two
                dimensions of every tensor are read as (height, width).
            targets (list[Dict[str, Tensor]]): per-image ground truth (optional).
                Mandatory in training mode; every dict must provide a "boxes"
                tensor of shape [N, 4].

        Returns:
            When scripted, the tuple ``(losses, detections)``. In eager mode,
            whatever ``self.eager_outputs(losses, detections)`` returns.
            ``losses`` comes from ``self.compute_loss`` in training mode and is an
            empty dict otherwise. ``detections`` is only filled in eval mode: one
            dict per image with the keys "boxes", "scores" and "labels", after
            ``self.transform.postprocess`` has been applied.

        Raises:
            ValueError: in training mode when ``targets`` is missing, when a
                target's "boxes" entry is not a tensor of shape [N, 4], or when
                a box has non-positive width or height after the transform.
        """
        if self.training and targets is None:
            raise ValueError("In training mode, targets should be passed")

        if self.training:
            # the assert also narrows Optional[...] for TorchScript
            assert targets is not None
            # check targets info
            for target in targets:
                boxes = target["boxes"]
                if isinstance(boxes, torch.Tensor):
                    if len(boxes.shape) != 2 or boxes.shape[-1] != 4:
                        raise ValueError("Expected target boxes to be a tensor "
                                         "of shape [N, 4], got {:}.".format(boxes.shape))
                else:
                    raise ValueError("Expected target boxes to be of type "
                                     "Tensor, got {:}.".format(type(boxes)))

        # remember the original image sizes so that predictions can be mapped back later
        original_img_sizes: List[Tuple[int, int]] = []
        for img in images:
            val = img.shape[-2:]
            assert len(val) == 2
            original_img_sizes.append((val[0], val[1]))  # h, w

        # transform the input
        images, targets = self.transform(images, targets)

        # Check for degenerate boxes
        # TODO: Move this to a function
        if targets is not None:
            for target_idx, target in enumerate(targets):
                boxes = target["boxes"]
                # a box is degenerate when xmax <= xmin or ymax <= ymin
                degenerate_boxes = boxes[:, 2:] <= boxes[:, :2]
                if degenerate_boxes.any():
                    # report the first degenerate box
                    bb_idx = torch.where(degenerate_boxes.any(dim=1))[0][0]
                    degen_bb: List[float] = boxes[bb_idx].tolist()
                    raise ValueError("All bounding boxes should have positive height and width."
                                     " Found invalid box {} for target at index {}."
                                     .format(degen_bb, target_idx))

        # get the features from the backbone
        features = self.backbone(images.tensors)
        if isinstance(features, torch.Tensor):
            # a single feature map is wrapped so the code below can treat it like a dict
            features = OrderedDict([("0", features)])

        features = list(features.values())

        # compute the retinanet heads outputs using the features
        head_outputs = self.head(features)

        # create the set of anchors
        anchors = self.anchor_generator(images, features)

        losses = {}
        detections: List[Dict[str, Tensor]] = []
        if self.training:
            assert targets is not None
            losses = self.compute_loss(targets, head_outputs, anchors)
        else:
            # recover level sizes: H * W locations per feature map
            num_anchors_per_level = [x.size(2) * x.size(3) for x in features]
            HW = 0
            for v in num_anchors_per_level:
                HW += v
            # dim 1 of cls_logits is divided by HW to recover A, the anchors per location
            HWA = head_outputs["cls_logits"].size(1)
            A = HWA // HW
            num_anchors_per_level = [hw * A for hw in num_anchors_per_level]

            # split outputs per level
            split_head_outputs: Dict[str, List[Tensor]] = {}
            for k in head_outputs:
                split_head_outputs[k] = list(head_outputs[k].split(num_anchors_per_level, dim=1))
            split_anchors = [list(a.split(num_anchors_per_level)) for a in anchors]

            # compute the detections, then let the transform post-process them
            detections = self.postprocess_detections(split_head_outputs, split_anchors, images.image_sizes)
            detections = self.transform.postprocess(detections, images.image_sizes, original_img_sizes)

        if torch.jit.is_scripting():
            if not self._has_warned:
                warnings.warn("RetinaNet always returns a (Losses, Detections) tuple in scripting")
                self._has_warned = True
            return losses, detections
        return self.eager_outputs(losses, detections)


================================================
FILE: pytorch_object_detection/retinaNet/network_files/transform.py
================================================
import math
from typing import List, Tuple, Dict, Optional

import torch
from torch import nn, Tensor
import torchvision

from .image_list import ImageList


@torch.jit.unused
def _resize_image_onnx(image, self_min_size, self_max_size):
    # type: (Tensor, float, float) -> Tensor
    from torch.onnx import operators
    im_shape = operators.shape_as_tensor(image)[-2:]
    min_size = torch.min(im_shape).to(dtype=torch.float32)
    max_size = torch.max(im_shape).to(dtype=torch.float32)
    scale_factor = torch.min(self_min_size / min_size, self_max_size / max_size)

    image = torch.nn.functional.interpolate(
        image[None], scale_factor=scale_factor, mode="bilinear", recompute_scale_factor=True,
        align_corners=False)[0]

    return image


def _resize_image(image, self_min_size, self_max_size):
    # type: (Tensor, float, float) -> Tensor
    im_shape = torch.tensor(image.shape[-2:])
    min_size = float(torch.min(im_shape))    # 获取高宽中的最小值
    max_size = float(torch.max(im_shape))    # 获取高宽中的最大值
    scale_factor = self_min_size / min_size  # 根据指定最小边长和图片最小边长计算缩放比例

    # 如果使用该缩放比例计算的图片最大边长大于指定的最大边长
    if max_size * scale_factor > self_max_size:
        scale_factor = self_max_size / max_size  # 将缩放比例设为指定最大边长和图片最大边长之比

    # interpolate利用插值的方法缩放图片
    # image[None]操作是在最前面添加batch维度[C, H, W] -> [1, C, H, W]
    # bilinear只支持4D Tensor
    image = torch.nn.functional.interpolate(
        image[None], scale_factor=scale_factor, mode="bilinear", recompute_scale_factor=True,
        align_corners=False)[0]

    return image


class GeneralizedRCNNTransform(nn.Module):
    """
    Performs input / target transformation before feeding the data to a GeneralizedRCNN
    model.

    The transformations it performs are:
        - input normalization (mean subtraction and std division)
        - input / target resizing to match min_size / max_size
        - zero-padding the resized images into one batch tensor

    It returns an ImageList for the inputs, and a List[Dict[Tensor]] for the targets
    """

    def __init__(self, min_size, max_size, image_mean, image_std):
        """
        Args:
            min_size: target length(s) of the shorter image side; a scalar is
                wrapped into a one-element tuple
            max_size: upper bound for the longer image side
            image_mean: per-channel mean used by ``normalize``
            image_std: per-channel standard deviation used by ``normalize``
        """
        super(GeneralizedRCNNTransform, self).__init__()
        if not isinstance(min_size, (list, tuple)):
            min_size = (min_size,)
        self.min_size = min_size      # candidate lengths for the shorter image side
        self.max_size = max_size      # upper bound for the longer image side
        self.image_mean = image_mean  # per-channel mean used for normalization
        self.image_std = image_std    # per-channel standard deviation used for normalization

    def normalize(self, image):
        """
        Normalize ``image`` channel-wise: ``(image - mean) / std``.

        Raises:
            TypeError: if ``image`` is not a floating point tensor. Mean and std
                are cast to the image dtype, so an integer image would truncate
                them and produce meaningless values.
        """
        if not image.is_floating_point():
            raise TypeError("Expected input images to be of floating type, "
                            "but found type {} instead".format(image.dtype))
        dtype, device = image.dtype, image.device
        mean = torch.as_tensor(self.image_mean, dtype=dtype, device=device)
        std = torch.as_tensor(self.image_std, dtype=dtype, device=device)
        # [:, None, None]: shape [3] -> [3, 1, 1]
        return (image - mean[:, None, None]) / std[:, None, None]

    def torch_choice(self, k):
        # type: (List[int]) -> int
        """
        Implements `random.choice` via torch ops so it can be compiled with
        TorchScript. Remove if https://github.com/pytorch/pytorch/issues/25803
        is fixed.
        """
        index = int(torch.empty(1).uniform_(0., float(len(k))).item())
        return k[index]

    def resize(self, image, target):
        # type: (Tensor, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]
        """
        Resize the image into the configured size range and scale the boxes to match.

        Args:
            image: input image
            target: annotations of the image (including the "boxes" entry), or None

        Returns:
            image: the resized image
            target: the same dict with "boxes" rescaled, or None when no target was given
        """
        # image shape is [channel, height, width]
        h, w = image.shape[-2:]

        if self.training:
            # pick one of the configured shorter-side lengths at random
            size = float(self.torch_choice(self.min_size))
        else:
            # FIXME assume for now that testing uses the largest scale
            size = float(self.min_size[-1])

        if torchvision._is_tracing():
            image = _resize_image_onnx(image, size, float(self.max_size))
        else:
            image = _resize_image(image, size, float(self.max_size))

        if target is None:
            return image, target

        bbox = target["boxes"]
        # scale the boxes by the same ratio as the image
        bbox = resize_boxes(bbox, [h, w], image.shape[-2:])
        target["boxes"] = bbox

        return image, target

    # _onnx_batch_images() is an implementation of
    # batch_images() that is supported by ONNX tracing.
    @torch.jit.unused
    def _onnx_batch_images(self, images, size_divisible=32):
        # type: (List[Tensor], int) -> Tensor
        max_size = []
        for i in range(images[0].dim()):
            max_size_i = torch.max(torch.stack([img.shape[i] for img in images]).to(torch.float32)).to(torch.int64)
            max_size.append(max_size_i)
        stride = size_divisible
        # round height and width up to a multiple of stride
        max_size[1] = (torch.ceil((max_size[1].to(torch.float32)) / stride) * stride).to(torch.int64)
        max_size[2] = (torch.ceil((max_size[2].to(torch.float32)) / stride) * stride).to(torch.int64)
        max_size = tuple(max_size)

        # work around for
        # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
        # which is not yet supported in onnx
        padded_imgs = []
        for img in images:
            padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))]
            padded_img = torch.nn.functional.pad(img, [0, padding[2], 0, padding[1], 0, padding[0]])
            padded_imgs.append(padded_img)

        return torch.stack(padded_imgs)

    def max_by_axis(self, the_list):
        # type: (List[List[int]]) -> List[int]
        """
        Return the element-wise maximum over the given lists.

        The result is a new list; the input lists are left untouched.
        """
        # copy the first entry so the caller's list is not modified in place
        maxes = list(the_list[0])
        for sublist in the_list[1:]:
            for index, item in enumerate(sublist):
                maxes[index] = max(maxes[index], item)
        return maxes

    def batch_images(self, images, size_divisible=32):
        # type: (List[Tensor], int) -> Tensor
        """
        Pack a list of images into one batch tensor (all entries share one shape).

        Args:
            images: the images to batch
            size_divisible: height and width are rounded up to a multiple of this value

        Returns:
            batched_imgs: the zero-padded batch tensor
        """

        if torchvision._is_tracing():
            # batch_images() does not export well to ONNX
            # call _onnx_batch_images() instead
            return self._onnx_batch_images(images, size_divisible)

        # largest channel, height and width over all images of the batch
        max_size = self.max_by_axis([list(img.shape) for img in images])

        stride = float(size_divisible)
        # round the height up to a multiple of stride
        max_size[1] = int(math.ceil(float(max_size[1]) / stride) * stride)
        # round the width up to a multiple of stride
        max_size[2] = int(math.ceil(float(max_size[2]) / stride) * stride)

        # [batch, channel, height, width]
        batch_shape = [len(images)] + max_size

        # tensor of shape batch_shape filled with zeros
        batched_imgs = images[0].new_full(batch_shape, 0)
        for img, pad_img in zip(images, batched_imgs):
            # Copy every image into the top-left corner of its slot so that the
            # box coordinates stay valid while all slots share one shape.
            # copy_: Copies the elements from src into self tensor and returns self
            pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)

        return batched_imgs

    def postprocess(self,
                    result,                # type: List[Dict[str, Tensor]]
                    image_shapes,          # type: List[Tuple[int, int]]
                    original_image_sizes   # type: List[Tuple[int, int]]
                    ):
        # type: (...) -> List[Dict[str, Tensor]]
        """
        Post-process the predictions: map the boxes back to the original image scale.

        Args:
            result: list(dict), the predictions, one entry per image
            image_shapes: sizes of the images after resizing, one entry per image
            original_image_sizes: sizes of the images before resizing, one entry per image

        Returns:
            ``result`` with every "boxes" entry rescaled (returned unchanged in training mode)
        """
        if self.training:
            return result

        # rescale the boxes of every image back to its original size
        for i, (pred, im_s, o_im_s) in enumerate(zip(result, image_shapes, original_image_sizes)):
            boxes = pred["boxes"]
            boxes = resize_boxes(boxes, im_s, o_im_s)
            result[i]["boxes"] = boxes
        return result

    def __repr__(self):
        """Describe the configured normalization and resize settings (used by print)."""
        format_string = self.__class__.__name__ + '('
        _indent = '\n    '
        format_string += "{0}Normalize(mean={1}, std={2})".format(_indent, self.image_mean, self.image_std)
        format_string += "{0}Resize(min_size={1}, max_size={2}, mode='bilinear')".format(_indent, self.min_size,
                                                                                         self.max_size)
        format_string += '\n)'
        return format_string

    def forward(self,
                images,       # type: List[Tensor]
                targets=None  # type: Optional[List[Dict[str, Tensor]]]
                ):
        # type: (...) -> Tuple[ImageList, Optional[List[Dict[str, Tensor]]]]
        """
        Normalize and resize every image (and its boxes), then batch the images.

        The caller's ``images`` list, ``targets`` list and target dicts are not
        modified; callers must use the returned values.

        Raises:
            ValueError: if an image is not a 3d tensor of shape [C, H, W]
        """
        images = [img for img in images]
        if targets is not None:
            # Work on shallow copies so that the caller's list and dicts are not
            # modified in place (resize() overwrites target["boxes"]).
            # Written as explicit loops to stay TorchScript friendly.
            targets_copy = torch.jit.annotate(List[Dict[str, Tensor]], [])
            for t in targets:
                data = torch.jit.annotate(Dict[str, Tensor], {})
                for k, v in t.items():
                    data[k] = v
                targets_copy.append(data)
            targets = targets_copy

        for i in range(len(images)):
            image = images[i]
            target_index = targets[i] if targets is not None else None

            if image.dim() != 3:
                raise ValueError("images is expected to be a list of 3d tensors "
                                 "of shape [C, H, W], got {}".format(image.shape))
            image = self.normalize(image)                           # normalize the image
            image, target_index = self.resize(image, target_index)  # resize the image and its boxes
            images[i] = image
            if targets is not None and target_index is not None:
                targets[i] = target_index

        # remember the sizes after resizing (before padding)
        image_sizes = [img.shape[-2:] for img in images]
        images = self.batch_images(images)  # pack the images into one batch tensor
        image_sizes_list = torch.jit.annotate(List[Tuple[int, int]], [])

        for image_size in image_sizes:
            assert len(image_size) == 2
            image_sizes_list.append((image_size[0], image_size[1]))

        image_list = ImageList(images, image_sizes_list)
        return image_list, targets


def resize_boxes(boxes, original_size, new_size):
    # type: (Tensor, List[int], List[int]) -> Tensor
    """
    Scale ``boxes`` by the ratio between the new and the original image size.

    Arguments:
        boxes: tensor of shape [N, 4] holding (xmin, ymin, xmax, ymax) per row
        original_size: (height, width) of the image before resizing
        new_size: (height, width) of the image after resizing

    Returns:
        a new [N, 4] tensor with the x coordinates multiplied by the width
        ratio and the y coordinates multiplied by the height ratio
    """
    device = boxes.device
    ratios = []
    for resized, source in zip(new_size, original_size):
        # per-axis ratio, computed as float32 scalar tensors on the boxes' device
        numerator = torch.tensor(resized, dtype=torch.float32, device=device)
        denominator = torch.tensor(source, dtype=torch.float32, device=device)
        ratios.append(numerator / denominator)
    ratios_height, ratios_width = ratios

    # unbind(1) splits the [N, 4] tensor into its four coordinate columns
    xmin, ymin, xmax, ymax = boxes.unbind(1)
    scaled_columns = (
        xmin * ratios_width,
        ymin * ratios_height,
        xmax * ratios_width,
        ymax * ratios_height,
    )
    return torch.stack(scaled_columns, dim=1)


================================================
FILE: pytorch_object_detection/retinaNet/pascal_voc_classes.json
================================================
{
    "aeroplane": 0,
    "bicycle": 1,
    "bird": 2,
    "boat": 3,
    "bottle": 4,
    "bus": 5,
    "car": 6,
    "cat": 7,
    "chair": 8,
    "cow": 9,
    "diningtable": 10,
    "dog": 11,
    "horse": 12,
    "motorbike": 13,
    "person": 14,
    "pottedplant": 15,
    "sheep": 16,
    "sofa": 17,
    "train": 18,
    "tvmonitor": 19
}

================================================
FILE: pytorch_object_detection/retinaNet/plot_curve.py
================================================
import datetime
import matplotlib.pyplot as plt


def plot_loss_and_lr(train_loss, learning_rate):
    """
    Plot the training loss and the learning rate on a shared x axis and save
    the figure as ``./loss_and_lr<timestamp>.png``.

    Plotting is best-effort: any exception is printed instead of being raised.

    Args:
        train_loss: sequence of loss values (left y axis)
        learning_rate: sequence of learning rates, same length as ``train_loss``
            (right y axis)
    """
    fig = None
    try:
        x = list(range(len(train_loss)))
        fig, ax1 = plt.subplots(1, 1)
        ax1.plot(x, train_loss, 'r', label='loss')
        ax1.set_xlabel("step")
        ax1.set_ylabel("loss")
        ax1.set_title("Train Loss and lr")

        # second y axis sharing the same x axis
        ax2 = ax1.twinx()
        ax2.plot(x, learning_rate, label='lr')
        ax2.set_ylabel("learning rate")
        ax2.set_xlim(0, len(train_loss))  # x axis spans all recorded points

        # Draw a single legend holding the entries of both axes. Separate
        # per-axis legends would leave a duplicate "loss" legend on ax1.
        handles1, labels1 = ax1.get_legend_handles_labels()
        handles2, labels2 = ax2.get_legend_handles_labels()
        ax2.legend(handles1 + handles2, labels1 + labels2, loc='upper right')

        fig.subplots_adjust(right=0.8)  # keeps the saved image from being cut off
        fig.savefig('./loss_and_lr{}.png'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")))
        print("successful save loss curve! ")
    except Exception as e:
        print(e)
    finally:
        # release the figure even when plotting or saving failed
        if fig is not None:
            plt.close(fig)


def plot_map(mAP):
    """
    Plot the given mAP values against their index and save the figure as ``./mAP.png``.

    Plotting is best-effort: any exception is printed instead of being raised.

    Args:
        mAP: sequence of mAP values, one per evaluation
    """
    try:
        num_points = len(mAP)
        epochs = list(range(num_points))
        plt.plot(epochs, mAP, label='mAp')

        # axis decoration
        plt.title('Eval mAP')
        plt.xlabel('epoch')
        plt.ylabel('mAP')
        plt.xlim(0, num_points)
        plt.legend(loc='best')

        plt.savefig('./mAP.png')
        plt.close()
        print("successful save mAP curve!")
    except Exception as err:
        print(err)


================================================
FILE: pytorch_object_detection/retinaNet/predict.py
================================================
import os
import time
import json

import torch
from PIL import Image
import matplotlib.pyplot as plt

from torchvision import transforms
from network_files import RetinaNet
from backbone import resnet50_fpn_backbone, LastLevelP6P7
from draw_box_utils import draw_objs


def create_model(num_classes):
    """
    Build the ResNet50 + FPN + RetinaNet model used for prediction.

    Args:
        num_classes: number of object classes passed on to RetinaNet

    Returns:
        the constructed RetinaNet model (no weights are loaded here)
    """
    # Note: norm_layer has to match the one used by the training script
    fpn_backbone = resnet50_fpn_backbone(
        norm_layer=torch.nn.BatchNorm2d,
        returned_layers=[2, 3, 4],
        extra_blocks=LastLevelP6P7(256, 256),
    )
    return RetinaNet(fpn_backbone, num_classes)


def time_synchronized():
    """Return ``time.time()`` after synchronizing CUDA (when CUDA is available)."""
    if torch.cuda.is_available():
        # wait for queued GPU work so the timestamp reflects finished computation
        torch.cuda.synchronize()
    return time.time()


def main():
    """
    Run RetinaNet on ``./test.jpg`` and visualise the detections.

    Loads the weights from ``./save_weights/model.pth`` and the class names
    from ``./pascal_voc_classes.json``, runs one warm-up pass followed by a timed
    forward pass, then shows the annotated image and saves it as
    ``test_result.jpg``.
    """
    # get devices
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # create model
    # Note: the class count does not include the background
    model = create_model(num_classes=20)

    # load train weights
    weights_path = "./save_weights/model.pth"
    assert os.path.exists(weights_path), "{} file dose not exist.".format(weights_path)
    # NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source
    weights_dict = torch.load(weights_path, map_location='cpu')
    # training checkpoints store the weights under the "model" key
    weights_dict = weights_dict["model"] if "model" in weights_dict else weights_dict
    model.load_state_dict(weights_dict)
    model.to(device)

    # read class_indict
    label_json_path = './pascal_voc_classes.json'
    assert os.path.exists(label_json_path), "json file {} dose not exist.".format(label_json_path)
    with open(label_json_path, 'r') as f:
        class_dict = json.load(f)

    # invert the mapping: class index (as string) -> class name
    category_index = {str(v): str(k) for k, v in class_dict.items()}

    # load image
    # Force three channels: a grayscale or RGBA file would otherwise not match
    # the 3-channel normalization applied inside the model's transform.
    original_img = Image.open("./test.jpg").convert("RGB")

    # from pil image to tensor, do not normalize image
    data_transform = transforms.Compose([transforms.ToTensor()])
    img = data_transform(original_img)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)

    model.eval()  # switch to evaluation mode
    with torch.no_grad():
        # warm-up pass on a dummy image so that one-off start-up cost is not timed
        img_height, img_width = img.shape[-2:]
        init_img = torch.zeros((1, 3, img_height, img_width), device=device)
        model(init_img)

        t_start = time_synchronized()
        predictions = model(img.to(device))[0]
        t_end = time_synchronized()
        print("inference+NMS time: {}".format(t_end - t_start))

        predict_boxes = predictions["boxes"].to("cpu").numpy()
        predict_classes = predictions["labels"].to("cpu").numpy()
        predict_scores = predictions["scores"].to("cpu").numpy()

        if len(predict_boxes) == 0:
            print("没有检测到任何目标!")

        plot_img = draw_objs(original_img,
                             predict_boxes,
                             predict_classes,
                             predict_scores,
                             category_index=category_index,
                             box_thresh=0.5,
                             line_thickness=3,
                             font='arial.ttf',
                             font_size=20)
        plt.imshow(plot_img)
        plt.show()
        # save the annotated prediction image
        plot_img.save("test_result.jpg")


# Run the prediction demo only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_object_detection/retinaNet/requirements.txt
================================================
lxml
matplotlib
numpy
tqdm
torch==1.7.1
torchvision==0.8.2
pycocotools
Pillow


================================================
FILE: pytorch_object_detection/retinaNet/results20210421-142632.txt
================================================
epoch:0 0.4012  0.6088  0.4334  0.1691  0.3113  0.4498  0.4265  0.6233  0.6478  0.3362  0.5541  0.6977  1.0681  0.01
epoch:1 0.5028  0.7295  0.5441  0.2219  0.3913  0.5552  0.4624  0.6649  0.6875  0.4039  0.5928  0.7346  0.5422  0.01
epoch:2 0.5311  0.7614  0.5784  0.2439  0.4189  0.5852  0.4733  0.6774  0.698  0.417  0.6105  0.7441  0.4456  0.01
epoch:3 0.5439  0.7762  0.595  0.2412  0.4292  0.5996  0.4773  0.6835  0.7021  0.4137  0.6074  0.7494  0.3872  0.01
epoch:4 0.5404  0.7739  0.5949  0.2457  0.426  0.5968  0.4723  0.6818  0.7007  0.4363  0.6047  0.7479  0.347  0.01
epoch:5 0.5513  0.7867  0.6021  0.2415  0.4265  0.6087  0.4811  0.685  0.7041  0.4073  0.6088  0.7526  0.3166  0.01
epoch:6 0.5508  0.7909  0.6014  0.2327  0.4211  0.6116  0.478  0.6803  0.699  0.4081  0.5994  0.7485  0.2884  0.01
epoch:7 0.5617  0.7972  0.6142  0.2431  0.427  0.6223  0.4848  0.6862  0.7049  0.4184  0.6018  0.7551  0.2546  0.001
epoch:8 0.561  0.7986  0.6117  0.2342  0.4268  0.6223  0.4842  0.6855  0.705  0.4153  0.6051  0.7551  0.2462  0.001
epoch:9 0.563  0.7983  0.6153  0.2359  0.4336  0.6237  0.4849  0.6884  0.7068  0.4103  0.6063  0.7574  0.2428  0.001
epoch:10 0.563  0.7991  0.6167  0.2363  0.4334  0.6234  0.4854  0.6879  0.7062  0.4152  0.6063  0.7558  0.2391  0.001
epoch:11 0.5637  0.7984  0.6145  0.2341  0.4345  0.6241  0.4842  0.6894  0.7083  0.4136  0.6074  0.7581  0.2355  0.001
epoch:12 0.5624  0.7969  0.6155  0.2373  0.4292  0.623  0.4853  0.6866  0.7055  0.4136  0.6026  0.756  0.2323  0.0001
epoch:13 0.5632  0.7985  0.6155  0.2358  0.4342  0.6243  0.4858  0.6878  0.7065  0.4206  0.6039  0.7576  0.2307  0.0001
epoch:14 0.562  0.7977  0.6155  0.2309  0.4291  0.6234  0.4849  0.6869  0.7051  0.4198  0.6023  0.7558  0.2305  0.0001
epoch:15 0.5631  0.7984  0.6155  0.2324  0.4326  0.6238  0.4849  0.6876  0.706  0.4151  0.6039  0.7565  0.2313  0.0001
epoch:16 0.5632  0.7992  0.6164  0.2349  0.429  0.6245  0.4859  0.6871  0.7063  0.4186  0.604  0.7569  0.2302  0.0001
epoch:17 0.5637  0.7994  0.6164  0.2325  0.4312  0.6245  0.4854  0.6873  0.706  0.4109  0.6023  0.7567  0.2312  0.0001
epoch:18 0.5626  0.7984  0.6132  0.2333  0.431  0.6238  0.4854  0.6873  0.7056  0.4158  0.6025  0.7564  0.2298  0.0001
epoch:19 0.5613  0.7981  0.612  0.2365  0.4278  0.622  0.4855  0.6867  0.7047  0.4112  0.6  0.7554  0.2305  0.0001


================================================
FILE: pytorch_object_detection/retinaNet/train.py
================================================
import os
import datetime

import torch

import transforms
from backbone import resnet50_fpn_backbone, LastLevelP6P7
from network_files import RetinaNet
from my_dataset import VOCDataSet
from train_utils import GroupedBatchSampler, create_aspect_ratio_groups
from train_utils import train_eval_utils as utils


def create_model(num_classes):
    """
    Build the retinanet_res50_fpn model and load the pretrained weights.

    Args:
        num_classes: number of object classes passed on to RetinaNet

    Returns:
        the RetinaNet model with every pretrained weight loaded except the
        classification output layer
    """
    # skip P2 because it generates too many anchors (according to their paper)
    # NOTE(review): the original comment says the backbone defaults to
    # FrozenBatchNorm2d (BN parameters are not updated), which it recommends for
    # small batch sizes / little GPU memory. This call passes the regular
    # torch.nn.BatchNorm2d explicitly.
    fpn_backbone = resnet50_fpn_backbone(
        norm_layer=torch.nn.BatchNorm2d,
        returned_layers=[2, 3, 4],
        extra_blocks=LastLevelP6P7(256, 256),
        trainable_layers=3,
    )
    model = RetinaNet(fpn_backbone, num_classes)

    # load the pretrained weights
    # https://download.pytorch.org/models/retinanet_resnet50_fpn_coco-eeacb38b.pth
    weights_dict = torch.load("./backbone/retinanet_resnet50_fpn.pth", map_location='cpu')

    # Drop the classifier output weights: the class count of the custom data set
    # may differ from that of the pretrained model, which would clash on load.
    for key in ("head.classification_head.cls_logits.weight",
                "head.classification_head.cls_logits.bias"):
        del weights_dict[key]

    load_report = model.load_state_dict(weights_dict, strict=False)
    print(load_report)

    return model


def main(args):
    """
    Train RetinaNet on the VOC2012 train split and evaluate it after every epoch.

    Per epoch this appends the evaluation metrics, mean loss and learning rate to a
    timestamped ``results*.txt`` file and writes a checkpoint into
    ``args.output_dir``. After training, the loss / learning-rate and mAP curves
    are plotted.

    Args:
        args: parsed command line arguments; reads ``device``, ``data_path``,
            ``aspect_ratio_group_factor``, ``batch_size``, ``num_classes``, ``amp``,
            ``resume``, ``start_epoch``, ``epochs`` and ``output_dir``

    Raises:
        FileNotFoundError: if ``VOCdevkit`` is not found below ``args.data_path``
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    data_transform = {
        "train": transforms.Compose([transforms.ToTensor(),
                                     transforms.RandomHorizontalFlip(0.5)]),
        "val": transforms.Compose([transforms.ToTensor()])
    }

    VOC_root = args.data_path
    # check voc root
    if os.path.exists(os.path.join(VOC_root, "VOCdevkit")) is False:
        raise FileNotFoundError("VOCdevkit dose not in path:'{}'.".format(VOC_root))

    # load train data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt
    train_dataset = VOCDataSet(VOC_root, "2012", data_transform["train"], "train.txt")
    train_batch_sampler = None

    # Optionally build batches from images with a similar aspect ratio.
    # According to the original note this lowers GPU memory use; enabled by default.
    if args.aspect_ratio_group_factor >= 0:
        train_sampler = torch.utils.data.RandomSampler(train_dataset)
        # bin index of every image according to its aspect ratio
        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)
        # every batch is drawn from a single aspect-ratio bin
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)

    # A custom collate_fn is used because every sample consists of an image and
    # its targets, which the default collate function cannot batch.
    batch_size = args.batch_size
    # os.cpu_count() may return None; fall back to 1 in that case
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using %g dataloader workers' % nw)
    if train_batch_sampler is not None:
        # aspect-ratio grouping requires passing a batch_sampler to the DataLoader
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_sampler=train_batch_sampler,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)
    else:
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_size=batch_size,
                                                        shuffle=True,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)

    # load validation data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt
    val_dataset = VOCDataSet(VOC_root, "2012", data_transform["val"], "val.txt")
    val_data_loader = torch.utils.data.DataLoader(val_dataset,
                                                  batch_size=1,
                                                  shuffle=False,
                                                  pin_memory=True,
                                                  num_workers=nw,
                                                  collate_fn=val_dataset.collate_fn)

    # create model
    # Note: the class count does not include the background
    model = create_model(num_classes=args.num_classes)
    # print(model)

    model.to(device)

    # define optimizer (only over parameters that require gradients)
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.33)

    # resume from a previously saved checkpoint when one is given
    if args.resume != "":
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp and "scaler" in checkpoint:
            scaler.load_state_dict(checkpoint["scaler"])
        print("the training process from epoch{}...".format(args.start_epoch))

    train_loss = []
    learning_rate = []
    val_map = []

    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch (print_freq=50)
        mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader,
                                              device, epoch, print_freq=50,
                                              warmup=True, scaler=scaler)
        train_loss.append(mean_loss.item())
        learning_rate.append(lr)

        # update the learning rate
        lr_scheduler.step()

        # evaluate on the test dataset
        coco_info = utils.evaluate(model, val_data_loader, device=device)

        # write into txt
        with open(results_file, "a") as f:
            # the line holds the coco metrics followed by the loss and the learning rate
            result_info = [f"{i:.4f}" for i in coco_info + [mean_loss.item()]] + [f"{lr:.6f}"]
            txt = "epoch:{} {}".format(epoch, '  '.join(result_info))
            f.write(txt + "\n")

        val_map.append(coco_info[1])  # pascal map

        # save weights
        save_files = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler': lr_scheduler.state_dict(),
            'epoch': epoch}
        if args.amp:
            save_files["scaler"] = scaler.state_dict()
        # write into the directory selected with --output-dir (created by the
        # script entry point) instead of a hard-coded ./save_weights
        torch.save(save_files,
                   os.path.join(args.output_dir, "resNetFpn-model-{}.pth".format(epoch)))

    # plot loss and lr curve
    if len(train_loss) != 0 and len(learning_rate) != 0:
        from plot_curve import plot_loss_and_lr
        plot_loss_and_lr(train_loss, learning_rate)

    # plot mAP curve
    if len(val_map) != 0:
        from plot_curve import plot_map
        plot_map(val_map)


if __name__ == "__main__":
    import argparse

    def _str2bool(value):
        """Parse a command line boolean such as "True" / "false" / "1" / "0"."""
        if isinstance(value, bool):
            return value
        lowered = str(value).strip().lower()
        if lowered in ("true", "t", "yes", "y", "1", "on"):
            return True
        if lowered in ("false", "f", "no", "n", "0", "off"):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got '{}'".format(value))

    parser = argparse.ArgumentParser(
        description=__doc__)

    # device used for training
    parser.add_argument('--device', default='cuda:0', help='device')
    # root directory of the training data set (the folder containing VOCdevkit)
    parser.add_argument('--data-path', default='/data', help='dataset')
    # number of object classes (background not included)
    parser.add_argument('--num-classes', default=20, type=int, help='num_classes')
    # directory the checkpoints are written to
    parser.add_argument('--output-dir', default='./save_weights', help='path where to save')
    # checkpoint to resume from; leave empty to start from scratch
    parser.add_argument('--resume', default='', type=str, help='resume from checkpoint')
    # epoch to start from
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # total number of epochs
    parser.add_argument('--epochs', default=15, type=int, metavar='N',
                        help='number of total epochs to run')
    # training batch size
    parser.add_argument('--batch_size', default=4, type=int, metavar='N',
                        help='batch size when training.')
    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
    # Mixed precision training (requires GPU support). The value is parsed as a
    # boolean; without a type, "--amp False" would be the truthy string "False".
    parser.add_argument("--amp", default=False, type=_str2bool,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()
    print(args)

    # make sure the checkpoint directory exists
    os.makedirs(args.output_dir, exist_ok=True)

    main(args)


================================================
FILE: pytorch_object_detection/retinaNet/train_multi_GPU.py
================================================
import os
import time
import datetime

import torch

import transforms
from backbone import resnet50_fpn_backbone, LastLevelP6P7
from network_files import RetinaNet
from my_dataset import VOCDataSet
from train_utils import train_eval_utils as utils
from train_utils import GroupedBatchSampler, create_aspect_ratio_groups, init_distributed_mode, save_on_master, mkdir


def create_model(num_classes):
    """Build a RetinaNet with a ResNet50-FPN backbone and load pretrained weights.

    Args:
        num_classes: number of classes passed straight to ``RetinaNet``.

    Returns:
        The constructed model with the pretrained weights loaded non-strictly.
    """
    # P2 is skipped because it generates too many anchors (according to the paper).
    # Per the original author's note, the backbone would otherwise use
    # FrozenBatchNorm2d (BN parameters not updated), which is advisable when GPU
    # memory only allows a small batch size. A regular BatchNorm2d is passed
    # explicitly here, which suits larger batch sizes.
    fpn_backbone = resnet50_fpn_backbone(
        norm_layer=torch.nn.BatchNorm2d,
        returned_layers=[2, 3, 4],
        extra_blocks=LastLevelP6P7(256, 256),
        trainable_layers=3,
    )
    net = RetinaNet(fpn_backbone, num_classes)

    # Pretrained weights:
    # https://download.pytorch.org/models/retinanet_resnet50_fpn_coco-eeacb38b.pth
    pretrained = torch.load("./backbone/retinanet_resnet50_fpn.pth", map_location='cpu')

    # Drop the classifier weights: the class count of the pretrained data set (91)
    # does not necessarily match ours, so loading them would conflict.
    for stale_key in ("head.classification_head.cls_logits.weight",
                      "head.classification_head.cls_logits.bias"):
        pretrained.pop(stale_key)

    # strict=False, so the report of missing/unexpected keys is printed
    print(net.load_state_dict(pretrained, strict=False))

    return net


def main(args):
    """Train (or only evaluate) RetinaNet on Pascal VOC 2012.

    The function sets up the (optionally distributed) data loaders, model,
    optimizer and LR schedule, optionally resumes from a checkpoint, then runs
    the train/evaluate loop, writing metrics to a results text file, saving one
    checkpoint per epoch and finally plotting the loss/lr and mAP curves.

    Args:
        args: namespace produced by the argument parser of this script.
            ``init_distributed_mode`` adds ``distributed`` (and, in distributed
            mode, ``rank``/``gpu``) to it.

    Raises:
        FileNotFoundError: if ``<args.data_path>/VOCdevkit`` does not exist.
    """
    init_distributed_mode(args)
    print(args)

    # init_distributed_mode() returns early without setting args.rank when the
    # script is not launched in distributed mode, so fall back to -1 instead of
    # raising AttributeError. -1 and 0 both denote "the process that writes".
    is_main = getattr(args, "rank", -1) in (-1, 0)

    device = torch.device(args.device)

    # file that stores the coco_info of every epoch
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    # Data loading code
    print("Loading data")

    data_transform = {
        "train": transforms.Compose([transforms.ToTensor(),
                                     transforms.RandomHorizontalFlip(0.5)]),
        "val": transforms.Compose([transforms.ToTensor()])
    }

    VOC_root = args.data_path
    # check voc root
    if os.path.exists(os.path.join(VOC_root, "VOCdevkit")) is False:
        raise FileNotFoundError("VOCdevkit dose not in path:'{}'.".format(VOC_root))

    # load train data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt
    train_dataset = VOCDataSet(VOC_root, "2012", data_transform["train"], "train.txt")

    # load validation data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt
    val_dataset = VOCDataSet(VOC_root, "2012", data_transform["val"], "val.txt")

    print("Creating data loaders")
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        test_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
    else:
        train_sampler = torch.utils.data.RandomSampler(train_dataset)
        test_sampler = torch.utils.data.SequentialSampler(val_dataset)

    if args.aspect_ratio_group_factor >= 0:
        # compute, for every image, the index of the aspect-ratio bin it falls into
        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)
    else:
        train_batch_sampler = torch.utils.data.BatchSampler(
            train_sampler, args.batch_size, drop_last=True)

    data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_sampler=train_batch_sampler, num_workers=args.workers,
        collate_fn=train_dataset.collate_fn)

    data_loader_test = torch.utils.data.DataLoader(
        val_dataset, batch_size=1,
        sampler=test_sampler, num_workers=args.workers,
        collate_fn=train_dataset.collate_fn)

    print("Creating model")
    # create model
    # note: num_classes does not include the background
    model = create_model(num_classes=args.num_classes)
    model.to(device)

    # keep a handle on the raw model so checkpoints never contain the DDP wrapper
    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(
        params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)

    # if a resume path (checkpoint of a previous run) is given, continue from it
    if args.resume:
        # If map_location is missing, torch.load will first load the module to CPU
        # and then copy each parameter to where it was saved,
        # which would result in all processes on the same machine using the same set of devices.
        # The checkpoint holds the model, optimizer and lr scheduler states.
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp and "scaler" in checkpoint:
            scaler.load_state_dict(checkpoint["scaler"])

    if args.test_only:
        utils.evaluate(model, data_loader_test, device=device)
        return

    train_loss = []
    learning_rate = []
    val_map = []

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            # reshuffle differently every epoch
            train_sampler.set_epoch(epoch)
        mean_loss, lr = utils.train_one_epoch(model, optimizer, data_loader,
                                              device, epoch, args.print_freq,
                                              warmup=True, scaler=scaler)
        train_loss.append(mean_loss.item())
        learning_rate.append(lr)

        # update learning rate
        lr_scheduler.step()

        # evaluate after every epoch
        coco_info = utils.evaluate(model, data_loader_test, device=device)
        val_map.append(coco_info[1])  # pascal mAP

        # only the main process writes to disk
        if is_main:
            # write into txt
            with open(results_file, "a") as f:
                # the record holds the coco metrics plus loss and learning rate
                result_info = [f"{i:.4f}" for i in coco_info + [mean_loss.item()]] + [f"{lr:.6f}"]
                txt = "epoch:{} {}".format(epoch, '  '.join(result_info))
                f.write(txt + "\n")

        if args.output_dir:
            # save_on_master() performs the actual write on the main process only
            save_files = {
                'model': model_without_ddp.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'args': args,
                'epoch': epoch}
            if args.amp:
                save_files["scaler"] = scaler.state_dict()
            save_on_master(save_files,
                           os.path.join(args.output_dir, f'model_{epoch}.pth'))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))

    if is_main:
        # plot loss and lr curve
        if len(train_loss) != 0 and len(learning_rate) != 0:
            from plot_curve import plot_loss_and_lr
            plot_loss_and_lr(train_loss, learning_rate)

        # plot mAP curve
        if len(val_map) != 0:
            from plot_curve import plot_map
            plot_map(val_map)


if __name__ == "__main__":
    import argparse

    def _str2bool(value):
        """Convert a command-line token such as 'True', 'false', '1' or 'no' to a bool.

        Without this conversion argparse stores the raw string, and any
        non-empty string (including 'False') is truthy.
        """
        if isinstance(value, bool):
            return value
        text = str(value).strip().lower()
        if text in ("true", "t", "yes", "y", "1"):
            return True
        if text in ("false", "f", "no", "n", "0"):
            return False
        raise argparse.ArgumentTypeError("boolean value expected, got '{}'".format(value))

    parser = argparse.ArgumentParser(
        description=__doc__)

    # root directory of the training files (the folder containing VOCdevkit)
    parser.add_argument('--data-path', default='/data', help='dataset')
    # device used for training
    parser.add_argument('--device', default='cuda', help='device')
    # number of object classes (background excluded)
    parser.add_argument('--num-classes', default=20, type=int, help='num_classes')
    # batch size on each GPU
    parser.add_argument('-b', '--batch-size', default=4, type=int,
                        help='images per gpu, the total batch size is $NGPU x batch_size')
    # epoch index from which training starts
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # total number of training epochs
    parser.add_argument('--epochs', default=20, type=int, metavar='N',
                        help='number of total epochs to run')
    # number of workers used for data loading and preprocessing
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    # learning rate; set it according to the GPU count and batch size: 0.02 / 8 * num_GPU
    parser.add_argument('--lr', default=0.02, type=float,
                        help='initial learning rate, 0.02 is the default value for training '
                             'on 8 gpus and 2 images_per_gpu')
    # momentum of SGD
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # weight_decay of SGD
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # parameter for torch.optim.lr_scheduler.StepLR
    parser.add_argument('--lr-step-size', default=8, type=int, help='decrease lr every step-size epochs')
    # parameter for torch.optim.lr_scheduler.MultiStepLR (the milestones)
    parser.add_argument('--lr-steps', default=[7, 12], nargs='+', type=int,
                        help='epochs at which the lr is decreased (MultiStepLR milestones)')
    # parameter for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')
    # how often progress information is printed during training
    parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
    # directory where files are saved
    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')
    # continue training from the result of a previous run
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
    # do not train, only evaluate
    parser.add_argument(
        "--test-only",
        dest="test_only",
        help="Only test the model",
        action="store_true",
    )

    # number of processes to start (processes, not threads)
    parser.add_argument('--world-size', default=4, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
    # whether to use mixed precision training (the GPU must support it).
    # Accepts "--amp", "--amp True" or "--amp False"; the value is parsed to a real bool.
    parser.add_argument("--amp", default=False, type=_str2bool, nargs='?', const=True,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()

    # if an output directory is given, create it when it does not exist yet
    if args.output_dir:
        mkdir(args.output_dir)

    main(args)


================================================
FILE: pytorch_object_detection/retinaNet/train_utils/__init__.py
================================================
from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups
from .distributed_utils import init_distributed_mode, save_on_master, mkdir
from .coco_utils import get_coco_api_from_dataset
from .coco_eval import CocoEvaluator


================================================
FILE: pytorch_object_detection/retinaNet/train_utils/coco_eval.py
================================================
import json
from collections import defaultdict

import numpy as np
import copy
import torch
import torch._six
from pycocotools.cocoeval import COCOeval
from pycocotools.coco import COCO
import pycocotools.mask as mask_util

from .distributed_utils import all_gather


class CocoEvaluator(object):
    """Collect per-image COCO evaluation results for one or more IoU types.

    One ``COCOeval`` object is kept per IoU type. ``update`` evaluates a batch
    of predictions, ``synchronize_between_processes`` merges the results, and
    ``accumulate``/``summarize`` delegate to the underlying ``COCOeval`` objects.
    """

    def __init__(self, coco_gt, iou_types):
        """Store a private deep copy of ``coco_gt`` and create one evaluator per IoU type."""
        assert isinstance(iou_types, (list, tuple))
        self.coco_gt = copy.deepcopy(coco_gt)
        self.iou_types = iou_types

        self.coco_eval = {}
        for kind in iou_types:
            self.coco_eval[kind] = COCOeval(self.coco_gt, iouType=kind)

        self.img_ids = []
        self.eval_imgs = {kind: [] for kind in iou_types}

    def update(self, predictions):
        """Evaluate ``predictions`` (a mapping image id -> prediction dict) and store the results."""
        batch_ids = list(np.unique(list(predictions.keys())))
        self.img_ids.extend(batch_ids)

        for kind in self.iou_types:
            detections = self.prepare(predictions, kind)
            # an empty COCO object stands in when there is nothing to load
            if detections:
                dt_api = loadRes(self.coco_gt, detections)
            else:
                dt_api = COCO()

            evaluator = self.coco_eval[kind]
            evaluator.cocoDt = dt_api
            evaluator.params.imgIds = list(batch_ids)
            # evaluate() hands back the ids it actually used; they feed the next IoU type
            batch_ids, per_image = evaluate(evaluator)

            self.eval_imgs[kind].append(per_image)

    def synchronize_between_processes(self):
        """Concatenate the stored batches along axis 2 and merge them into each evaluator."""
        for kind in self.iou_types:
            stacked = np.concatenate(self.eval_imgs[kind], 2)
            self.eval_imgs[kind] = stacked
            create_common_coco_eval(self.coco_eval[kind], self.img_ids, stacked)

    def accumulate(self):
        """Call ``accumulate()`` on every evaluator."""
        for evaluator in self.coco_eval.values():
            evaluator.accumulate()

    def summarize(self):
        """Print the IoU type followed by the evaluator's summary, for every IoU type."""
        for kind in self.coco_eval:
            print("IoU metric: {}".format(kind))
            self.coco_eval[kind].summarize()

    def prepare(self, predictions, iou_type):
        """Convert ``predictions`` into COCO result dicts for the given IoU type.

        Raises:
            ValueError: if ``iou_type`` is not "bbox", "segm" or "keypoints".
        """
        if iou_type == "bbox":
            return self.prepare_for_coco_detection(predictions)
        if iou_type == "segm":
            return self.prepare_for_coco_segmentation(predictions)
        if iou_type == "keypoints":
            return self.prepare_for_coco_keypoint(predictions)
        raise ValueError("Unknown iou type {}".format(iou_type))

    def prepare_for_coco_detection(self, predictions):
        """Build one result dict (image_id, category_id, bbox in xywh, score) per predicted box."""
        coco_results = []
        for original_id, prediction in predictions.items():
            # images without any prediction contribute nothing
            if len(prediction) == 0:
                continue

            xywh_boxes = convert_to_xywh(prediction["boxes"]).tolist()
            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()

            per_image = []
            for k, box in enumerate(xywh_boxes):
                per_image.append({
                    "image_id": original_id,
                    "category_id": labels[k],
                    "bbox": box,
                    "score": scores[k],
                })
            coco_results.extend(per_image)
        return coco_results

    def prepare_for_coco_segmentation(self, predictions):
        """Build one result dict (image_id, category_id, RLE segmentation, score) per predicted mask."""
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            raw_scores = prediction["scores"]
            raw_labels = prediction["labels"]
            # binarize the masks at 0.5
            binary_masks = prediction["masks"] > 0.5

            scores = raw_scores.tolist()
            labels = raw_labels.tolist()

            rles = []
            for mask in binary_masks:
                encoded = mask_util.encode(
                    np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0]
                rles.append(encoded)
            # the encoded counts are bytes; store them as text
            for encoded in rles:
                encoded["counts"] = encoded["counts"].decode("utf-8")

            per_image = []
            for k, encoded in enumerate(rles):
                per_image.append({
                    "image_id": original_id,
                    "category_id": labels[k],
                    "segmentation": encoded,
                    "score": scores[k],
                })
            coco_results.extend(per_image)
        return coco_results

    def prepare_for_coco_keypoint(self, predictions):
        """Build one result dict (image_id, category_id, flattened keypoints, score) per detection."""
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            # the converted boxes are not part of the result dicts below
            boxes = convert_to_xywh(prediction["boxes"]).tolist()
            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()
            flat_keypoints = prediction["keypoints"].flatten(start_dim=1).tolist()

            per_image = []
            for k, keypoint in enumerate(flat_keypoints):
                per_image.append({
                    "image_id": original_id,
                    "category_id": labels[k],
                    'keypoints': keypoint,
                    "score": scores[k],
                })
            coco_results.extend(per_image)
        return coco_results


def convert_to_xywh(boxes):
    """Convert boxes from (xmin, ymin, xmax, ymax) columns to (xmin, ymin, width, height).

    The four columns are taken along dimension 1 and the result is stacked
    along dimension 1 again.
    """
    left, top, right, bottom = boxes.unbind(1)
    width = right - left
    height = bottom - top
    return torch.stack((left, top, width, height), dim=1)


def merge(img_ids, eval_imgs):
    """Gather image ids and evaluation arrays from all ranks and de-duplicate them.

    Returns:
        A tuple ``(ids, evals)``: the sorted unique image ids and the gathered
        evaluation arrays, concatenated along axis 2 and indexed on the last
        axis by the first occurrence of every unique id.
    """
    gathered_ids = all_gather(img_ids)
    gathered_evals = all_gather(eval_imgs)

    flat_ids = np.array([img_id for part in gathered_ids for img_id in part])
    stacked_evals = np.concatenate(list(gathered_evals), 2)

    # keep only unique (and in sorted order) images
    unique_ids, first_pos = np.unique(flat_ids, return_index=True)
    return unique_ids, stacked_evals[..., first_pos]


def create_common_coco_eval(coco_eval, img_ids, eval_imgs):
    """Install the merged image ids and flattened evaluation results on ``coco_eval``.

    Sets ``evalImgs`` and ``params.imgIds`` and then refreshes ``_paramsEval``
    with a deep copy of the updated params.
    """
    merged_ids, merged_evals = merge(img_ids, eval_imgs)

    coco_eval.evalImgs = list(merged_evals.flatten())
    coco_eval.params.imgIds = list(merged_ids)
    coco_eval._paramsEval = copy.deepcopy(coco_eval.params)


#################################################################
# From pycocotools, just removed the prints and fixed
# a Python3 bug about unicode not defined
#################################################################

# Ideally, pycocotools wouldn't have hard-coded prints
# so that we could avoid copy-pasting those two functions

def createIndex(self):
    """Build the lookup tables of a COCO-like object from ``self.dataset``.

    Populates ``anns`` (annotation id -> annotation), ``imgToAnns`` (image id
    -> annotations), ``catToImgs`` (category id -> image ids), ``imgs`` (image
    id -> image) and ``cats`` (category id -> category). Sections missing from
    the dataset simply yield empty tables.
    """
    data = self.dataset
    has_annotations = 'annotations' in data
    has_categories = 'categories' in data

    anns = {}
    imgToAnns = defaultdict(list)
    if has_annotations:
        for ann in data['annotations']:
            imgToAnns[ann['image_id']].append(ann)
            anns[ann['id']] = ann

    imgs = {img['id']: img for img in data['images']} if 'images' in data else {}
    cats = {cat['id']: cat for cat in data['categories']} if has_categories else {}

    # the category -> images table needs both sections to be present
    catToImgs = defaultdict(list)
    if has_annotations and has_categories:
        for ann in data['annotations']:
            catToImgs[ann['category_id']].append(ann['image_id'])

    # create class members
    self.anns = anns
    self.imgToAnns = imgToAnns
    self.catToImgs = catToImgs
    self.imgs = imgs
    self.cats = cats


# Alias so that the functions below can refer to the mask helpers as `maskUtils`.
maskUtils = mask_util


def loadRes(self, resFile):
    """
    Load results and return them wrapped in a result api object.

    Compared with a plain copy of the pycocotools routine, this version closes
    the result file after reading it, does not depend on the private
    ``torch._six`` module, and tolerates an empty result list.

    :param   self              : COCO api object the results refer to
    :param   resFile           : path (str or bytes) of a JSON result file, a numpy
                                 array understood by ``self.loadNumpyAnnotations``,
                                 or an already loaded list of result dicts
    :return: res (obj)         : result api object
    """
    res = COCO()
    res.dataset['images'] = list(self.dataset['images'])

    if isinstance(resFile, (str, bytes)):
        # read through a context manager so the file handle is not leaked
        with open(resFile) as f:
            anns = json.load(f)
    elif isinstance(resFile, np.ndarray):
        anns = self.loadNumpyAnnotations(resFile)
    else:
        anns = resFile
    assert isinstance(anns, list), 'results in not an array of objects'
    annsImgIds = [ann['image_id'] for ann in anns]
    assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
        'Results do not correspond to current coco set'

    # The kind of result is decided from the first entry; with no entries none
    # of the branches below applies and an empty annotation list is indexed.
    first = anns[0] if anns else {}
    if 'caption' in first:
        imgIds = {img['id'] for img in res.dataset['images']} & {ann['image_id'] for ann in anns}
        res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
        for idx, ann in enumerate(anns):
            ann['id'] = idx + 1
    elif 'bbox' in first and not first['bbox'] == []:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            bb = ann['bbox']  # [x, y, width, height]
            x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
            if 'segmentation' not in ann:
                # fall back to the box outline as a polygon
                ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
            ann['area'] = bb[2] * bb[3]
            ann['id'] = idx + 1
            ann['iscrowd'] = 0
    elif 'segmentation' in first:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            # now only support compressed RLE format as segmentation results
            ann['area'] = maskUtils.area(ann['segmentation'])
            if 'bbox' not in ann:
                ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
            ann['id'] = idx + 1
            ann['iscrowd'] = 0
    elif 'keypoints' in first:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            s = ann['keypoints']
            # keypoints are stored as triplets; take every third value for x and y
            x = s[0::3]
            y = s[1::3]
            x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y)
            ann['area'] = (x2 - x1) * (y2 - y1)
            ann['id'] = idx + 1
            ann['bbox'] = [x1, y1, x2 - x1, y2 - y1]

    res.dataset['annotations'] = anns
    createIndex(res)
    return res


def evaluate(self):
    '''
    Run per image evaluation on the given images.

    The computed IoUs are stored in self.ious; the per-image results are
    returned rather than stored.
    :return: (image ids used, array of per-image results with shape
             [num categories, num area ranges, num images])
    '''
    params = self.params
    # add backward compatibility if useSegm is specified in params
    if params.useSegm is not None:
        params.iouType = 'segm' if params.useSegm == 1 else 'bbox'
        print('useSegm (deprecated) is not None. Running {} evaluation'.format(params.iouType))

    # normalise the parameters: unique sorted ids, ascending maxDets
    params.imgIds = list(np.unique(params.imgIds))
    if params.useCats:
        params.catIds = list(np.unique(params.catIds))
    params.maxDets = sorted(params.maxDets)
    self.params = params

    self._prepare()

    # a single pseudo category (-1) is used when categories are ignored
    cat_ids = params.catIds if params.useCats else [-1]

    if params.iouType == 'segm' or params.iouType == 'bbox':
        iou_fn = self.computeIoU
    elif params.iouType == 'keypoints':
        iou_fn = self.computeOks

    ious = {}
    for img_id in params.imgIds:
        for cat_id in cat_ids:
            ious[(img_id, cat_id)] = iou_fn(img_id, cat_id)
    self.ious = ious

    evaluate_img = self.evaluateImg
    max_det = params.maxDets[-1]
    # loop through categories, area ranges and images (in that nesting order)
    results = []
    for cat_id in cat_ids:
        for area_rng in params.areaRng:
            for img_id in params.imgIds:
                results.append(evaluate_img(img_id, cat_id, area_rng, max_det))

    # this is NOT in the pycocotools code, but could be done outside
    results = np.asarray(results).reshape(len(cat_ids), len(params.areaRng), len(params.imgIds))
    self._paramsEval = copy.deepcopy(self.params)
    return params.imgIds, results

#################################################################
# end of straight copy from pycocotools, just removing the prints
#################################################################


================================================
FILE: pytorch_object_detection/retinaNet/train_utils/coco_utils.py
================================================
import torch
import torchvision
import torch.utils.data
from pycocotools.coco import COCO


def convert_to_coco_api(ds):
    """Build a ``COCO`` object from a data set exposing ``coco_index(idx)``.

    ``ds.coco_index(idx)`` is expected to return ``(hw, targets)`` where
    ``hw`` holds height and width and ``targets`` provides "image_id",
    "boxes", "labels", "area" and "iscrowd".

    Returns:
        A ``COCO`` object whose dataset holds the images, categories and
        annotations collected from ``ds``, with its index created.
    """
    images = []
    annotations = []
    seen_categories = set()

    for img_idx in range(len(ds)):
        # find better way to get target
        hw, targets = ds.coco_index(img_idx)
        image_id = targets["image_id"].item()
        images.append({'id': image_id, 'height': hw[0], 'width': hw[1]})

        boxes = targets["boxes"]
        # in place: turn the last two columns (xmax, ymax) into width and height
        boxes[:, 2:] -= boxes[:, :2]
        boxes = boxes.tolist()
        labels = targets['labels'].tolist()
        areas = targets['area'].tolist()
        crowd_flags = targets['iscrowd'].tolist()

        for i, box in enumerate(boxes):
            label = labels[i]
            seen_categories.add(label)
            annotations.append({
                'image_id': image_id,
                'bbox': box,
                'category_id': label,
                'area': areas[i],
                'iscrowd': crowd_flags[i],
                # annotation IDs need to start at 1, not 0
                'id': len(annotations) + 1,
            })

    coco_ds = COCO()
    coco_ds.dataset = {
        'images': images,
        'categories': [{'id': cat_id} for cat_id in sorted(seen_categories)],
        'annotations': annotations,
    }
    coco_ds.createIndex()
    return coco_ds


def get_coco_api_from_dataset(dataset):
    """Return a COCO api object for ``dataset``.

    Up to ten levels of ``torch.utils.data.Subset`` wrapping are removed. If a
    ``torchvision.datasets.CocoDetection`` is found, its ``coco`` attribute is
    returned; otherwise the data set is converted with ``convert_to_coco_api``.
    """
    coco_cls = torchvision.datasets.CocoDetection

    remaining = 10
    while remaining > 0 and not isinstance(dataset, coco_cls):
        if isinstance(dataset, torch.utils.data.Subset):
            dataset = dataset.dataset
        remaining -= 1

    if not isinstance(dataset, coco_cls):
        return convert_to_coco_api(dataset)
    return dataset.coco


================================================
FILE: pytorch_object_detection/retinaNet/train_utils/distributed_utils.py
================================================
from collections import defaultdict, deque
import datetime
import pickle
import time
import errno
import os

import torch
import torch.distributed as dist


class SmoothedValue(object):
    """Keep a bounded window of recent values plus a running total and count.

    The window (``deque``) backs ``median``, ``avg``, ``max`` and ``value``;
    ``total`` and ``count`` back ``global_avg`` over the whole series.
    """

    def __init__(self, window_size=20, fmt=None):
        """Create an empty tracker; ``fmt`` is the template used by ``__str__``."""
        self.fmt = "{value:.4f} ({global_avg:.4f})" if fmt is None else fmt
        # bounded deque: once full, appending drops the oldest value
        self.deque = deque(maxlen=window_size)
        self.count = 0
        self.total = 0.0

    def update(self, value, n=1):
        """Record ``value`` in the window and add it ``n`` times to the running total."""
        self.deque.append(value)
        self.total += value * n
        self.count += n

    def synchronize_between_processes(self):
        """
        Sum ``count`` and ``total`` over all processes (no-op when not distributed).

        Warning: does not synchronize the deque!
        """
        if is_dist_avail_and_initialized():
            packed = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda")
            dist.barrier()
            dist.all_reduce(packed)
            count, total = packed.tolist()
            self.count = int(count)
            self.total = total

    @property
    def median(self):
        """Median of the values currently in the window."""
        return torch.tensor(list(self.deque)).median().item()

    @property
    def avg(self):
        """Mean of the values currently in the window (computed in float32)."""
        window = torch.tensor(list(self.deque), dtype=torch.float32)
        return window.mean().item()

    @property
    def global_avg(self):
        """Running total divided by the running count."""
        return self.total / self.count

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    @property
    def value(self):
        """Most recently recorded value."""
        return self.deque[-1]

    def __str__(self):
        stats = {
            "median": self.median,
            "avg": self.avg,
            "global_avg": self.global_avg,
            "max": self.max,
            "value": self.value,
        }
        return self.fmt.format(**stats)


def all_gather(data):
    """
    Run all_gather on arbitrary picklable data (not necessarily tensors).

    The object is pickled into a byte tensor on the "cuda" device, the byte
    counts of all ranks are exchanged first, every tensor is padded to the
    largest size, and the gathered buffers are truncated and unpickled again.
    With a world size of 1 no communication happens.

    NOTE(review): the received buffers are passed to pickle.loads; this
    presumably only ever sees data produced by peer training processes.

    Args:
        data: any picklable object
    Returns:
        list[data]: list of data gathered from each rank
    """
    world_size = get_world_size()
    if world_size == 1:
        # nothing to exchange: return the local object as the only entry
        return [data]

    # serialized to a Tensor
    buffer = pickle.dumps(data)
    storage = torch.ByteStorage.from_buffer(buffer)
    tensor = torch.ByteTensor(storage).to("cuda")

    # obtain Tensor size of each rank
    local_size = torch.tensor([tensor.numel()], device="cuda")
    size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)]
    dist.all_gather(size_list, local_size)
    size_list = [int(size.item()) for size in size_list]
    max_size = max(size_list)

    # receiving Tensor from all ranks
    # we pad the tensor because torch all_gather does not support
    # gathering tensors of different shapes
    tensor_list = []
    for _ in size_list:
        tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda"))
    if local_size != max_size:
        # the padding content is uninitialized; it is cut off again below
        padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda")
        tensor = torch.cat((tensor, padding), dim=0)
    dist.all_gather(tensor_list, tensor)

    data_list = []
    for size, tensor in zip(size_list, tensor_list):
        # keep only the first `size` bytes, i.e. drop the padding of that rank
        buffer = tensor.cpu().numpy().tobytes()[:size]
        data_list.append(pickle.loads(buffer))

    return data_list


def reduce_dict(input_dict, average=True):
    """
    Reduce the values of a dictionary across all processes.

    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum
    Returns:
        A dict with the same keys as input_dict holding the reduced values.
        With fewer than two processes input_dict itself is returned unchanged.
    """
    world_size = get_world_size()
    if world_size < 2:  # single process: nothing to reduce
        return input_dict

    with torch.no_grad():
        # sort the keys so that they are consistent across processes
        names = sorted(input_dict.keys())
        values = torch.stack([input_dict[name] for name in names], dim=0)
        dist.all_reduce(values)
        if average:
            values /= world_size
        return dict(zip(names, values))


class MetricLogger(object):
    """Collect named ``SmoothedValue`` meters and print progress while iterating.

    Meters are created on first use and can be read as attributes
    (``logger.loss``). ``log_every`` wraps an iterable and prints a progress
    line every ``print_freq`` items.
    """

    def __init__(self, delimiter="\t"):
        """Create an empty logger; ``delimiter`` separates the printed fields."""
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Feed one value per keyword into the meter of the same name.

        Tensors are converted with ``.item()``; every value must then be a
        float or an int.
        """
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        """Resolve unknown attributes to the meter of that name.

        Raises:
            AttributeError: if there is neither a meter nor an attribute ``attr``.
        """
        # Go through __dict__ for the registry: reading self.meters here would
        # re-enter __getattr__ and recurse forever on an instance whose
        # 'meters' attribute has not been set yet.
        meters = self.__dict__.get("meters", {})
        if attr in meters:
            return meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        """Join ``name: meter`` for all meters with the delimiter."""
        return self.delimiter.join(
            "{}: {}".format(name, str(meter)) for name, meter in self.meters.items()
        )

    def synchronize_between_processes(self):
        """Synchronize every meter across processes."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register ``meter`` under ``name``, replacing any existing one."""
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield the items of ``iterable`` and print progress along the way.

        A line is printed every ``print_freq`` items and for the last item; a
        total-time line is printed once the iterable is exhausted.

        Args:
            iterable: sized iterable to walk through (``len()`` must work).
            print_freq: number of items between two progress lines.
            header: optional prefix of every printed line.
        """
        if not header:
            header = ""
        num_iters = len(iterable)
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        # pad the running index to the width of the total count
        space_fmt = ":" + str(len(str(num_iters))) + "d"

        # the memory field is only reported when CUDA is available
        use_cuda = torch.cuda.is_available()
        fields = [header,
                  '[{0' + space_fmt + '}/{1}]',
                  'eta: {eta}',
                  '{meters}',
                  'time: {time}',
                  'data: {data}']
        if use_cuda:
            fields.append('max mem: {memory:.0f}')
        log_msg = self.delimiter.join(fields)

        MB = 1024.0 * 1024.0
        for i, obj in enumerate(iterable):
            # time spent waiting for the item vs. time for the whole iteration
            data_time.update(time.time() - end)
            yield obj
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == num_iters - 1:
                eta_second = iter_time.global_avg * (num_iters - i)
                # whole seconds only, so the ETA prints without microseconds
                eta_string = str(datetime.timedelta(seconds=int(eta_second)))
                values = {
                    "eta": eta_string,
                    "meters": str(self),
                    "time": str(iter_time),
                    "data": str(data_time),
                }
                if use_cuda:
                    values["memory"] = torch.cuda.max_memory_allocated() / MB
                print(log_msg.format(i, num_iters, **values))
            end = time.time()

        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        # max(..., 1) keeps the summary from dividing by zero on an empty iterable
        print('{} Total time: {} ({:.4f} s / it)'.format(header,
                                                         total_time_str,
                                                         total_time / max(num_iters, 1)))


def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
    """Build a ``LambdaLR`` scheduler that linearly warms the learning rate up.

    The multiplier applied to the optimizer's base learning rate is
    ``warmup_factor`` at step 0, grows linearly with the step count, and is
    exactly 1 from step ``warmup_iters`` onwards.
    """

    def f(x):
        """Return the learning-rate multiplier for step ``x``."""
        if x < warmup_iters:
            # Still warming up: interpolate from warmup_factor towards 1.
            progress = float(x) / warmup_iters
            return warmup_factor * (1 - progress) + progress
        # Warm-up finished: leave the base learning rate untouched.
        return 1

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)


def mkdir(path):
    """Create ``path`` (including parents), ignoring "already exists" errors.

    Any other ``OSError`` raised by ``os.makedirs`` is propagated.
    """
    try:
        os.makedirs(path)
    except OSError as err:
        already_exists = err.errno == errno.EEXIST
        if not already_exists:
            raise


def setup_for_distributed(is_master):
    """
    Replace ``builtins.print`` with a gated version.

    After this call, ``print`` only produces output when ``is_master`` is
    true or when the caller passes ``force=True``; the ``force`` keyword is
    stripped before delegating to the previously installed ``print``.
    """
    import builtins as __builtin__
    original_print = __builtin__.print

    def print(*args, **kwargs):
        forced = kwargs.pop('force', False)
        if not (is_master or forced):
            return
        original_print(*args, **kwargs)

    __builtin__.print = print


def is_dist_avail_and_initialized():
    """Return True only if torch.distributed is available and initialized."""
    # Short-circuit: is_initialized() is only consulted when the distributed
    # package is available at all.
    return dist.is_available() and dist.is_initialized()


def get_world_size():
    """Return the distributed world size, or 1 when not running distributed."""
    return dist.get_world_size() if is_dist_avail_and_initialized() else 1


def get_rank():
    """Return this process's distributed rank, or 0 when not running distributed."""
    return dist.get_rank() if is_dist_avail_and_initialized() else 0


def is_main_process():
    """Return True when the current process has rank 0."""
    rank = get_rank()
    return rank == 0


def save_on_master(*args, **kwargs):
    """Forward all arguments to ``torch.save``, but only on the main process."""
    if not is_main_process():
        return
    torch.save(*args, **kwargs)


def init_distributed_mode(args):
    """Populate ``args`` for distributed training and start the process group.

    Rank information is taken from ``RANK``/``WORLD_SIZE``/``LOCAL_RANK`` when
    the first two are set, otherwise from ``SLURM_PROCID``. When neither is
    present, ``args.distributed`` is set to False and nothing else happens.
    In the SLURM branch ``args.world_size`` is not assigned here, so it must
    already be present on ``args``.
    """
    env = os.environ
    has_rank_env = 'RANK' in env and 'WORLD_SIZE' in env

    if not has_rank_env and 'SLURM_PROCID' not in env:
        print('Not using distributed mode')
        args.distributed = False
        return

    if has_rank_env:
        args.rank = int(env["RANK"])
        args.world_size = int(env['WORLD_SIZE'])
        args.gpu = int(env['LOCAL_RANK'])
    else:
        args.rank = int(env['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    torch.distributed.init_process_group(backend=args.dist_backend,
                                         init_method=args.dist_url,
                                         world_size=args.world_size,
                                         rank=args.rank)
    # Wait for every process to join before silencing non-master printing.
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)


================================================
FILE: pytorch_object_detection/retinaNet/train_utils/group_by_aspect_ratio.py
================================================
import bisect
from collections import defaultdict
import copy
from itertools import repeat, chain
import math
import numpy as np

import torch
import torch.utils.data
from torch.utils.data.sampler import BatchSampler, Sampler
from torch.utils.model_zoo import tqdm
import torchvision

from PIL import Image


def _repeat_to_at_least(iterable, n):
    repeat_times = math.ceil(n / len(iterable))
    repeated = chain.from_iterable(repeat(iterable, repeat_times))
    return list(repeated)


class GroupedBatchSampler(BatchSampler):
    """
    Batch sampler that only ever puts indices of the same group in a batch.

    Indices are drawn from the wrapped sampler in its own order and buffered
    per group; a batch is emitted as soon as a group's buffer is full, so the
    batch order stays close to the order of the wrapped sampler.

    Arguments:
        sampler (Sampler): Base sampler.
        group_ids (list[int]): Group id of every sample, indexed by the
            indices the sampler produces (``group_ids[idx]``).
        batch_size (int): Size of mini-batch.
    """
    def __init__(self, sampler, group_ids, batch_size):
        if not isinstance(sampler, Sampler):
            raise ValueError(
                "sampler should be an instance of "
                "torch.utils.data.Sampler, but got sampler={}".format(sampler)
            )
        self.sampler = sampler
        self.group_ids = group_ids
        self.batch_size = batch_size

    def __iter__(self):
        pending = defaultdict(list)  # group id -> indices waiting for a full batch
        seen = defaultdict(list)     # group id -> every index seen for that group

        emitted = 0
        for idx in self.sampler:
            gid = self.group_ids[idx]
            pending[gid].append(idx)
            seen[gid].append(idx)
            if len(pending[gid]) == self.batch_size:
                yield pending[gid]
                emitted += 1
                del pending[gid]
            assert len(pending[gid]) < self.batch_size

        # The sampler is exhausted. Top up partially filled buffers so that
        # the number of batches produced always equals len(self).
        shortfall = len(self) - emitted
        if shortfall > 0:
            # Fullest buffers first.
            fullest_first = sorted(pending.items(),
                                   key=lambda kv: len(kv[1]), reverse=True)
            for gid, _ in fullest_first:
                missing = self.batch_size - len(pending[gid])
                # Pad with samples already seen for the same group.
                filler = _repeat_to_at_least(seen[gid], missing)
                pending[gid].extend(filler[:missing])
                assert len(pending[gid]) == self.batch_size
                yield pending[gid]
                shortfall -= 1
                if shortfall == 0:
                    break
        assert shortfall == 0

    def __len__(self):
        return len(self.sampler) // self.batch_size


def _compute_aspect_ratios_slow(dataset, indices=None):
    print("Your dataset doesn't support the fast path for "
          "computing the aspect ratios, so will iterate over "
          "the full dataset and load every image instead. "
          "This might take some time...")
    if indices is None:
        indices = range(len(dataset))

    class SubsetSampler(Sampler):
        def __init__(self, indices):
            self.indices = indices

        def __iter__(self):
            return iter(self.indices)

        def __len__(self):
            return len(self.indices)

    sampler = SubsetSampler(indices)
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, sampler=sampler,
        num_workers=14,  # you might want to increase it for faster processing
        collate_fn=lambda x: x[0])
    aspect_ratios = []
    with tqdm(total=len(dataset)) as pbar:
        for _i, (img, _) in enumerate(data_loader):
            pbar.update(1)
            height, width = img.shape[-2:]
            aspect_ratio = float(width) / float(height)
            aspect_ratios.append(aspect_ratio)
    return aspect_ratios


def _compute_aspect_ratios_custom_dataset(dataset, indices=None):
    if indices is None:
        indices = range(len(dataset))
    aspect_ratios = []
    for i in indices:
        height, width = dataset.get_height_and_width(i)
        aspect_ratio = float(width) / float(height)
        aspect_ratios.append(aspect_ratio)
    return aspect_ratios


def _compute_aspect_ratios_coco_dataset(dataset, indices=None):
    if indices is None:
        indices = range(len(dataset))
    aspect_ratios = []
    for i in indices:
        img_info = dataset.coco.imgs[dataset.ids[i]]
        aspect_ratio = float(img_info["width"]) / float(img_info["height"])
        aspect_ratios.append(aspect_ratio)
    return aspect_ratios


def _compute_aspect_ratios_voc_dataset(dataset, indices=None):
    """Compute width/height ratios by opening the files in ``dataset.images``.

    Args:
        dataset: object exposing ``images``, indexable by sample position and
            yielding something ``PIL.Image.open`` accepts.
        indices: positions to process, in order. Defaults to every index of
            the dataset.

    Returns:
        list[float]: one aspect ratio per entry of ``indices``.
    """
    if indices is None:
        indices = range(len(dataset))
    aspect_ratios = []
    for i in indices:
        # PIL loads pixel data lazily, so reading .size does not pull the
        # image into memory. The context manager closes the file handle
        # immediately instead of leaving it to the garbage collector.
        with Image.open(dataset.images[i]) as img:
            width, height = img.size
        aspect_ratios.append(float(width) / float(height))
    return aspect_ratios


def _compute_aspect_ratios_subset_dataset(dataset, indices=None):
    """Compute aspect ratios for a subset wrapper.

    Positions in the subset are translated through ``dataset.indices`` and
    the work is delegated to ``compute_aspect_ratios`` on ``dataset.dataset``.
    """
    selected = range(len(dataset)) if indices is None else indices
    parent_indices = [dataset.indices[pos] for pos in selected]
    return compute_aspect_ratios(dataset.dataset, parent_indices)


def compute_aspect_ratios(dataset, indices=None):
    """Compute width/height ratios, picking the cheapest strategy available.

    Checks are made in this order: a ``get_height_and_width`` method on the
    dataset, torchvision ``CocoDetection``, torchvision ``VOCDetection``,
    ``torch.utils.data.Subset``; anything else falls back to loading every
    sample.
    """
    if hasattr(dataset, "get_height_and_width"):
        handler = _compute_aspect_ratios_custom_dataset
    elif isinstance(dataset, torchvision.datasets.CocoDetection):
        handler = _compute_aspect_ratios_coco_dataset
    elif isinstance(dataset, torchvision.datasets.VOCDetection):
        handler = _compute_aspect_ratios_voc_dataset
    elif isinstance(dataset, torch.utils.data.Subset):
        handler = _compute_aspect_ratios_subset_dataset
    else:
        # slow path
        handler = _compute_aspect_ratios_slow
    return handler(dataset, indices)


def _quantize(x, bins):
    bins = copy.deepcopy(bins)
    bins = sorted(bins)
    # bisect_right：寻找y元素按顺序应该排在bins中哪个元素的右边，返回的是索引
    quantized = list(map(lambda y: bisect.bisect_right(bins, y), x))
    return quantized


def create_aspect_ratio_groups(dataset, k=0):
    """Assign every sample of ``dataset`` to an aspect-ratio group.

    For ``k > 0`` the bin edges are ``2 * k + 1`` values spaced evenly in
    log2 between 0.5 and 2; for ``k <= 0`` the single edge 1.0 is used.
    Returns one group index per sample and prints the edges together with
    the number of samples in each occupied group.
    """
    # width/height ratio of every image in the dataset
    aspect_ratios = compute_aspect_ratios(dataset)

    if k > 0:
        bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist()
    else:
        bins = [1.0]

    # index of the bin each ratio falls into
    groups = _quantize(aspect_ratios, bins)

    # number of elements per group (only groups that actually occur)
    _, counts = np.unique(groups, return_counts=True)
    fbins = [0] + bins + [np.inf]
    print("Using {} as bins for aspect ratio quantization".format(fbins))
    print("Count of instances per bin: {}".format(counts))
    return groups


================================================
FILE: pytorch_object_detection/retinaNet/train_utils/train_eval_utils.py
================================================
import math
import sys
import time

import torch

from .coco_utils import get_coco_api_from_dataset
from .coco_eval import CocoEvaluator
import train_utils.distributed_utils as utils


def train_one_epoch(model, optimizer, data_loader, device, epoch,
                    print_freq=50, warmup=False, scaler=None):
    """Train ``model`` for one pass over ``data_loader``.

    Args:
        model: called as ``model(images, targets)``; must return a dict of
            loss values.
        optimizer: optimizer updated once per batch.
        data_loader: yields ``(images, targets)`` batches; every target is a
            dict whose values are moved to ``device``.
        device: device the images and targets are moved to.
        epoch: epoch number, used in the log header and to decide whether
            warm-up applies.
        print_freq: logging interval passed to ``MetricLogger.log_every``.
        warmup: when True and ``epoch == 0``, a linear warm-up scheduler is
            stepped after every batch.
        scaler: optional gradient scaler; when given, the forward pass runs
            under autocast and the backward/step go through the scaler.

    Returns:
        tuple: ``(mloss, now_lr)``, the running mean of the reduced loss as
        a one-element tensor and the learning rate of the first parameter
        group after the last batch.

    NOTE(review): ``now_lr`` is only assigned inside the loop, so an empty
    ``data_loader`` would fail at the return statement.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if epoch == 0 and warmup is True:  # enable learning-rate warm-up during the first epoch (epoch=0) only
        warmup_factor = 1.0 / 1000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    mloss = torch.zeros(1).to(device)  # mean losses
    for i, [images, targets] in enumerate(metric_logger.log_every(data_loader, print_freq, header)):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # mixed-precision autocast context, enabled only when a scaler is given
        # (original note: it has no effect when running on CPU)
        with torch.cuda.amp.autocast(enabled=scaler is not None):
            loss_dict = model(images, targets)

            losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purpose
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()
        # track the running mean of the training loss
        mloss = (mloss * i + loss_value) / (i + 1)  # update mean losses

        if not math.isfinite(loss_value):  # abort when the loss is not finite (inf or NaN)
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        if scaler is not None:
            scaler.scale(losses).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            losses.backward()
            optimizer.step()

        if lr_scheduler is not None:  # only set during the warm-up epoch
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        now_lr = optimizer.param_groups[0]["lr"]
        metric_logger.update(lr=now_lr)

    return mloss, now_lr


@torch.no_grad()
def evaluate(model, data_loader, device):
    """Run ``model`` over ``data_loader`` and return the COCO statistics.

    Predictions are moved to the CPU, keyed by each target's ``image_id``
    and fed to a ``CocoEvaluator``; after synchronising across processes the
    evaluator is accumulated and summarised. The ``stats`` of the first IoU
    type are returned as a plain list.
    """
    cpu = torch.device("cpu")
    model.eval()
    logger = utils.MetricLogger(delimiter="  ")

    coco_api = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    evaluator = CocoEvaluator(coco_api, iou_types)

    for image, targets in logger.log_every(data_loader, 100, "Test: "):
        image = [img.to(device) for img in image]

        # Skip the GPU-only synchronisation when running on the CPU.
        if device != torch.device("cpu"):
            torch.cuda.synchronize(device)

        # Time the forward pass including the transfer of outputs to the CPU.
        tic = time.time()
        outputs = model(image)
        outputs = [{key: value.to(cpu) for key, value in out.items()} for out in outputs]
        model_time = time.time() - tic

        res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}

        tic = time.time()
        evaluator.update(res)
        evaluator_time = time.time() - tic
        logger.update(model_time=model_time, evaluator_time=evaluator_time)

    # gather the stats from all processes
    logger.synchronize_between_processes()
    print("Averaged stats:", logger)
    evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    evaluator.accumulate()
    evaluator.summarize()

    # numpy array -> list
    return evaluator.coco_eval[iou_types[0]].stats.tolist()


def _get_iou_types(model):
    model_without_ddp = model
    if isinstance(model, torch.nn.parallel.DistributedDataParallel):
        model_without_ddp = model.module
    iou_types = ["bbox"]
    return iou_types


================================================
FILE: pytorch_object_detection/retinaNet/transforms.py
================================================
import random
from torchvision.transforms import functional as F


class Compose(object):
    """Chain several transforms, each taking and returning ``(image, target)``."""

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        # Feed the output pair of each transform into the next one.
        for transform in self.transforms:
            image, target = transform(image, target)
        return image, target


class ToTensor(object):
    """Convert the image with ``F.to_tensor``; the target passes through unchanged."""

    def __call__(self, image, target):
        return F.to_tensor(image), target


class RandomHorizontalFlip(object):
    """Randomly mirror the image left-right together with its bounding boxes."""

    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, image, target):
        if not random.random() < self.prob:
            return image, target

        _, width = image.shape[-2:]
        flipped = image.flip(-1)  # reverse the last (width) axis

        # boxes are (xmin, ymin, xmax, ymax): mirrored xmin comes from the old
        # xmax and vice versa; the tensor is updated in place.
        boxes = target["boxes"]
        boxes[:, [0, 2]] = width - boxes[:, [2, 0]]
        target["boxes"] = boxes
        return flipped, target


================================================
FILE: pytorch_object_detection/retinaNet/validation.py
================================================
"""
该脚本用于调用训练好的模型权重去计算验证集/测试集的COCO指标
以及每个类别的mAP(IoU=0.5)
"""

import os
import json

import torch
from tqdm import tqdm
import numpy as np

import transforms
from network_files import RetinaNet
from backbone import resnet50_fpn_backbone, LastLevelP6P7
from my_dataset import VOCDataSet
from train_utils import get_coco_api_from_dataset, CocoEvaluator


def summarize(self, catId=None):
    """
    Compute the 12 COCO summary metrics from accumulated evaluation results.

    Note this function can *only* be applied on the default parameter setting.

    Args:
        self: evaluator object exposing ``params`` (with ``iouThrs``,
            ``areaRngLbl`` and ``maxDets``) and ``eval`` (with ``'precision'``
            and ``'recall'`` arrays), as filled in by ``accumulate()``.
        catId: when an ``int``, restrict every metric to that category index;
            otherwise average over all categories.

    Returns:
        tuple: ``(stats, print_info)``, a list of 12 metric values and the
        matching human-readable report, one line per metric.

    Raises:
        Exception: if ``self.eval`` is empty, i.e. ``accumulate()`` has not
            been run yet.
    """
    # Check up front: indexing an empty self.eval below would otherwise fail
    # with an unrelated KeyError before this message could be shown.
    if not self.eval:
        raise Exception('Please run accumulate() first')

    def _summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):
        """Return ``(mean, report_line)`` for one metric configuration."""
        p = self.params
        iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
        titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
        typeStr = '(AP)' if ap == 1 else '(AR)'
        iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
            if iouThr is None else '{:0.2f}'.format(iouThr)

        aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
        mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]

        # dimension of precision: [TxRxKxAxM]; dimension of recall: [TxKxAxM]
        s = self.eval['precision'] if ap == 1 else self.eval['recall']
        if iouThr is not None:
            # keep only the requested IoU threshold
            t = np.where(iouThr == p.iouThrs)[0]
            s = s[t]

        # select one category (if requested), the area range and maxDets
        if ap == 1:
            if isinstance(catId, int):
                s = s[:, :, catId, aind, mind]
            else:
                s = s[:, :, :, aind, mind]
        else:
            if isinstance(catId, int):
                s = s[:, catId, aind, mind]
            else:
                s = s[:, :, aind, mind]

        # entries of -1 mark "no data" and are excluded from the mean
        valid = s[s > -1]
        mean_s = -1 if len(valid) == 0 else np.mean(valid)

        print_string = iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)
        return mean_s, print_string

    max_dets = self.params.maxDets
    # One entry per reported metric, in the standard COCO order.
    queries = [
        dict(ap=1),
        dict(ap=1, iouThr=.5, maxDets=max_dets[2]),
        dict(ap=1, iouThr=.75, maxDets=max_dets[2]),
        dict(ap=1, areaRng='small', maxDets=max_dets[2]),
        dict(ap=1, areaRng='medium', maxDets=max_dets[2]),
        dict(ap=1, areaRng='large', maxDets=max_dets[2]),
        dict(ap=0, maxDets=max_dets[0]),
        dict(ap=0, maxDets=max_dets[1]),
        dict(ap=0, maxDets=max_dets[2]),
        dict(ap=0, areaRng='small', maxDets=max_dets[2]),
        dict(ap=0, areaRng='medium', maxDets=max_dets[2]),
        dict(ap=0, areaRng='large', maxDets=max_dets[2]),
    ]
    results = [_summarize(**query) for query in queries]

    stats = [mean for mean, _ in results]
    print_info = "\n".join(line for _, line in results)

    return stats, print_info


def main(parser_data):
    """Evaluate trained RetinaNet weights on the VOC2012 ``val.txt`` split.

    Reads ``device``, ``data_path``, ``batch_size``, ``num_classes`` and
    ``weights_path`` from ``parser_data``, runs inference over the validation
    set, prints the COCO metrics plus the per-category AP at IoU=0.5, and
    writes both to ``record_mAP.txt`` in the current directory.
    """
    device = torch.device(parser_data.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    data_transform = {
        "val": transforms.Compose([transforms.ToTensor()])
    }

    # read class_indict
    label_json_path = './pascal_voc_classes.json'
    assert os.path.exists(label_json_path), "json file {} dose not exist.".format(label_json_path)
    with open(label_json_path, 'r') as f:
        class_dict = json.load(f)

    # invert the mapping: label value -> class name
    category_index = {v: k for k, v in class_dict.items()}

    VOC_root = parser_data.data_path
    # check voc root
    if os.path.exists(os.path.join(VOC_root, "VOCdevkit")) is False:
        raise FileNotFoundError("VOCdevkit dose not in path:'{}'.".format(VOC_root))

    # NOTE: a custom collate_fn is used below because every sample consists of an
    # image and a targets dict, which the default collate cannot stack into a batch
    batch_size = parser_data.batch_size
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using %g dataloader workers' % nw)

    # load validation data set
    val_dataset = VOCDataSet(VOC_root, "2012", data_transform["val"], "val.txt")
    val_dataset_loader = torch.utils.data.DataLoader(val_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=nw,
                                                     pin_memory=True,
                                                     collate_fn=val_dataset.collate_fn)

    # create model
    # NOTE: norm_layer here must match the one used in the training script
    backbone = resnet50_fpn_backbone(norm_layer=torch.nn.BatchNorm2d,
                                     returned_layers=[2, 3, 4],
                                     extra_blocks=LastLevelP6P7(256, 256))
    model = RetinaNet(backbone, parser_data.num_classes)

    # load your own trained weights
    weights_path = parser_data.weights_path
    assert os.path.exists(weights_path), "not found {} file.".format(weights_path)
    weights_dict = torch.load(weights_path, map_location='cpu')
    # accept both a full checkpoint (weights under the "model" key) and a bare state dict
    weights_dict = weights_dict["model"] if "model" in weights_dict else weights_dict
    model.load_state_dict(weights_dict)
    # print(model)

    model.to(device)

    # evaluate on the test dataset
    coco = get_coco_api_from_dataset(val_dataset)
    iou_types = ["bbox"]
    coco_evaluator = CocoEvaluator(coco, iou_types)
    cpu_device = torch.device("cpu")

    model.eval()
    with torch.no_grad():
        for image, targets in tqdm(val_dataset_loader, desc="validation..."):
            # move the images to the selected device
            image = list(img.to(device) for img in image)

            # inference
            outputs = model(image)

            outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
            res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
            coco_evaluator.update(res)

    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()

    coco_eval = coco_evaluator.coco_eval["bbox"]
    # calculate COCO info for all classes
    coco_stats, print_coco = summarize(coco_eval)

    # calculate voc info for every classes(IoU=0.5)
    # NOTE(review): category_index[i] assumes the label values in the JSON file
    # are 0-based and contiguous; confirm against pascal_voc_classes.json
    voc_map_info_list = []
    for i in range(len(category_index)):
        stats, _ = summarize(coco_eval, catId=i)
        voc_map_info_list.append(" {:15}: {}".format(category_index[i], stats[1]))

    print_voc = "\n".join(voc_map_info_list)
    print(print_voc)

    # save the validation results to a txt file
    with open("record_mAP.txt", "w") as f:
        record_lines = ["COCO results:",
                        print_coco,
                        "",
                        "mAP(IoU=0.5) for each category:",
                        print_voc]
        f.write("\n".join(record_lines))


# Command-line entry point: parse the options and run the validation.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description=__doc__)

    # device to run on
    parser.add_argument('--device', default='cuda:0', help='device')

    # number of object classes to detect
    # (the string default is converted by argparse through type=int)
    parser.add_argument('--num-classes', type=int, default='20', help='number of classes')

    # root directory of the dataset (the folder containing VOCdevkit)
    parser.add_argument('--data-path', default='/data', help='dataset root')

    # trained weights file
    parser.add_argument('--weights-path', default='./save_weights/model.pth', type=str, help='training weights')

    # batch size
    parser.add_argument('--batch_size', default=1, type=int, metavar='N',
                        help='batch size when validation.')

    args = parser.parse_args()

    main(args)


================================================
FILE: pytorch_object_detection/ssd/README.md
================================================
# SSD: Single Shot MultiBox Detector

## 环境配置：
* Python 3.6/3.7/3.8
* Pytorch 1.7.1
* pycocotools(Linux:```pip install pycocotools```; Windows:```pip install pycocotools-windows```(不需要额外安装vs))
* Ubuntu或Centos(不建议Windows)
* 最好使用GPU训练

## 文件结构：
```
├── src: 实现SSD模型的相关模块    
│     ├── resnet50_backbone.py   使用resnet50网络作为SSD的backbone  
│     ├── ssd_model.py           SSD网络结构文件 
│     └── utils.py               训练过程中使用到的一些功能实现
├── train_utils: 训练验证相关模块（包括cocotools）  
├── my_dataset.py: 自定义dataset用于读取VOC数据集    
├── train_ssd300.py: 以resnet50作为backbone的SSD网络进行训练    
├── train_multi_GPU.py: 针对使用多GPU的用户使用    
├── predict_test.py: 简易的预测脚本，使用训练好的权重进行预测测试    
├── pascal_voc_classes.json: pascal_voc标签文件    
├── plot_curve.py: 用于绘制训练过程的损失以及验证集的mAP
└── validation.py: 利用训练好的权重验证/测试数据的COCO指标，并生成record_mAP.txt文件
```

## 预训练权重下载地址（下载后放入src文件夹中）：
* ResNet50+SSD: https://ngc.nvidia.com/catalog/models  
 `搜索ssd -> 找到SSD for PyTorch(FP32) -> download FP32 -> 解压文件`
* 如果找不到可通过百度网盘下载，链接:https://pan.baidu.com/s/1byOnoNuqmBLZMDA0-lbCMQ 提取码:iggj 

## 数据集，本例程使用的是PASCAL VOC2012数据集(下载后放入项目当前文件夹中)
* Pascal VOC2012 train/val数据集下载地址：http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
* Pascal VOC2007 test数据集请参考：http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
* 如果不了解数据集或者想使用自己的数据集进行训练，请参考我的bilibili：https://b23.tv/F1kSCK

## 训练方法
* 确保提前准备好数据集
* 确保提前下载好对应预训练模型权重
* 单GPU训练或CPU，直接使用train_ssd300.py训练脚本
* 若要使用多GPU训练，使用 "python -m torch.distributed.launch --nproc_per_node=8 --use_env train_multi_GPU.py" 指令,nproc_per_node参数为使用GPU数量
* 训练过程中保存的`results.txt`是每个epoch在验证集上的COCO指标，前12个值是COCO指标，后面两个值是训练平均损失以及学习率

## 如果对SSD算法原理不是很理解可参考我的bilibili
* https://www.bilibili.com/video/BV1fT4y1L7Gi

## 进一步了解该项目，以及对SSD算法代码的分析可参考我的bilibili
* https://www.bilibili.com/video/BV1vK411H771/

## Resnet50 + SSD算法框架图
![Resnet50 SSD](res50_ssd.png) 


================================================
FILE: pytorch_object_detection/ssd/draw_box_utils.py
================================================
from PIL.Image import Image, fromarray
import PIL.ImageDraw as ImageDraw
import PIL.ImageFont as ImageFont
from PIL import ImageColor
import numpy as np

# Colour names used to draw detections. draw_objs resolves a name with
# ImageColor.getrgb and picks it by class id modulo the length of this list.
STANDARD_COLORS = [
    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',
    'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',
    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',
    'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
    'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',
    'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
    'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
    'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
    'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
    'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
    'WhiteSmoke', 'Yellow', 'YellowGreen'
]


def _text_size(font, text):
    """Return ``(width, height)`` of ``text`` for ``font`` on any Pillow version.

    ``font.getsize`` was removed in Pillow 10, so ``getbbox`` is preferred
    when the font provides it; older Pillow releases fall back to ``getsize``.
    """
    if hasattr(font, "getbbox"):
        left, _, right, bottom = font.getbbox(text)
        # Match the legacy getsize() result: the width of the box and the
        # height measured from the drawing origin down to the bottom edge.
        return right - left, bottom
    return font.getsize(text)


def draw_text(draw,
              box: list,
              cls: int,
              score: float,
              category_index: dict,
              color: str,
              font: str = 'arial.ttf',
              font_size: int = 24):
    """
    Draw the class name and confidence of one detection next to its box.

    Args:
        draw: drawing object providing ``rectangle`` and ``text``.
        box: ``[left, top, right, bottom]`` of the detection.
        cls: class id; looked up in ``category_index`` as ``str(cls)``.
        score: confidence, shown as an integer percentage.
        category_index: mapping from class-id string to class name.
        color: fill colour of the label background.
        font: font file passed to ``ImageFont.truetype``; the default PIL
            font is used when it cannot be loaded.
        font_size: font size passed to ``ImageFont.truetype``.
    """
    try:
        font = ImageFont.truetype(font, font_size)
    except IOError:
        font = ImageFont.load_default()

    left, top, _, bottom = box
    display_str = f"{category_index[str(cls)]}: {int(100 * score)}%"
    # The label is rendered one character at a time, so measure every
    # character once and reuse the sizes for both layout and drawing.
    char_sizes = [_text_size(font, ch) for ch in display_str]
    # Each display_str has a top and bottom margin of 0.05x.
    display_str_height = (1 + 2 * 0.05) * max(height for _, height in char_sizes)

    # If the label would extend above the top of the image, place it below
    # the bounding box instead of above it.
    if top > display_str_height:
        text_top = top - display_str_height
        text_bottom = top
    else:
        text_top = bottom
        text_bottom = bottom + display_str_height

    for ch, (text_width, _) in zip(display_str, char_sizes):
        margin = np.ceil(0.05 * text_width)
        draw.rectangle([(left, text_top),
                        (left + text_width + 2 * margin, text_bottom)], fill=color)
        draw.text((left + margin, text_top),
                  ch,
                  fill='black',
                  font=font)
        left += text_width


def draw_masks(image, masks, colors, thresh: float = 0.7, alpha: float = 0.5):
    """Overlay coloured masks on ``image`` and return the blended picture.

    Every mask is binarised with ``masks > thresh``, its pixels are painted
    with the matching entry of ``colors`` on a copy of the image, and the
    copy is mixed with the original using weight ``alpha``. The result is
    converted to ``uint8`` and returned via ``fromarray``.
    """
    base = np.array(image)
    binary_masks = np.where(masks > thresh, True, False)

    painted = np.copy(base)
    # TODO: There might be a way to vectorize this
    for obj_mask, obj_color in zip(binary_masks, colors):
        painted[obj_mask] = obj_color

    blended = base * (1 - alpha) + painted * alpha
    return fromarray(blended.astype(np.uint8))


def draw_objs(image: Image,
              boxes: np.ndarray = None,
              classes: np.ndarray = None,
              scores: np.ndarray = None,
              masks: np.ndarray = None,
              category_index: dict = None,
              box_thresh: float = 0.1,
              mask_thresh: float = 0.5,
              line_thickness: int = 8,
              font: str = 'arial.ttf',
              font_size: int = 24,
              draw_boxes_on_image: bool = True,
              draw_masks_on_image: bool = False):
    """
    Draw bounding boxes, class labels and (optionally) masks on an image.

    Args:
        image: picture to draw on
        boxes: bounding boxes of the detections
        classes: class ids of the detections
        scores: confidence scores of the detections
        masks: masks of the detections (optional)
        category_index: mapping from class id to class name
        box_thresh: detections whose score is not above this value are dropped
        mask_thresh: threshold forwarded to ``draw_masks``
        line_thickness: width of the box outline
        font: font used for the labels
        font_size: font size used for the labels
        draw_boxes_on_image: draw boxes and labels when true
        draw_masks_on_image: draw masks when true and ``masks`` is given

    Returns:
        The image with the drawings applied; the input image itself when no
        detection passes ``box_thresh``.
    """

    # keep only detections scoring above the threshold
    keep = np.greater(scores, box_thresh)
    boxes, classes, scores = boxes[keep], classes[keep], scores[keep]
    if masks is not None:
        masks = masks[keep]
    if len(boxes) == 0:
        return image

    # one colour per detection, chosen by class id (wrapping around the palette)
    palette_size = len(STANDARD_COLORS)
    colors = []
    for cls in classes:
        colors.append(ImageColor.getrgb(STANDARD_COLORS[cls % palette_size]))

    if draw_boxes_on_image:
        canvas = ImageDraw.Draw(image)
        for box, cls, score, color in zip(boxes, classes, scores, colors):
            x_min, y_min, x_max, y_max = box
            # closed outline of the bounding box
            outline = [(x_min, y_min), (x_min, y_max), (x_max, y_max),
                       (x_max, y_min), (x_min, y_min)]
            canvas.line(outline, width=line_thickness, fill=color)
            # class name and confidence
            draw_text(canvas, box.tolist(), int(cls), float(score), category_index, color, font, font_size)

    if draw_masks_on_image and (masks is not None):
        image = draw_masks(image, masks, colors, mask_thresh)

    return image


================================================
FILE: pytorch_object_detection/ssd/my_dataset.py
================================================
from torch.utils.data import Dataset
import os
import torch
import json
from PIL import Image
from lxml import etree


class VOCDataSet(Dataset):
    """Read and parse the PASCAL VOC2007/2012 detection dataset."""

    def __init__(self, voc_root, year="2012", transforms=None, train_set='train.txt'):
        """
        Args:
            voc_root: dataset root; either a path that already contains
                "VOCdevkit" or the directory holding the "VOCdevkit" folder
            year: dataset year, "2007" or "2012"
            transforms: optional callable invoked as ``transforms(image, target)``
            train_set: name of the split file under ImageSets/Main
        """
        assert year in ["2007", "2012"], "year must be in ['2007', '2012']"
        # Be tolerant: accept a root that already points inside VOCdevkit.
        if "VOCdevkit" in voc_root:
            self.root = os.path.join(voc_root, f"VOC{year}")
        else:
            self.root = os.path.join(voc_root, "VOCdevkit", f"VOC{year}")
        self.img_root = os.path.join(self.root, "JPEGImages")
        self.annotations_root = os.path.join(self.root, "Annotations")

        txt_list = os.path.join(self.root, "ImageSets", "Main", train_set)

        # One annotation file per non-empty line of the split file.
        with open(txt_list) as read:
            self.xml_list = [os.path.join(self.annotations_root, line.strip() + ".xml")
                             for line in read.readlines() if len(line.strip()) > 0]

        # Load the class-name -> index mapping (path is relative to the CWD).
        json_file = "./pascal_voc_classes.json"
        assert os.path.exists(json_file), "{} file not exist.".format(json_file)
        with open(json_file, 'r') as f:
            self.class_dict = json.load(f)

        self.transforms = transforms

    def __len__(self):
        """Return the number of annotation files in the selected split."""
        return len(self.xml_list)

    def _read_annotation(self, idx):
        """Read and parse the annotation XML of sample ``idx``.

        Returns:
            tuple: ``(xml_path, annotation)`` where ``annotation`` is the
            dictionary found under the root "annotation" tag.
        """
        xml_path = self.xml_list[idx]
        with open(xml_path) as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        return xml_path, self.parse_xml_to_dict(xml)["annotation"]

    @staticmethod
    def _relative_box(obj, data_width, data_height):
        """Return ``[xmin, ymin, xmax, ymax]`` of ``obj`` divided by the image size."""
        bndbox = obj["bndbox"]
        xmin = float(bndbox["xmin"]) / data_width
        xmax = float(bndbox["xmax"]) / data_width
        ymin = float(bndbox["ymin"]) / data_height
        ymax = float(bndbox["ymax"]) / data_height
        return [xmin, ymin, xmax, ymax]

    @staticmethod
    def _build_target(idx, boxes, labels, iscrowd, height_width):
        """Convert the collected python lists into the target dict of tensors."""
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        iscrowd = torch.as_tensor(iscrowd, dtype=torch.int64)
        height_width = torch.as_tensor(height_width, dtype=torch.int64)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        target["height_width"] = height_width
        return target

    def __getitem__(self, idx):
        """Load image ``idx`` and its target dict.

        Box coordinates are stored relative to the image size. Boxes whose
        width or height is not positive are skipped with a warning.

        Raises:
            ValueError: if the image file is not a JPEG.
        """
        xml_path, data = self._read_annotation(idx)
        data_height = int(data["size"]["height"])
        data_width = int(data["size"]["width"])
        img_path = os.path.join(self.img_root, data["filename"])
        image = Image.open(img_path)
        if image.format != "JPEG":
            raise ValueError("Image '{}' format not JPEG".format(img_path))

        assert "object" in data, "{} lack of object information.".format(xml_path)
        boxes = []
        labels = []
        iscrowd = []
        for obj in data["object"]:
            # Ground-truth boxes are converted to relative coordinates.
            box = self._relative_box(obj, data_width, data_height)
            xmin, ymin, xmax, ymax = box

            # Some annotations contain boxes with zero width/height; per the
            # original author these make the regression loss become NaN.
            if xmax <= xmin or ymax <= ymin:
                print("Warning: in '{}' xml, there are some bbox w/h <=0".format(xml_path))
                continue

            boxes.append(box)
            labels.append(self.class_dict[obj["name"]])
            # The "difficult" tag is optional; treat a missing tag as 0.
            iscrowd.append(int(obj.get("difficult", 0)))

        target = self._build_target(idx, boxes, labels, iscrowd, [data_height, data_width])

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        return image, target

    def get_height_and_width(self, idx):
        """Return ``(height, width)`` of image ``idx`` as recorded in its XML."""
        _, data = self._read_annotation(idx)
        data_height = int(data["size"]["height"])
        data_width = int(data["size"]["width"])
        return data_height, data_width

    def parse_xml_to_dict(self, xml):
        """
        Recursively convert an XML tree into nested dictionaries (modelled on
        TensorFlow's recursive_parse_xml_to_dict).

        Args:
            xml: xml tree obtained by parsing XML file contents using lxml.etree

        Returns:
            Python dictionary holding XML contents.
        """

        if len(xml) == 0:  # leaf element: return its tag and text directly
            return {xml.tag: xml.text}

        result = {}
        for child in xml:
            child_result = self.parse_xml_to_dict(child)  # recurse into the child
            if child.tag != 'object':
                result[child.tag] = child_result[child.tag]
            else:
                # There may be several "object" tags, so collect them in a list.
                if child.tag not in result:
                    result[child.tag] = []
                result[child.tag].append(child_result[child.tag])
        return {xml.tag: result}

    def coco_index(self, idx):
        """
        Return only the target dict of sample ``idx`` without opening the image.

        Intended for collecting label statistics for pycocotools: skipping the
        image read makes the pass much faster. No box filtering and no
        transforms are applied here.

        Args:
            idx: index of the sample to fetch
        """
        _, data = self._read_annotation(idx)
        data_height = int(data["size"]["height"])
        data_width = int(data["size"]["width"])
        boxes = []
        labels = []
        iscrowd = []
        for obj in data["object"]:
            # Ground-truth boxes are converted to relative coordinates.
            boxes.append(self._relative_box(obj, data_width, data_height))
            labels.append(self.class_dict[obj["name"]])
            # Same fallback as __getitem__: a missing "difficult" tag means 0.
            iscrowd.append(int(obj.get("difficult", 0)))

        return self._build_target(idx, boxes, labels, iscrowd, [data_height, data_width])

    @staticmethod
    def collate_fn(batch):
        """Regroup a list of ``(image, target)`` pairs into ``(images, targets)`` tuples."""
        images, targets = tuple(zip(*batch))
        return images, targets

# import transforms
# from draw_box_utils import draw_objs
# from PIL import Image
# import json
# import matplotlib.pyplot as plt
# import torchvision.transforms as ts
# import random
#
# # read class_indict
# category_index = {}
# try:
#     json_file = open('./pascal_voc_classes.json', 'r')
#     class_dict = json.load(json_file)
#     category_index = {str(v): str(k) for k, v in class_dict.items()}
# except Exception as e:
#     print(e)
#     exit(-1)
#
# data_transform = {
#     "train": transforms.Compose([transforms.ToTensor(),
#                                  transforms.RandomHorizontalFlip(0.5)]),
#     "val": transforms.Compose([transforms.ToTensor()])
# }
#
# # load train data set
# train_data_set = VOCDataSet(os.getcwd(), "2012", data_transform["train"], "train.txt")
# print(len(train_data_set))
# for index in random.sample(range(0, len(train_data_set)), k=5):
#     img, target = train_data_set[index]
#     img = ts.ToPILImage()(img)
#     plot_img = draw_objs(img,
#                          target["boxes"].numpy(),
#                          target["labels"].numpy(),
#                          np.ones(target["labels"].shape[0]),
#                          category_index=category_index,
#                          box_thresh=0.5,
#                          line_thickness=3,
#                          font='arial.ttf',
#                          font_size=20)
#     plt.imshow(plot_img)
#     plt.show()


================================================
FILE: pytorch_object_detection/ssd/pascal_voc_classes.json
================================================
{
    "aeroplane": 1,
    "bicycle": 2,
    "bird": 3,
    "boat": 4,
    "bottle": 5,
    "bus": 6,
    "car": 7,
    "cat": 8,
    "chair": 9,
    "cow": 10,
    "diningtable": 11,
    "dog": 12,
    "horse": 13,
    "motorbike": 14,
    "person": 15,
    "pottedplant": 16,
    "sheep": 17,
    "sofa": 18,
    "train": 19,
    "tvmonitor": 20
}

================================================
FILE: pytorch_object_detection/ssd/plot_curve.py
================================================
import datetime
import matplotlib.pyplot as plt


def plot_loss_and_lr(train_loss, learning_rate):
    """Save a twin-axis figure of the training loss and the learning rate.

    The loss is drawn in red on the left axis and the learning rate on a
    twinned right axis. The figure is written to
    ``./loss_and_lr<timestamp>.png``. Any exception raised while plotting is
    printed instead of propagated.

    Args:
        train_loss: sequence of loss values, one per epoch
        learning_rate: sequence of learning-rate values, one per epoch
    """
    try:
        epochs = list(range(len(train_loss)))
        fig, loss_axis = plt.subplots(1, 1)
        loss_axis.plot(epochs, train_loss, 'r', label='loss')
        loss_axis.set_xlabel("epoch")
        loss_axis.set_ylabel("loss")
        loss_axis.set_title("Train Loss and lr")
        plt.legend(loc='best')

        lr_axis = loss_axis.twinx()
        lr_axis.plot(epochs, learning_rate, label='lr')
        lr_axis.set_ylabel("learning rate")
        lr_axis.set_xlim(0, len(train_loss))  # x range spans the number of epochs
        plt.legend(loc='best')

        # Merge the entries of both axes into a single legend.
        loss_handles, loss_labels = loss_axis.get_legend_handles_labels()
        lr_handles, lr_labels = lr_axis.get_legend_handles_labels()
        plt.legend(loss_handles + lr_handles, loss_labels + lr_labels, loc='upper right')

        fig.subplots_adjust(right=0.8)  # keep the saved image from being cut off
        timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        fig.savefig('./loss_and_lr{}.png'.format(timestamp))
        plt.close()
        print("successful save loss curve! ")
    except Exception as e:
        print(e)


def plot_map(mAP):
    """Save the per-epoch mAP curve to ``./mAP.png``.

    Any exception raised while plotting is printed instead of propagated.

    Args:
        mAP: sequence of mAP values, one per epoch
    """
    try:
        epochs = list(range(len(mAP)))
        plt.plot(epochs, mAP, label='mAp')
        plt.xlabel('epoch')
        plt.ylabel('mAP')
        plt.title('Eval mAP')
        plt.xlim(0, len(mAP))
        plt.legend(loc='best')
        plt.savefig('./mAP.png')
        plt.close()
        print("successful save mAP curve!")
    except Exception as e:
        print(e)


================================================
FILE: pytorch_object_detection/ssd/predict_test.py
================================================
import os
import json
import time

import torch
from PIL import Image
import matplotlib.pyplot as plt

import transforms
from src import SSD300, Backbone
from draw_box_utils import draw_objs


def create_model(num_classes):
    """Build an SSD300 model on a freshly constructed Backbone.

    Args:
        num_classes: number of classes passed to SSD300
    """
    return SSD300(backbone=Backbone(), num_classes=num_classes)


def time_synchronized():
    """Return ``time.time()`` after synchronizing CUDA when it is available."""
    if torch.cuda.is_available():
        # Wait for queued GPU work so the timestamp covers it.
        torch.cuda.synchronize()
    return time.time()


def main():
    """Run SSD300 inference on ``./test.jpg`` and save the visualised result.

    Loads weights from ``./save_weights/ssd300-14.pth`` and class names from
    ``./pascal_voc_classes.json``, runs one forward pass, scales the predicted
    boxes by the original image size, draws them and writes
    ``test_result.jpg``.
    """
    # get devices
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)

    # create model
    # number of object classes + background
    num_classes = 20 + 1
    model = create_model(num_classes=num_classes)

    # load train weights
    weights_path = "./save_weights/ssd300-14.pth"
    weights_dict = torch.load(weights_path, map_location='cpu')
    # A checkpoint may wrap the state dict under the "model" key.
    weights_dict = weights_dict["model"] if "model" in weights_dict else weights_dict
    model.load_state_dict(weights_dict)
    model.to(device)

    # read class_indict
    json_path = "./pascal_voc_classes.json"
    assert os.path.exists(json_path), "file '{}' dose not exist.".format(json_path)
    # Use a context manager so the handle is closed even if parsing fails.
    with open(json_path, 'r') as json_file:
        class_dict = json.load(json_file)
    # Invert to index -> name, with string keys as expected by the drawing call.
    category_index = {str(v): str(k) for k, v in class_dict.items()}

    # load image
    original_img = Image.open("./test.jpg")

    # from pil image to tensor: resize, convert to tensor, then normalize
    data_transform = transforms.Compose([transforms.Resize(),
                                         transforms.ToTensor(),
                                         transforms.Normalization()])
    img, _ = data_transform(original_img)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)

    model.eval()
    with torch.no_grad():
        # Warm-up forward pass so the timed run below excludes one-off setup.
        init_img = torch.zeros((1, 3, 300, 300), device=device)
        model(init_img)

        time_start = time_synchronized()
        predictions = model(img.to(device))[0]  # bboxes_out, labels_out, scores_out
        time_end = time_synchronized()
        print("inference+NMS time: {}".format(time_end - time_start))

        predict_boxes = predictions[0].to("cpu").numpy()
        # Scale x by the original width and y by the original height
        # (PIL's Image.size is (width, height)).
        predict_boxes[:, [0, 2]] = predict_boxes[:, [0, 2]] * original_img.size[0]
        predict_boxes[:, [1, 3]] = predict_boxes[:, [1, 3]] * original_img.size[1]
        predict_classes = predictions[1].to("cpu").numpy()
        predict_scores = predictions[2].to("cpu").numpy()

        if len(predict_boxes) == 0:
            print("没有检测到任何目标!")

        plot_img = draw_objs(original_img,
                             predict_boxes,
                             predict_classes,
                             predict_scores,
                             category_index=category_index,
                             box_thresh=0.5,
                             line_thickness=3,
                             font='arial.ttf',
                             font_size=20)
        plt.imshow(plot_img)
        plt.show()
        # save the prediction image
        plot_img.save("test_result.jpg")


if __name__ == "__main__":
    main()


================================================
FILE: pytorch_object_detection/ssd/record_mAP.txt
================================================
COCO results:
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.448
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.721
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.482
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.099
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.280
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.521
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.418
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.565
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.573
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.166
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.419
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.641

mAP(IoU=0.5) for each category:
 aeroplane      : 0.8532360243584314
 bicycle        : 0.7496603797780927
 bird           : 0.7658478672087958
 boat           : 0.6079142920471263
 bottle         : 0.4986565020053691
 bus            : 0.8229568428349553
 car            : 0.7940868387465018
 cat            : 0.8800145761338203
 chair          : 0.5090524550010037
 cow            : 0.7344958411899583
 diningtable    : 0.5379541883401677
 dog            : 0.8230037525430133
 horse          : 0.7880475852689804
 motorbike      : 0.7879788462924051
 person         : 0.8351553291238482
 pottedplant    : 0.4420858247895347
 sheep          : 0.7466344247593008
 sofa           : 0.6627392793997164
 train          : 0.8380502070312741
 tvmonitor      : 0.7445168617489237

================================================
FILE: pytorch_object_detection/ssd/requirements.txt
================================================
numpy
matplotlib
tqdm
pycocotools
torch==1.7.1
torchvision==0.8.2
lxml
Pillow


================================================
FILE: pytorch_object_detection/ssd/src/__init__.py
================================================
from .res50_backbone import resnet50
from .ssd_model import SSD300, Backbone
from .utils import dboxes300_coco, calc_iou_tensor, Encoder, PostProcess


================================================
FILE: pytorch_object_detection/ssd/src/res50_backbone.py
================================================
import torch.nn as nn
import torch


class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 conv -> 3x3 conv -> 1x1 conv plus a shortcut.

    The last 1x1 convolution outputs ``out_channel * expansion`` channels.

    Args:
        in_channel: number of input channels
        out_channel: channel width of the first two convolutions
        stride: stride of the 3x3 convolution
        downsample: optional module applied to the input to form the shortcut
    """
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        super().__init__()
        expanded = out_channel * self.expansion

        # 1x1 conv: squeeze channels
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,
                               kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        # 3x3 conv: carries the block's stride
        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,
                               kernel_size=3, stride=stride, bias=False, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channel)
        # 1x1 conv: unsqueeze channels
        self.conv3 = nn.Conv2d(in_channels=out_channel, out_channels=expanded,
                               kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(expanded)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out += shortcut
        return self.relu(out)


class ResNet(nn.Module):
    """ResNet built from a configurable residual block.

    Args:
        block: block class (or factory) exposing an ``expansion`` attribute and
            accepting ``(in_channel, channel, downsample=..., stride=...)``
        blocks_num: number of blocks in each of the four stages
        num_classes: output size of the final linear layer
        include_top: when False, the pooling and linear layers are omitted and
            ``forward`` returns the layer4 feature map
    """

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True):
        super().__init__()
        self.include_top = include_top
        self.in_channel = 64  # running channel count, updated by _make_layer

        # Stem: 7x7 stride-2 convolution followed by 3x3 stride-2 max pooling.
        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four residual stages; every stage after the first uses stride 2.
        self.layer1 = self._make_layer(block, 64, blocks_num[0])
        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)
        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)
        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)

        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Kaiming-normal initialisation for every convolution weight.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` blocks and update ``self.in_channel``."""
        stage_out = channel * block.expansion

        # A projection shortcut is needed when the first block changes the
        # stride or the channel count.
        shortcut = None
        if stride != 1 or self.in_channel != stage_out:
            shortcut = nn.Sequential(
                nn.Conv2d(self.in_channel, stage_out, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(stage_out))

        first_block = block(self.in_channel, channel, downsample=shortcut, stride=stride)
        self.in_channel = stage_out

        remaining = [block(self.in_channel, channel) for _ in range(1, block_num)]
        return nn.Sequential(first_block, *remaining)

    def forward(self, x):
        """Run the stem and the four stages, plus the classifier if enabled."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        if not self.include_top:
            return x

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        return self.fc(x)


def resnet50(num_classes=1000, include_top=True):
    """Build a ResNet from Bottleneck blocks with stage depths [3, 4, 6, 3]."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(Bottleneck, stage_depths,
                  num_classes=num_classes, include_top=include_top)


================================================
FILE: pytorch_object_detection/ssd/src/ssd_model.py
================================================
import torch
from torch import nn, Tensor
from torch.jit.annotations import List

from .res50_backbone import resnet50
from .utils import dboxes300_coco, Encoder, PostProcess


class Backbone(nn.Module):
    """ResNet-50 feature extractor used as the SSD backbone.

    Keeps the first seven children of ``resnet50()`` and sets the strides of
    the first block of the last kept stage to 1, so that stage no longer
    downsamples.

    Args:
        pretrain_path: optional path to a ResNet-50 state dict to load before
            the network is truncated
    """

    def __init__(self, pretrain_path=None):
        super(Backbone, self).__init__()
        net = resnet50()
        # Channel counts of the feature maps consumed by the SSD heads.
        self.out_channels = [1024, 512, 512, 256, 256, 256]

        if pretrain_path is not None:
            # Load onto the CPU first so a checkpoint saved from a GPU can
            # still be read on a CPU-only machine; load_state_dict copies the
            # values into the existing parameters.
            net.load_state_dict(torch.load(pretrain_path, map_location="cpu"))

        # Keep only the first seven child modules of the ResNet.
        self.feature_extractor = nn.Sequential(*list(net.children())[:7])

        conv4_block1 = self.feature_extractor[-1][0]

        # Change the stride of conv4_block1 from 2 to 1.
        conv4_block1.conv1.stride = (1, 1)
        conv4_block1.conv2.stride = (1, 1)
        conv4_block1.downsample[0].stride = (1, 1)

    def forward(self, x):
        """Return the feature map produced by the truncated ResNet."""
        x = self.feature_extractor(x)
        return x


class SSD300(nn.Module):
    """SSD detector: backbone, extra feature layers and per-scale prediction heads.

    In training mode ``forward`` returns ``{"total_losses": loss}``; otherwise
    it returns the output of the post-processing module.

    Args:
        backbone: feature extractor exposing an ``out_channels`` list
        num_classes: number of classes predicted per default box
    """

    def __init__(self, backbone=None, num_classes=21):
        super().__init__()
        if backbone is None:
            raise Exception("backbone is None")
        if not hasattr(backbone, "out_channels"):
            raise Exception("the backbone not has attribute: out_channel")
        self.feature_extractor = backbone
        self.num_classes = num_classes

        # out_channels = [1024, 512, 512, 256, 256, 256] for resnet50
        channels = self.feature_extractor.out_channels
        self._build_additional_features(channels)

        # Number of default boxes per location on each feature map.
        self.num_defaults = [4, 6, 6, 6, 4, 4]

        loc_heads = []
        conf_heads = []
        for boxes_per_cell, in_ch in zip(self.num_defaults, channels):
            # One 3x3 conv per feature map for box regression and one for class scores.
            loc_heads.append(nn.Conv2d(in_ch, boxes_per_cell * 4, kernel_size=3, padding=1))
            conf_heads.append(nn.Conv2d(in_ch, boxes_per_cell * self.num_classes,
                                        kernel_size=3, padding=1))

        self.loc = nn.ModuleList(loc_heads)
        self.conf = nn.ModuleList(conf_heads)
        self._init_weights()

        default_box = dboxes300_coco()
        self.compute_loss = Loss(default_box)
        self.encoder = Encoder(default_box)
        self.postprocess = PostProcess(default_box)

    def _build_additional_features(self, input_size):
        """
        Append the extra convolution stages after the backbone, producing the
        additional feature extractors stored in ``self.additional_blocks``.

        :param input_size: channel counts of consecutive feature maps
        :return: None
        """
        # input_size = [1024, 512, 512, 256, 256, 256] for resnet50
        middle_channels = [256, 256, 128, 128, 128]
        channel_plan = zip(input_size[:-1], input_size[1:], middle_channels)

        stages = []
        for index, (in_ch, out_ch, mid_ch) in enumerate(channel_plan):
            # The first three stages use padding 1 / stride 2; the rest use padding 0 / stride 1.
            if index < 3:
                padding, stride = 1, 2
            else:
                padding, stride = 0, 1
            stages.append(nn.Sequential(
                nn.Conv2d(in_ch, mid_ch, kernel_size=1, bias=False),
                nn.BatchNorm2d(mid_ch),
                nn.ReLU(inplace=True),
                nn.Conv2d(mid_ch, out_ch, kernel_size=3, padding=padding, stride=stride, bias=False),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(inplace=True),
            ))
        self.additional_blocks = nn.ModuleList(stages)

    def _init_weights(self):
        """Xavier-initialise every parameter with more than one dimension in the extra layers and heads."""
        for layer in (*self.additional_blocks, *self.loc, *self.conf):
            for param in layer.parameters():
                if param.dim() > 1:
                    nn.init.xavier_uniform_(param)

    # Shape the classifier to the view of bboxes
    def bbox_view(self, features, loc_extractor, conf_extractor):
        """Apply the heads to each feature map and concatenate over all locations.

        Returns:
            tuple: ``(locs, confs)`` of shapes ``[batch, 4, total]`` and
            ``[batch, num_classes, total]``.
        """
        loc_chunks = []
        conf_chunks = []
        for feat, loc_head, conf_head in zip(features, loc_extractor, conf_extractor):
            batch = feat.size(0)
            # [batch, n*4, feat_size, feat_size] -> [batch, 4, -1]
            loc_chunks.append(loc_head(feat).view(batch, 4, -1))
            # [batch, n*classes, feat_size, feat_size] -> [batch, classes, -1]
            conf_chunks.append(conf_head(feat).view(batch, self.num_classes, -1))

        locs = torch.cat(loc_chunks, 2).contiguous()
        confs = torch.cat(conf_chunks, 2).contiguous()
        return locs, confs

    def forward(self, image, targets=None):
        """Compute the loss (training mode) or post-processed detections (eval mode).

        Raises:
            ValueError: in training mode when ``targets`` is None.
        """
        feature = self.feature_extractor(image)

        # Feature Map 38x38x1024, 19x19x512, 10x10x512, 5x5x256, 3x3x256, 1x1x256
        detection_features = torch.jit.annotate(List[Tensor], [])
        detection_features.append(feature)
        for extra_stage in self.additional_blocks:
            feature = extra_stage(feature)
            detection_features.append(feature)

        # Feature Map 38x38x4, 19x19x6, 10x10x6, 5x5x6, 3x3x4, 1x1x4
        locs, confs = self.bbox_view(detection_features, self.loc, self.conf)

        # For SSD 300, shall return nbatch x 8732 x {nlabels, nlocs} results
        # 38x38x4 + 19x19x6 + 10x10x6 + 5x5x6 + 3x3x4 + 1x1x4 = 8732

        if self.training:
            if targets is None:
                raise ValueError("In training mode, targets should be passed")
            # bboxes_out (Tensor 8732 x 4), labels_out (Tensor 8732)
            gt_boxes = targets['boxes'].transpose(1, 2).contiguous()
            gt_labels = targets['labels']

            # ploc, plabel, gloc, glabel
            loss = self.compute_loss(locs, confs, gt_boxes, gt_labels)
            return {"total_losses": loss}

        # Add the predicted regression offsets to the default boxes to obtain
        # the final boxes, then run non-maximum suppression to drop overlaps.
        return self.postprocess(locs, confs)


class Loss(nn.Module):
    """
        SSD loss, the sum of:
        1. Confidence loss: positives plus hard-mined negatives
        2. Localization loss: positives only
        The default boxes passed in are expected to have shape 8732x4.
    """
    def __init__(self, dboxes):
        super().__init__()
        # The two scale factors follow
        # http://jany.st/post/2017-11-05-single-shot-detector-ssd-from-scratch-in-tensorflow.html
        self.scale_xy = 1.0 / dboxes.scale_xy  # 10 when dboxes.scale_xy is 0.1
        self.scale_wh = 1.0 / dboxes.scale_wh  # 5 when dboxes.scale_wh is 0.2

        self.location_loss = nn.SmoothL1Loss(reduction='none')
        # [num_anchors, 4] -> [4, num_anchors] -> [1, 4, num_anchors]
        anchors = dboxes(order="xywh").transpose(0, 1).unsqueeze(dim=0)
        self.dboxes = nn.Parameter(anchors, requires_grad=False)

        self.confidence_loss = nn.CrossEntropyLoss(reduction='none')

    def _location_vec(self, loc):
        # type: (Tensor) -> Tensor
        """
        Generate Location Vectors
        Compute the regression targets of the ground truth relative to the anchors.
        :param loc: ground-truth box matched to each anchor, Nx4x8732
        :return: regression targets, Nx4x8732
        """
        anchor_xy = self.dboxes[:, :2, :]
        anchor_wh = self.dboxes[:, 2:, :]
        gxy = self.scale_xy * (loc[:, :2, :] - anchor_xy) / anchor_wh  # Nx2x8732
        gwh = self.scale_wh * (loc[:, 2:, :] / anchor_wh).log()  # Nx2x8732
        return torch.cat((gxy, gwh), dim=1).contiguous()

    def forward(self, ploc, plabel, gloc, glabel):
        # type: (Tensor, Tensor, Tensor, Tensor) -> Tensor
        """
            ploc, plabel: Nx4x8732, Nxlabel_numx8732
                predicted location and labels

            gloc, glabel: Nx4x8732, Nx8732
                ground truth location and labels
        """
        # Positive anchors are those with a non-background label. Tensor: [N, 8732]
        positive = torch.gt(glabel, 0)
        # Number of positives in each image of the batch. Tensor: [N]
        pos_num = positive.sum(dim=1)

        # Regression targets for the ground truth. Tensor: [N, 4, 8732]
        target_loc = self._location_vec(gloc)

        # Localization loss: sum over the four coordinates, positives only.
        per_anchor_loc = self.location_loss(ploc, target_loc).sum(dim=1)  # Tensor: [N, 8732]
        loc_loss = (positive.float() * per_anchor_loc).sum(dim=1)  # Tensor: [N]

        # Per-anchor classification loss. Tensor: [N, 8732]
        con = self.confidence_loss(plabel, glabel)

        # Hard negative mining: zero the positives so they are never picked.
        con_neg = con.masked_fill(positive, 0.0)
        # Sorting the descending order indices again gives every anchor its
        # rank in the descending loss ordering.
        _, con_idx = con_neg.sort(dim=1, descending=True)
        _, con_rank = con_idx.sort(dim=1)

        # Use three times as many negatives as positives (see the "Hard
        # negative mining" part of the paper), capped at the anchor count.
        neg_num = torch.clamp(3 * pos_num, max=positive.size(1)).unsqueeze(-1)
        neg_mask = torch.lt(con_rank, neg_num)  # Tensor [N, 8732]

        # Confidence loss = loss of the positives + loss of the chosen negatives.
        selected = positive.float() + neg_mask.float()
        con_loss = (con * selected).sum(dim=1)  # Tensor [N]

        total_loss = loc_loss + con_loss
        # Images without any positive anchor contribute nothing,
        # eg. [15, 3, 5, 0] -> [1.0, 1.0, 1.0, 0.0]
        has_positive = torch.gt(pos_num, 0).float()
        pos_num = pos_num.float().clamp(min=1e-6)  # avoid division by zero
        return (total_loss * has_positive / pos_num).mean(dim=0)


================================================
FILE: pytorch_object_detection/ssd/src/utils.py
================================================
from math import sqrt
import itertools

import torch
import torch.nn.functional as F
from torch.jit.annotations import Tuple, List
from torch import nn, Tensor
import numpy as np


# This function is from https://github.com/kuangliu/pytorch-ssd.
# def calc_iou_tensor(box1, box2):
#     """ Calculation of IoU based on two boxes tensor,
#         Reference to https://github.com/kuangliu/pytorch-src
#         input:
#             box1 (N, 4)  format [xmin, ymin, xmax, ymax]
#             box2 (M, 4)  format [xmin, ymin, xmax, ymax]
#         output:
#             IoU (N, M)
#     """
#     N = box1.size(0)
#     M = box2.size(0)
#
#     # (N, 4) -> (N, 1, 4) -> (N, M, 4)
#     be1 = box1.unsqueeze(1).expand(-1, M, -1)  # -1 means not changing the size of that dimension
#     # (M, 4) -> (1, M, 4) -> (N, M, 4)
#     be2 = box2.unsqueeze(0).expand(N, -1, -1)
#
#     # Left Top and Right Bottom
#     lt = torch.max(be1[:, :, :2], be2[:, :, :2])
#     rb = torch.min(be1[:, :, 2:], be2[:, :, 2:])
#
#     # compute intersection area
#     delta = rb - lt  # width and height
#     delta[delta < 0] = 0
#     # width * height
#     intersect = delta[:, :, 0] * delta[:, :, 1]
#
#     # compute bel1 area
#     delta1 = be1[:, :, 2:] - be1[:, :, :2]
#     area1 = delta1[:, :, 0] * delta1[:, :, 1]
#     # compute bel2 area
#     delta2 = be2[:, :, 2:] - be2[:, :, :2]
#     area2 = delta2[:, :, 0] * delta2[:, :, 1]
#
#     iou = intersect / (area1 + area2 - intersect)
#     return iou


def box_area(boxes):
    """
    Return the area of every box in a set of (x1, y1, x2, y2) boxes.

    Arguments:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format

    Returns:
        area (Tensor[N]): area for each box
    """
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    return widths * heights


def calc_iou_tensor(boxes1, boxes2):
    """
    Pairwise intersection-over-union (Jaccard index) between two box sets.

    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.

    Arguments:
        boxes1 (Tensor[N, 4])
        boxes2 (Tensor[M, 4])

    Returns:
        iou (Tensor[N, M]): entry (i, j) is the IoU of boxes1[i] and boxes2[j]
    """
    areas_a = box_area(boxes1)
    areas_b = box_area(boxes2)

    # Inserting a singleton axis into boxes1 lets broadcasting pair every
    # box of boxes1 with every box of boxes2.
    top_left = torch.max(boxes1[:, None, :2], boxes2[:, :2])      # [N, M, 2]
    bottom_right = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # [N, M, 2]

    # Negative extents mean the boxes do not overlap; clamp them to zero.
    overlap_wh = (bottom_right - top_left).clamp(min=0)  # [N, M, 2]
    overlap = overlap_wh[:, :, 0] * overlap_wh[:, :, 1]  # [N, M]

    union = areas_a[:, None] + areas_b - overlap
    return overlap / union


# This function is from https://github.com/kuangliu/pytorch-ssd.
class Encoder(object):
    """
        Inspired by https://github.com/kuangliu/pytorch-ssd
        Transform between (bboxes, labels) <-> SSD output

        dboxes: default-box provider. Calling it with order='ltrb' or
            order='xywh' must return the default boxes as a
            [num_dboxes, 4] tensor (8732 x 4 for SSD300), and it must
            expose `scale_xy` and `scale_wh`.
            encoder: input ltrb format, output xywh format
            decoder: input xywh format, output ltrb format

        encode:
            input  : bboxes_in (Tensor nboxes x 4), labels_in (Tensor nboxes)
            output : bboxes_out (Tensor 8732 x 4), labels_out (Tensor 8732)
            criteria : IoU threshold of bboxes

        decode:
            input  : bboxes_in (Tensor 8732 x 4), scores_in (Tensor 8732 x nitems)
            output : bboxes_out (Tensor nboxes x 4), labels_out (Tensor nboxes)
            criteria : IoU threshold of bboxes
            max_output : maximum number of output bboxes
    """
    def __init__(self, dboxes):
        self.dboxes = dboxes(order='ltrb')
        # leading batch axis so the boxes broadcast against batched predictions
        self.dboxes_xywh = dboxes(order='xywh').unsqueeze(dim=0)
        self.nboxes = self.dboxes.size(0)  # number of default boxes
        self.scale_xy = dboxes.scale_xy
        self.scale_wh = dboxes.scale_wh

    def encode(self, bboxes_in, labels_in, criteria=0.5):
        """
        Assign ground-truth boxes and labels to the default boxes.

        encode:
            input  : bboxes_in (Tensor nboxes x 4, ltrb), labels_in (Tensor nboxes)
            output : bboxes_out (Tensor num_dboxes x 4, xywh),
                     labels_out (Tensor num_dboxes, 0 where nothing matched)
            criteria : IoU threshold of bboxes

        bboxes_in and labels_in are expected to live on the same device.
        """
        device = bboxes_in.device
        # compare against default boxes on the same device as the inputs
        dboxes = self.dboxes.to(device)

        # [nboxes, num_dboxes]: IoU of every GT box with every default box
        ious = calc_iou_tensor(bboxes_in, dboxes)
        # [num_dboxes,]: best-matching GT (and its IoU) for every default box
        best_dbox_ious, best_dbox_idx = ious.max(dim=0)
        # [nboxes,]: best-matching default box for every GT
        _, best_bbox_idx = ious.max(dim=1)

        # Matching strategy, rule 1: the default box that best matches each GT
        # is always positive. Setting its IoU to 2.0 guarantees it passes the
        # threshold test below.
        best_dbox_ious.index_fill_(0, best_bbox_idx, 2.0)  # dim, index, value
        # ... and make that default box point back at the GT that selected it
        idx = torch.arange(0, best_bbox_idx.size(0), dtype=torch.int64, device=device)
        best_dbox_idx[best_bbox_idx] = idx

        # Matching strategy, rule 2: every default box whose IoU with some GT
        # exceeds `criteria` is positive (this includes the rule-1 matches).
        masks = best_dbox_ious > criteria
        # [num_dboxes,]; label 0 is kept for unmatched default boxes
        labels_out = torch.zeros(self.nboxes, dtype=torch.int64, device=device)
        labels_out[masks] = labels_in[best_dbox_idx[masks]]
        # positive default boxes take the coordinates of their matched GT,
        # the others keep their own coordinates
        bboxes_out = dboxes.clone()
        bboxes_out[masks, :] = bboxes_in[best_dbox_idx[masks], :]

        # Transform format to xywh format
        x = 0.5 * (bboxes_out[:, 0] + bboxes_out[:, 2])  # x
        y = 0.5 * (bboxes_out[:, 1] + bboxes_out[:, 3])  # y
        w = bboxes_out[:, 2] - bboxes_out[:, 0]  # w
        h = bboxes_out[:, 3] - bboxes_out[:, 1]  # h
        bboxes_out[:, 0] = x
        bboxes_out[:, 1] = y
        bboxes_out[:, 2] = w
        bboxes_out[:, 3] = h
        return bboxes_out, labels_out

    def scale_back_batch(self, bboxes_in, scores_in):
        """
            Turn predicted regression parameters into ltrb boxes and apply
            softmax to the class scores.
            Do scale and transform from xywh to ltrb
            suppose input N x 4 x num_bbox | N x label_num x num_bbox

            bboxes_in: xywh regression parameters predicted by the network
            scores_in: per-class scores predicted for every default box

            Returns (bboxes, probs) shaped N x num_bbox x 4 and
            N x num_bbox x label_num.

            NOTE: permute() returns a view, so the in-place writes below also
            modify the tensor passed in by the caller.
        """
        # Keep the cached default boxes on exactly the device of the
        # predictions (not merely "the current CUDA device").
        device = bboxes_in.device
        self.dboxes = self.dboxes.to(device)
        self.dboxes_xywh = self.dboxes_xywh.to(device)

        # Returns a view of the original tensor with its dimensions permuted.
        bboxes_in = bboxes_in.permute(0, 2, 1)
        scores_in = scores_in.permute(0, 2, 1)

        bboxes_in[:, :, :2] = self.scale_xy * bboxes_in[:, :, :2]   # predicted x, y regression parameters
        bboxes_in[:, :, 2:] = self.scale_wh * bboxes_in[:, :, 2:]   # predicted w, h regression parameters

        # apply the regression parameters to the default boxes to obtain the
        # final predicted boxes (still in xywh)
        bboxes_in[:, :, :2] = bboxes_in[:, :, :2] * self.dboxes_xywh[:, :, 2:] + self.dboxes_xywh[:, :, :2]
        bboxes_in[:, :, 2:] = bboxes_in[:, :, 2:].exp() * self.dboxes_xywh[:, :, 2:]

        # transform format to ltrb; all four values are computed before any
        # of them is written back because the writes overwrite x, y, w, h
        l = bboxes_in[:, :, 0] - 0.5 * bboxes_in[:, :, 2]
        t = bboxes_in[:, :, 1] - 0.5 * bboxes_in[:, :, 3]
        r = bboxes_in[:, :, 0] + 0.5 * bboxes_in[:, :, 2]
        b = bboxes_in[:, :, 1] + 0.5 * bboxes_in[:, :, 3]

        bboxes_in[:, :, 0] = l  # xmin
        bboxes_in[:, :, 1] = t  # ymin
        bboxes_in[:, :, 2] = r  # xmax
        bboxes_in[:, :, 3] = b  # ymax

        return bboxes_in, F.softmax(scores_in, dim=-1)

    def decode_batch(self, bboxes_in, scores_in, criteria=0.45, max_output=200):
        """
        Decode a batch of raw predictions into per-image detections.

        Returns a list with one (bboxes, labels, scores) tuple per image.
        """
        # convert xywh regression output to ltrb boxes (needed for the IoU
        # computation in NMS) and apply softmax to the scores
        bboxes, probs = self.scale_back_batch(bboxes_in, scores_in)

        outputs = []
        # iterate over the images of the batch
        for bbox, prob in zip(bboxes.split(1, 0), probs.split(1, 0)):
            bbox = bbox.squeeze(0)
            prob = prob.squeeze(0)
            outputs.append(self.decode_single_new(bbox, prob, criteria, max_output))
        return outputs

    def decode_single_new(self, bboxes_in, scores_in, criteria, num_output=200):
        """
        decode:
            input  : bboxes_in (Tensor 8732 x 4), scores_in (Tensor 8732 x nitems)
            output : bboxes_out (Tensor nboxes x 4), labels_out (Tensor nboxes),
                     scores_out (Tensor nboxes)
            criteria : IoU threshold of bboxes
            num_output : maximum number of output bboxes
        """
        device = bboxes_in.device
        num_classes = scores_in.shape[-1]

        # clip boxes that cross the image border
        bboxes_in = bboxes_in.clamp(min=0, max=1)

        # [8732, 4] -> [8732, 21, 4]: one copy of every box per class
        bboxes_in = bboxes_in.repeat(1, num_classes).reshape(scores_in.shape[0], -1, 4)

        # create labels for each prediction
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores_in)

        # remove prediction with the background label (class index 0)
        bboxes_in = bboxes_in[:, 1:, :]
        scores_in = scores_in[:, 1:]
        labels = labels[:, 1:]

        # batch everything, by making every class prediction be a separate instance
        bboxes_in = bboxes_in.reshape(-1, 4)
        scores_in = scores_in.reshape(-1)
        labels = labels.reshape(-1)

        # remove low scoring boxes (score threshold 0.05)
        inds = torch.nonzero(scores_in > 0.05, as_tuple=False).squeeze(1)
        bboxes_in, scores_in, labels = bboxes_in[inds], scores_in[inds], labels[inds]

        # remove empty boxes
        ws, hs = bboxes_in[:, 2] - bboxes_in[:, 0], bboxes_in[:, 3] - bboxes_in[:, 1]
        keep = (ws >= 0.1 / 300) & (hs >= 0.1 / 300)
        keep = keep.nonzero(as_tuple=False).squeeze(1)
        bboxes_in, scores_in, labels = bboxes_in[keep], scores_in[keep], labels[keep]

        # non-maximum suppression
        keep = batched_nms(bboxes_in, scores_in, labels, iou_threshold=criteria)

        # keep only topk scoring predictions
        keep = keep[:num_output]
        bboxes_out = bboxes_in[keep, :]
        scores_out = scores_in[keep]
        labels_out = labels[keep]

        return bboxes_out, labels_out, scores_out

    # perform non-maximum suppression
    def decode_single(self, bboxes_in, scores_in, criteria, max_output, max_num=200):
        """
        decode:
            input  : bboxes_in (Tensor 8732 x 4), scores_in (Tensor 8732 x nitems)
            output : bboxes_out (Tensor nboxes x 4), labels_out (Tensor nboxes),
                     scores_out (Tensor nboxes)
            criteria : IoU threshold of bboxes
            max_output : maximum number of output bboxes (over all classes)
            max_num : maximum number of candidates considered per class
        """
        # Reference to https://github.com/amdegroot/ssd.pytorch
        device = bboxes_in.device
        bboxes_out = []
        scores_out = []
        labels_out = []

        # Per-class non-maximum suppression.
        # scores_in is (8732 x nitems); splitting along dim 1 yields one
        # column per class, i.e. the scores of all boxes for that class.
        for i, score in enumerate(scores_in.split(1, 1)):
            # skip background
            if i == 0:
                continue

            # [8732, 1] -> [8732]
            score = score.squeeze(1)

            # drop predictions whose score is not above 0.05
            mask = score > 0.05
            bboxes, score = bboxes_in[mask, :], score[mask]
            if score.size(0) == 0:
                continue

            # sort ascending, so the best candidate is always the last one
            score_sorted, score_idx_sorted = score.sort(dim=0)

            # keep only the max_num best candidates
            score_idx_sorted = score_idx_sorted[-max_num:]
            candidates = []

            while score_idx_sorted.numel() > 0:
                idx = score_idx_sorted[-1].item()
                # boxes of all remaining candidates: [num_remaining, 4]
                bboxes_sorted = bboxes[score_idx_sorted, :]
                # box of the current best candidate: [1, 4]
                bboxes_idx = bboxes[idx, :].unsqueeze(dim=0)
                # IoU of every remaining candidate with the best one.
                # squeeze(1) (not squeeze()) keeps a 1-D mask even when only
                # a single candidate is left.
                iou_sorted = calc_iou_tensor(bboxes_sorted, bboxes_idx).squeeze(1)

                # we only need iou < criteria
                # (this also removes the best candidate itself)
                score_idx_sorted = score_idx_sorted[iou_sorted < criteria]
                # remember the index of the best candidate
                candidates.append(idx)

            # store what survived NMS for this class
            bboxes_out.append(bboxes[candidates, :])   # box coordinates
            scores_out.append(score[candidates])       # scores
            labels_out.extend([i] * len(candidates))   # labels

        if not bboxes_out:
            # Nothing detected: return empty tensors. The boxes tensor keeps
            # its trailing dimension of 4 so that evaluation code does not fail.
            return [torch.empty(size=(0, 4), device=device),
                    torch.empty(size=(0,), dtype=torch.int64, device=device),
                    torch.empty(size=(0,), device=device)]

        bboxes_out = torch.cat(bboxes_out, dim=0).contiguous()
        scores_out = torch.cat(scores_out, dim=0).contiguous()
        # labels must live on the same device as the indices used below
        labels_out = torch.as_tensor(labels_out, dtype=torch.long, device=scores_out.device)

        # sort all detections by score (regardless of class) and keep the
        # max_output best ones
        _, max_ids = scores_out.sort(dim=0)
        max_ids = max_ids[-max_output:]
        return bboxes_out[max_ids, :], labels_out[max_ids], scores_out[max_ids]


class DefaultBoxes(object):
    """
    Generate the SSD default (prior) boxes for a set of prediction layers.

    All coordinates are relative to the input image (range 0-1). The boxes
    are stored both as (cx, cy, w, h) and as (xmin, ymin, xmax, ymax); call
    the instance with order='xywh' or order='ltrb' to get either tensor.

    Arguments:
        fig_size: size of the network input image (e.g. 300)
        feat_size: feature-map size of every prediction layer,
            e.g. [38, 19, 10, 5, 3, 1]
        steps: stride of one feature-map cell on the input image for every
            prediction layer, e.g. [8, 16, 32, 64, 100, 300]
        scales: default-box scales in pixels; needs one more entry than
            feat_size because layer k uses scales[k] and scales[k + 1],
            e.g. [21, 45, 99, 153, 207, 261, 315]
        aspect_ratios: extra aspect ratios per prediction layer,
            e.g. [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
        scale_xy: scale factor exposed through the `scale_xy` property
        scale_wh: scale factor exposed through the `scale_wh` property
    """
    def __init__(self, fig_size, feat_size, steps, scales, aspect_ratios, scale_xy=0.1, scale_wh=0.2):
        self.fig_size = fig_size
        self.feat_size = feat_size

        self.scale_xy_ = scale_xy
        self.scale_wh_ = scale_wh

        # According to https://github.com/weiliu89/caffe
        # Calculation method slightly different from paper
        self.steps = steps
        self.scales = scales

        # fk: number of cells of each layer that fit into the input image
        fk = fig_size / np.array(steps)
        self.aspect_ratios = aspect_ratios

        self.default_boxes = []
        # size of feature and number of feature
        for idx, sfeat in enumerate(self.feat_size):
            sk1 = scales[idx] / fig_size      # scale of this layer, relative (0-1)
            sk2 = scales[idx + 1] / fig_size  # scale of the next layer, relative (0-1)
            sk3 = sqrt(sk1 * sk2)
            # start with the two square (1:1) default boxes
            all_sizes = [(sk1, sk1), (sk3, sk3)]

            # then add both orientations of every extra aspect ratio
            for alpha in aspect_ratios[idx]:
                w, h = sk1 * sqrt(alpha), sk1 / sqrt(alpha)
                all_sizes.append((w, h))
                all_sizes.append((h, w))

            # place every box size on every cell of this feature map
            for w, h in all_sizes:
                for i, j in itertools.product(range(sfeat), repeat=2):  # i -> row (y), j -> column (x)
                    # centre of the cell, relative to the input image
                    cx, cy = (j + 0.5) / fk[idx], (i + 0.5) / fk[idx]
                    self.default_boxes.append((cx, cy, w, h))

        # The explicit dtype is required: the list mixes numpy and Python floats.
        self.dboxes = torch.as_tensor(self.default_boxes, dtype=torch.float32)
        self.dboxes.clamp_(min=0, max=1)  # restrict x, y, w, h to the range 0-1

        # For IoU calculation
        # ltrb is left top coordinate and right bottom coordinate
        self.dboxes_ltrb = self.dboxes.clone()
        self.dboxes_ltrb[:, 0] = self.dboxes[:, 0] - 0.5 * self.dboxes[:, 2]   # xmin
        self.dboxes_ltrb[:, 1] = self.dboxes[:, 1] - 0.5 * self.dboxes[:, 3]   # ymin
        self.dboxes_ltrb[:, 2] = self.dboxes[:, 0] + 0.5 * self.dboxes[:, 2]   # xmax
        self.dboxes_ltrb[:, 3] = self.dboxes[:, 1] + 0.5 * self.dboxes[:, 3]   # ymax

    @property
    def scale_xy(self):
        return self.scale_xy_

    @property
    def scale_wh(self):
        return self.scale_wh_

    def __call__(self, order='ltrb'):
        """
        Return the default boxes in the requested format.

        order: 'ltrb' for (xmin, ymin, xmax, ymax) or 'xywh' for (cx, cy, w, h)

        Raises:
            ValueError: if `order` is neither 'ltrb' nor 'xywh'
        """
        if order == 'ltrb':
            return self.dboxes_ltrb

        if order == 'xywh':
            return self.dboxes

        # Fail loudly instead of silently returning None.
        raise ValueError("order must be 'ltrb' or 'xywh', got {!r}".format(order))


def dboxes300_coco():
    """Build the DefaultBoxes generator configured for a 300x300 network input."""
    # scales taken from: https://github.com/amdegroot/ssd.pytorch/blob/master/data/config.py
    return DefaultBoxes(
        fig_size=300,                                  # size of the network input image
        feat_size=[38, 19, 10, 5, 3, 1],               # feature-map size of every prediction layer
        steps=[8, 16, 32, 64, 100, 300],               # stride of one cell on the input image, per layer
        scales=[21, 45, 99, 153, 207, 261, 315],       # default-box scale per layer
        aspect_ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]],  # extra aspect ratios per layer
    )


def nms(boxes, scores, iou_threshold):
    # type: (Tensor, Tensor, float) -> Tensor
    """
    Performs non-maximum suppression (NMS) on the boxes according
    to their intersection-over-union (IoU).

    NMS iteratively removes lower scoring boxes which have an
    IoU greater than iou_threshold with another (higher scoring)
    box.

    This is a thin wrapper around the `torchvision::nms` operator looked up
    through `torch.ops`.
    NOTE(review): the operator is presumably only registered once
    torchvision has been imported somewhere in the process; this module
    does not import it itself - confirm.

    Parameters
    ----------
    boxes : Tensor[N, 4]
        boxes to perform NMS on. They
        are expected to be in (x1, y1, x2, y2) format
    scores : Tensor[N]
        scores for each one of the boxes
    iou_threshold : float
        discards all overlapping
        boxes with IoU > iou_threshold

    Returns
    -------
    keep : Tensor
        int64 tensor with the indices
        of the elements that have been kept
        by NMS, sorted in decreasing order of scores
    """
    return torch.ops.torchvision.nms(boxes, scores, iou_threshold)


def batched_nms(boxes, scores, idxs, iou_threshold):
    # type: (Tensor, Tensor, Tensor, float) -> Tensor
    """
    Run non-maximum suppression separately for every category.

    Boxes are only suppressed by boxes that carry the same category index;
    boxes of different categories never suppress each other.

    Parameters
    ----------
    boxes : Tensor[N, 4]
        boxes where NMS will be performed, in (x1, y1, x2, y2) format
    scores : Tensor[N]
        score of every box
    idxs : Tensor[N]
        category index of every box
    iou_threshold : float
        threshold passed through to `nms`

    Returns
    -------
    keep : Tensor
        int64 tensor with the indices of the boxes kept by NMS
        (the result of `nms`; an empty tensor when there are no boxes)
    """
    if boxes.numel() == 0:
        return torch.empty((0,), dtype=torch.int64, device=boxes.device)

    # Trick for per-category NMS with a single call: shift the boxes of each
    # category by an offset that depends only on the category index and is
    # larger than any coordinate, so boxes of different categories can no
    # longer overlap. `idxs.to(boxes)` only matches dtype/device to `boxes`.
    class_shift = idxs.to(boxes) * (boxes.max() + 1)
    shifted_boxes = boxes + class_shift[:, None]
    return nms(shifted_boxes, scores, iou_threshold)


class PostProcess(nn.Module):
    """
    Module that turns raw SSD predictions into per-image detections.

    For every image it decodes the predicted regression parameters against
    the default boxes, applies softmax to the class scores, drops the
    background class, low-scoring and tiny boxes, runs per-class NMS and
    keeps at most `max_output` detections.
    """
    def __init__(self, dboxes):
        super(PostProcess, self).__init__()
        # [num_anchors, 4] -> [1, num_anchors, 4]
        # Stored as a non-trainable parameter of the module.
        self.dboxes_xywh = nn.Parameter(dboxes(order='xywh').unsqueeze(dim=0),
                                        requires_grad=False)
        self.scale_xy = dboxes.scale_xy  # 0.1 with the DefaultBoxes defaults
        self.scale_wh = dboxes.scale_wh  # 0.2 with the DefaultBoxes defaults

        self.criteria = 0.5     # IoU threshold handed to NMS
        self.max_output = 100   # maximum number of detections per image

    def scale_back_batch(self, bboxes_in, scores_in):
        # type: (Tensor, Tensor) -> Tuple[Tensor, Tensor]
        """
            1) decode the predicted regression parameters into final box coordinates
            2) convert the box format from xywh to ltrb
            3) apply softmax to the predicted class scores
            Do scale and transform from xywh to ltrb
            suppose input N x 4 x num_bbox | N x label_num x num_bbox

            bboxes_in: [N, 4, 8732] xywh regression parameters predicted by the network
            scores_in: [N, label_num, 8732] per-class scores predicted for every default box

            NOTE: permute() returns a view, so the in-place writes below also
            modify the tensor passed in by the caller.
        """

        # Returns a view of the original tensor with its dimensions permuted.
        # [batch, 4, 8732] -> [batch, 8732, 4]
        bboxes_in = bboxes_in.permute(0, 2, 1)
        # [batch, label_num, 8732] -> [batch, 8732, label_num]
        scores_in = scores_in.permute(0, 2, 1)
        # print(bboxes_in.is_contiguous())

        bboxes_in[:, :, :2] = self.scale_xy * bboxes_in[:, :, :2]   # predicted x, y regression parameters
        bboxes_in[:, :, 2:] = self.scale_wh * bboxes_in[:, :, 2:]   # predicted w, h regression parameters

        # apply the regression parameters to the default boxes to obtain the final predicted boxes
        bboxes_in[:, :, :2] = bboxes_in[:, :, :2] * self.dboxes_xywh[:, :, 2:] + self.dboxes_xywh[:, :, :2]
        bboxes_in[:, :, 2:] = bboxes_in[:, :, 2:].exp() * self.dboxes_xywh[:, :, 2:]

        # transform format to ltrb
        # all four values are computed first because the writes below overwrite x, y, w, h
        l = bboxes_in[:, :, 0] - 0.5 * bboxes_in[:, :, 2]
        t = bboxes_in[:, :, 1] - 0.5 * bboxes_in[:, :, 3]
        r = bboxes_in[:, :, 0] + 0.5 * bboxes_in[:, :, 2]
        b = bboxes_in[:, :, 1] + 0.5 * bboxes_in[:, :, 3]

        bboxes_in[:, :, 0] = l  # xmin
        bboxes_in[:, :, 1] = t  # ymin
        bboxes_in[:, :, 2] = r  # xmax
        bboxes_in[:, :, 3] = b  # ymax

        # scores_in: [batch, 8732, label_num]
        return bboxes_in, F.softmax(scores_in, dim=-1)

    def decode_single_new(self, bboxes_in, scores_in, criteria, num_output):
        # type: (Tensor, Tensor, float, int) -> Tuple[Tensor, Tensor, Tensor]
        """
        decode:
            input  : bboxes_in (Tensor 8732 x 4), scores_in (Tensor 8732 x nitems)
            output : bboxes_out (Tensor nboxes x 4), labels_out (Tensor nboxes),
                     scores_out (Tensor nboxes)
            criteria : IoU threshold of bboxes
            num_output : maximum number of output bboxes
        """
        device = bboxes_in.device
        num_classes = scores_in.shape[-1]

        # clip boxes that cross the image border
        bboxes_in = bboxes_in.clamp(min=0, max=1)

        # [8732, 4] -> [8732, 21, 4]: one copy of every box per class
        bboxes_in = bboxes_in.repeat(1, num_classes).reshape(scores_in.shape[0], -1, 4)

        # create labels for each prediction
        labels = torch.arange(num_classes, device=device)
        # [num_classes] -> [8732, num_classes]
        labels = labels.view(1, -1).expand_as(scores_in)

        # remove prediction with the background label (class index 0)
        bboxes_in = bboxes_in[:, 1:, :]  # [8732, 21, 4] -> [8732, 20, 4]
        scores_in = scores_in[:, 1:]  # [8732, 21] -> [8732, 20]
        labels = labels[:, 1:]  # [8732, 21] -> [8732, 20]

        # batch everything, by making every class prediction be a separate instance
        bboxes_in = bboxes_in.reshape(-1, 4)  # [8732, 20, 4] -> [8732x20, 4]
        scores_in = scores_in.reshape(-1)  # [8732, 20] -> [8732x20]
        labels = labels.reshape(-1)  # [8732, 20] -> [8732x20]

        # remove low scoring boxes (score threshold 0.05)
        # inds = torch.nonzero(scores_in > 0.05).squeeze(1)
        inds = torch.where(torch.gt(scores_in, 0.05))[0]
        bboxes_in, scores_in, labels = bboxes_in[inds, :], scores_in[inds], labels[inds]

        # remove empty boxes
        # (narrower or lower than 1/300 in relative coordinates)
        ws, hs = bboxes_in[:, 2] - bboxes_in[:, 0], bboxes_in[:, 3] - bboxes_in[:, 1]
        keep = (ws >= 1 / 300) & (hs >= 1 / 300)
        # keep = keep.nonzero().squeeze(1)
        keep = torch.where(keep)[0]
        bboxes_in, scores_in, labels = bboxes_in[keep], scores_in[keep], labels[keep]

        # non-maximum suppression
        keep = batched_nms(bboxes_in, scores_in, labels, iou_threshold=criteria)

        # keep only topk scoring predictions
        keep = keep[:num_output]
        bboxes_out = bboxes_in[keep, :]
        scores_out = scores_in[keep]
        labels_out = labels[keep]

        return bboxes_out, labels_out, scores_out

    def forward(self, bboxes_in, scores_in):
        """
        Decode a batch of raw predictions.

        Returns a list with one (bboxes, labels, scores) tuple per image.
        """
        # decode the regression parameters into final box coordinates and apply softmax to the scores
        bboxes, probs = self.scale_back_batch(bboxes_in, scores_in)

        # explicitly typed empty list (the element type cannot be inferred from [])
        outputs = torch.jit.annotate(List[Tuple[Tensor, Tensor, Tensor]], [])
        # iterate over the images of the batch
        # bboxes: [batch, 8732, 4]
        for bbox, prob in zip(bboxes.split(1, 0), probs.split(1, 0)):  # split_size, split_dim
            # bbox: [1, 8732, 4]
            bbox = bbox.squeeze(0)
            prob = prob.squeeze(0)
            outputs.append(self.decode_single_new(bbox, prob, self.criteria, self.max_output))
        return outputs


================================================
FILE: pytorch_object_detection/ssd/train_multi_GPU.py
================================================
import time
import os
import datetime

import torch

import transforms
from my_dataset import VOCDataSet
from src import SSD300, Backbone
import train_utils.train_eval_utils as utils
from train_utils import GroupedBatchSampler, create_aspect_ratio_groups, init_distributed_mode, save_on_master, mkdir


def create_model(num_classes):
    """
    Build an SSD300 model and initialise it from the pre-trained SSD checkpoint.

    The class-prediction ("conf") weights of the checkpoint are skipped
    because their shape depends on num_classes; all other weights,
    including the box-regression heads, are loaded.
    """
    # https://download.pytorch.org/models/resnet50-19c8e357.pth
    # pre_train_path = "./src/resnet50.pth"
    model = SSD300(backbone=Backbone(pretrain_path=None), num_classes=num_classes)

    checkpoint = torch.load("./src/nvidia_ssdpyt_fp32.pt", map_location='cpu')
    pretrained_weights = checkpoint["model"]

    # keep every entry whose dotted key has no "conf" component
    reusable_weights = {
        name: tensor
        for name, tensor in pretrained_weights.items()
        if "conf" not in name.split(".")
    }

    missing_keys, unexpected_keys = model.load_state_dict(reusable_weights, strict=False)
    if missing_keys or unexpected_keys:
        print("missing_keys: ", missing_keys)
        print("unexpected_keys: ", unexpected_keys)

    return model


def main(args):
    """
    Train and evaluate SSD300 on Pascal VOC 2012, optionally in distributed mode.

    `args` is the argparse namespace built at the bottom of this script.
    The function sets up distributed mode, the data loaders, the model, the
    optimizer and the LR schedule, optionally resumes from a checkpoint,
    then trains and evaluates once per epoch, logging results to a text
    file and saving a checkpoint per epoch.
    """
    init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)

    # one results file per run, named after the start time
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    # Data loading code
    print("Loading data")

    data_transform = {
        "train": transforms.Compose([transforms.SSDCropping(),
                                     transforms.Resize(),
                                     transforms.ColorJitter(),
                                     transforms.ToTensor(),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.Normalization(),
                                     transforms.AssignGTtoDefaultBox()]),
        "val": transforms.Compose([transforms.Resize(),
                                   transforms.ToTensor(),
                                   transforms.Normalization()])
    }

    VOC_root = args.data_path
    # check voc root
    if os.path.exists(os.path.join(VOC_root, "VOCdevkit")) is False:
        raise FileNotFoundError("VOCdevkit dose not in path:'{}'.".format(VOC_root))

    # load train data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt
    train_data_set = VOCDataSet(VOC_root, "2012", data_transform["train"], train_set='train.txt')

    # load validation data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt
    val_data_set = VOCDataSet(VOC_root, "2012", data_transform["val"], train_set='val.txt')

    print("Creating data loaders")
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_data_set)
        test_sampler = torch.utils.data.distributed.DistributedSampler(val_data_set)
    else:
        train_sampler = torch.utils.data.RandomSampler(train_data_set)
        test_sampler = torch.utils.data.SequentialSampler(val_data_set)

    if args.aspect_ratio_group_factor >= 0:
        # group images by aspect-ratio bin so a batch holds images of similar shape
        group_ids = create_aspect_ratio_groups(train_data_set, k=args.aspect_ratio_group_factor)
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)
    else:
        train_batch_sampler = torch.utils.data.BatchSampler(
            train_sampler, args.batch_size, drop_last=True)

    data_loader = torch.utils.data.DataLoader(
        train_data_set, batch_sampler=train_batch_sampler, num_workers=args.workers,
        collate_fn=train_data_set.collate_fn)

    # NOTE: the validation loader reuses the training set's collate_fn
    data_loader_test = torch.utils.data.DataLoader(
        val_data_set, batch_size=1,
        sampler=test_sampler, num_workers=args.workers,
        collate_fn=train_data_set.collate_fn)

    print("Creating model")
    # +1 for the background class
    model = create_model(num_classes=args.num_classes+1)
    model.to(device)

    # keep a handle on the unwrapped model for saving/loading state dicts
    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(
        params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma)
    # lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)

    # if a checkpoint path is given via --resume, continue training from it
    if args.resume:
        # If map_location is missing, torch.load will first load the module to CPU
        # and then copy each parameter to where it was saved,
        # which would result in all processes on the same machine using the same set of devices.
        checkpoint = torch.load(args.resume, map_location='cpu')  # checkpoint also holds optimizer and LR-scheduler state
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1

    if args.test_only:
        utils.evaluate(model, data_loader_test, device=device)
        return

    train_loss = []
    learning_rate = []
    val_map = []
    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            # gives the distributed sampler the current epoch number
            train_sampler.set_epoch(epoch)

        mean_loss, lr = utils.train_one_epoch(model, optimizer, data_loader, device,
                                              epoch, args.print_freq, warmup=True)
        # only first process to save training info
        if args.rank in [-1, 0]:
            train_loss.append(mean_loss.item())
            learning_rate.append(lr)

        # update learning rate
        lr_scheduler.step()

        # evaluate after every epoch
        coco_info = utils.evaluate(model, data_loader_test, device=device)

        if args.rank in [-1, 0]:
            # write into txt
            with open(results_file, "a") as f:
                # the line holds the COCO metrics followed by the loss and the learning rate
                result_info = [str(round(i, 4)) for i in coco_info + [mean_loss.item()]] + [str(round(lr, 6))]
                txt = "epoch:{} {}".format(epoch, '  '.join(result_info))
                f.write(txt + "\n")

            val_map.append(coco_info[1])  # pascal mAP

        if args.output_dir:
            # checkpoints are written by the master process only
            save_on_master({
                'model': model_without_ddp.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'args': args,
                'epoch': epoch},
                os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))

    if args.rank in [-1, 0]:
        # plot loss and lr curve
        if len(train_loss) != 0 and len(learning_rate) != 0:
            from plot_curve import plot_loss_and_lr
            plot_loss_and_lr(train_loss, learning_rate)

        # plot mAP curve
        if len(val_map) != 0:
            from plot_curve import plot_map
            plot_map(val_map)


if __name__ == "__main__":
    # Command-line entry point: parse the options, make sure the output
    # directory exists, then hand over to main().
    import argparse
    parser = argparse.ArgumentParser(
        description=__doc__)

    # root directory that contains the training data (VOCdevkit)
    parser.add_argument('--data-path', default='./', help='dataset')
    # number of object classes to detect, background not included
    parser.add_argument('--num_classes', default=20, type=int, help='num_classes')
    # device type used for training
    parser.add_argument('--device', default='cuda', help='device')
    # batch size on each GPU
    parser.add_argument('-b', '--batch-size', default=8, type=int,
                        help='images per gpu, the total batch size is $NGPU x batch_size')
    # epoch number to start (continue) training from
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # total number of training epochs
    parser.add_argument('--epochs', default=20, type=int, metavar='N',
                        help='number of total epochs to run')
    # number of workers used for data loading and preprocessing
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    # learning rate; set it according to the number of GPUs and the batch size: 0.005 / 8 * num_GPU
    parser.add_argument('--lr', default=0.005, type=float,
                        help='initial learning rate, 0.005 is the default value for training '
                        'on 8 gpus and 2 images_per_gpu')
    # momentum of SGD
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # weight decay of SGD
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # parameter of torch.optim.lr_scheduler.StepLR
    parser.add_argument('--lr-step-size', default=5, type=int, help='decrease lr every step-size epochs')
    # parameter of torch.optim.lr_scheduler.MultiStepLR (milestones).
    # The help text used to be a copy of the --lr-step-size help, which
    # misdescribed this option.
    parser.add_argument('--lr-steps', default=[7, 12], nargs='+', type=int,
                        help='epochs at which to decrease lr (milestones for MultiStepLR)')
    # multiplicative decay factor of the LR scheduler
    parser.add_argument('--lr-gamma', default=0.3, type=float, help='decrease lr by a factor of lr-gamma')
    # how often training information is printed
    parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
    # directory where checkpoints are saved
    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')
    # continue training from a previous checkpoint
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
    # do not train, only evaluate
    parser.add_argument(
        "--test-only",
        dest="test_only",
        help="Only test the model",
        action="store_true",
    )

    # number of processes to launch (processes, not threads)
    parser.add_argument('--world-size', default=4, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')

    args = parser.parse_args()

    # if an output directory is given, create it when it does not exist yet
    if args.output_dir:
        mkdir(args.output_dir)

    main(args)


================================================
FILE: pytorch_object_detection/ssd/train_ssd300.py
================================================
import os
import datetime

import torch

import transforms
from my_dataset import VOCDataSet
from src import SSD300, Backbone
import train_utils.train_eval_utils as utils
from train_utils import get_coco_api_from_dataset


def create_model(num_classes=21):
    """Build an SSD300 model and initialise it from the NVIDIA SSD checkpoint.

    Args:
        num_classes: class count forwarded to ``SSD300``.

    Returns:
        The ``SSD300`` instance after loading every checkpoint entry whose
        dotted key does not contain a ``conf`` component.

    Raises:
        FileNotFoundError: if ``./src/nvidia_ssdpyt_fp32.pt`` does not exist.
    """
    # https://download.pytorch.org/models/resnet50-19c8e357.pth
    # pre_train_path = "./src/resnet50.pth"
    model = SSD300(backbone=Backbone(), num_classes=num_classes)

    # https://ngc.nvidia.com/catalog/models -> search ssd -> download FP32
    pre_ssd_path = "./src/nvidia_ssdpyt_fp32.pt"
    if not os.path.exists(pre_ssd_path):
        raise FileNotFoundError("nvidia_ssdpyt_fp32.pt not find in {}".format(pre_ssd_path))
    pre_weights_dict = torch.load(pre_ssd_path, map_location='cpu')["model"]

    # Drop the class-predictor ("conf") weights. The box-regression weights
    # can be reused because they do not depend on num_classes.
    reusable_weights = {
        name: tensor
        for name, tensor in pre_weights_dict.items()
        if "conf" not in name.split(".")
    }

    missing_keys, unexpected_keys = model.load_state_dict(reusable_weights, strict=False)
    if missing_keys or unexpected_keys:
        print("missing_keys: ", missing_keys)
        print("unexpected_keys: ", unexpected_keys)

    return model


def main(parser_data):
    """Train SSD300 on PASCAL VOC 2012, evaluating and checkpointing every epoch.

    Args:
        parser_data: parsed argument namespace. Reads ``device``,
            ``num_classes``, ``data_path``, ``batch_size``, ``resume``,
            ``start_epoch`` and ``epochs``; ``output_dir`` is used for the
            checkpoints when present (falls back to ``save_weights``).

    Raises:
        FileNotFoundError: if ``<data_path>/VOCdevkit`` does not exist.
        AssertionError: if ``batch_size`` is not greater than 1.
    """
    device = torch.device(parser_data.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    # Honour --output-dir instead of a hard-coded folder; the argparse default
    # ('./save_weights') keeps the historical location.
    output_dir = getattr(parser_data, "output_dir", None) or "save_weights"
    os.makedirs(output_dir, exist_ok=True)

    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    data_transform = {
        "train": transforms.Compose([transforms.SSDCropping(),
                                     transforms.Resize(),
                                     transforms.ColorJitter(),
                                     transforms.ToTensor(),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.Normalization(),
                                     transforms.AssignGTtoDefaultBox()]),
        "val": transforms.Compose([transforms.Resize(),
                                   transforms.ToTensor(),
                                   transforms.Normalization()])
    }

    VOC_root = parser_data.data_path
    # check voc root
    if not os.path.exists(os.path.join(VOC_root, "VOCdevkit")):
        raise FileNotFoundError("VOCdevkit dose not in path:'{}'.".format(VOC_root))

    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt
    train_dataset = VOCDataSet(VOC_root, "2012", data_transform['train'], train_set='train.txt')
    # The batch size must be greater than 1 during training.
    batch_size = parser_data.batch_size
    assert batch_size > 1, "batch size must be greater than 1"
    # Drop the last batch when it would contain a single sample.
    drop_last = len(train_dataset) % batch_size == 1
    # os.cpu_count() may return None; fall back to 1 so min() never compares None.
    nw = min([os.cpu_count() or 1, batch_size, 8])  # number of workers
    print('Using %g dataloader workers' % nw)
    train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    num_workers=nw,
                                                    collate_fn=train_dataset.collate_fn,
                                                    drop_last=drop_last)

    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt
    val_dataset = VOCDataSet(VOC_root, "2012", data_transform['val'], train_set='val.txt')
    val_data_loader = torch.utils.data.DataLoader(val_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=nw,
                                                  collate_fn=train_dataset.collate_fn)

    # Use the namespace passed in rather than the module-level ``args`` global,
    # which only exists when this file is executed as a script.
    model = create_model(num_classes=parser_data.num_classes + 1)
    model.to(device)

    # define optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.0005,
                                momentum=0.9, weight_decay=0.0005)
    # learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=5,
                                                   gamma=0.3)

    # Resume from a previously saved checkpoint when one is given.
    if parser_data.resume != "":
        checkpoint = torch.load(parser_data.resume, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        parser_data.start_epoch = checkpoint['epoch'] + 1
        print("the training process from epoch{}...".format(parser_data.start_epoch))

    train_loss = []
    learning_rate = []
    val_map = []

    # Convert the validation set once up front so it is not rebuilt on every
    # evaluation pass.
    val_data = get_coco_api_from_dataset(val_data_loader.dataset)
    for epoch in range(parser_data.start_epoch, parser_data.epochs):
        mean_loss, lr = utils.train_one_epoch(model=model, optimizer=optimizer,
                                              data_loader=train_data_loader,
                                              device=device, epoch=epoch,
                                              print_freq=50)
        train_loss.append(mean_loss.item())
        learning_rate.append(lr)

        # update learning rate
        lr_scheduler.step()

        coco_info = utils.evaluate(model=model, data_loader=val_data_loader,
                                   device=device, data_set=val_data)

        # write into txt
        with open(results_file, "a") as f:
            # Each line holds the COCO metrics followed by the loss and learning rate.
            result_info = [str(round(i, 4)) for i in coco_info + [mean_loss.item()]] + [str(round(lr, 6))]
            txt = "epoch:{} {}".format(epoch, '  '.join(result_info))
            f.write(txt + "\n")

        val_map.append(coco_info[1])  # pascal mAP

        # save weights
        save_files = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler': lr_scheduler.state_dict(),
            'epoch': epoch}
        torch.save(save_files, os.path.join(output_dir, "ssd300-{}.pth".format(epoch)))

    # plot loss and lr curve
    if len(train_loss) != 0 and len(learning_rate) != 0:
        from plot_curve import plot_loss_and_lr
        plot_loss_and_lr(train_loss, learning_rate)

    # plot mAP curve
    if len(val_map) != 0:
        from plot_curve import plot_map
        plot_map(val_map)

    # inputs = torch.rand(size=(2, 3, 300, 300))
    # output = model(inputs)
    # print(output)


if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description=__doc__)

    # Device used for training.
    parser.add_argument('--device',
                        default='cuda:0',
                        help='device')
    # Number of object classes to detect, background excluded.
    parser.add_argument('--num_classes',
                        default=20,
                        type=int,
                        help='num_classes')
    # Root directory of the training data (the folder containing VOCdevkit).
    parser.add_argument('--data-path',
                        default='./',
                        help='dataset')
    # Directory in which files are saved.
    parser.add_argument('--output-dir',
                        default='./save_weights',
                        help='path where to save')
    # Checkpoint file to continue training from, if any.
    parser.add_argument('--resume',
                        default='',
                        type=str,
                        help='resume from checkpoint')
    # Epoch index at which training starts.
    parser.add_argument('--start_epoch',
                        default=0,
                        type=int,
                        help='start epoch')
    # Total number of training epochs.
    parser.add_argument('--epochs',
                        default=15,
                        type=int,
                        metavar='N',
                        help='number of total epochs to run')
    # Training batch size.
    parser.add_argument('--batch_size',
                        default=4,
                        type=int,
                        metavar='N',
                        help='batch size when training.')

    args = parser.parse_args()
    print(args)

    # Create the weight-saving directory when it does not exist yet.
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    main(args)


================================================
FILE: pytorch_object_detection/ssd/train_utils/__init__.py
================================================
from .coco_utils import get_coco_api_from_dataset
from .coco_eval import CocoEvaluator
from .distributed_utils import init_distributed_mode, save_on_master, mkdir
from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups


================================================
FILE: pytorch_object_detection/ssd/train_utils/coco_eval.py
================================================
import json
import copy
from collections import defaultdict

import numpy as np
import torch
import torch._six

from pycocotools.cocoeval import COCOeval
from pycocotools.coco import COCO
import pycocotools.mask as mask_util

from train_utils.distributed_utils import all_gather


class CocoEvaluator(object):
    """Collect model predictions batch by batch and score them with COCOeval.

    One ``COCOeval`` object is kept per requested IoU type. ``update`` feeds
    a batch of predictions, ``synchronize_between_processes`` merges the
    per-batch results, and ``accumulate`` / ``summarize`` produce the report.
    """

    def __init__(self, coco_gt, iou_types):
        """Store a deep copy of ``coco_gt`` and create one evaluator per IoU type."""
        assert isinstance(iou_types, (list, tuple))
        self.coco_gt = copy.deepcopy(coco_gt)
        self.iou_types = iou_types

        self.coco_eval = {
            iou_type: COCOeval(self.coco_gt, iouType=iou_type)
            for iou_type in iou_types
        }

        self.img_ids = []
        self.eval_imgs = {}
        for iou_type in iou_types:
            self.eval_imgs[iou_type] = []

    def update(self, predictions):
        """Evaluate one batch of ``{image_id: prediction}`` and record the result."""
        current_ids = list(np.unique(list(predictions.keys())))
        self.img_ids.extend(current_ids)

        for iou_type in self.iou_types:
            results = self.prepare(predictions, iou_type)
            # An empty result list cannot go through loadRes; use an empty COCO object.
            if results:
                detections = loadRes(self.coco_gt, results)
            else:
                detections = COCO()

            evaluator = self.coco_eval[iou_type]
            evaluator.cocoDt = detections
            evaluator.params.imgIds = list(current_ids)
            # evaluate() hands back the image ids it used; they are carried
            # into the next IoU type, as in the original loop.
            current_ids, per_image = evaluate(evaluator)

            self.eval_imgs[iou_type].append(per_image)

    def synchronize_between_processes(self):
        """Concatenate the per-batch results along axis 2 and merge them into each evaluator."""
        for iou_type in self.iou_types:
            stacked = np.concatenate(self.eval_imgs[iou_type], 2)
            self.eval_imgs[iou_type] = stacked
            create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, stacked)

    def accumulate(self):
        """Call ``accumulate`` on every evaluator."""
        for evaluator in self.coco_eval.values():
            evaluator.accumulate()

    def summarize(self):
        """Print the IoU type followed by the evaluator's summary, for each type."""
        for iou_type, evaluator in self.coco_eval.items():
            print("IoU metric: {}".format(iou_type))
            evaluator.summarize()

    def prepare(self, predictions, iou_type):
        """Convert predictions to COCO result dicts for ``iou_type``.

        Raises:
            ValueError: if ``iou_type`` is not "bbox", "segm" or "keypoints".
        """
        converters = (
            ("bbox", self.prepare_for_coco_detection),
            ("segm", self.prepare_for_coco_segmentation),
            ("keypoints", self.prepare_for_coco_keypoint),
        )
        for known_type, converter in converters:
            if iou_type == known_type:
                return converter(predictions)
        raise ValueError("Unknown iou type {}".format(iou_type))

    def prepare_for_coco_detection(self, predictions):
        """Build one ``bbox`` result dict per predicted box; empty predictions are skipped."""
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            # Boxes arrive as (xmin, ymin, xmax, ymax); COCO wants (x, y, w, h).
            xywh_boxes = convert_to_xywh(prediction["boxes"]).tolist()
            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()

            for k, box in enumerate(xywh_boxes):
                coco_results.append({
                    "image_id": original_id,
                    "category_id": labels[k],
                    "bbox": box,
                    "score": scores[k],
                })
        return coco_results

    def prepare_for_coco_segmentation(self, predictions):
        """Build one RLE ``segmentation`` result dict per predicted mask."""
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            raw_scores = prediction["scores"]
            raw_labels = prediction["labels"]
            # Masks are binarised at 0.5 before encoding.
            binary_masks = prediction["masks"] > 0.5

            scores = raw_scores.tolist()
            labels = raw_labels.tolist()

            rles = []
            for mask in binary_masks:
                rle = mask_util.encode(
                    np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F")
                )[0]
                # Store the counts as text rather than bytes.
                rle["counts"] = rle["counts"].decode("utf-8")
                rles.append(rle)

            for k, rle in enumerate(rles):
                coco_results.append({
                    "image_id": original_id,
                    "category_id": labels[k],
                    "segmentation": rle,
                    "score": scores[k],
                })
        return coco_results

    def prepare_for_coco_keypoint(self, predictions):
        """Build one ``keypoints`` result dict per predicted instance."""
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            # The converted boxes are not used in the output; the conversion is
            # kept so a prediction without "boxes" still fails as before.
            boxes = convert_to_xywh(prediction["boxes"]).tolist()
            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()
            flat_keypoints = prediction["keypoints"].flatten(start_dim=1).tolist()

            for k, keypoint in enumerate(flat_keypoints):
                coco_results.append({
                    "image_id": original_id,
                    "category_id": labels[k],
                    'keypoints': keypoint,
                    "score": scores[k],
                })
        return coco_results


def convert_to_xywh(boxes):
    """Turn (xmin, ymin, xmax, ymax) rows into (xmin, ymin, width, height) rows."""
    left, top, right, bottom = boxes.unbind(1)
    widths = right - left
    heights = bottom - top
    return torch.stack((left, top, widths, heights), dim=1)


def merge(img_ids, eval_imgs):
    """Gather image ids and evaluation arrays from all ranks and deduplicate them.

    Returns:
        A tuple ``(ids, evals)``: the sorted unique image ids, and the
        gathered evaluation arrays concatenated along axis 2 and restricted
        to the first occurrence of each id.
    """
    gathered_ids = all_gather(img_ids)
    gathered_evals = all_gather(eval_imgs)

    flat_ids = np.array([img_id for chunk in gathered_ids for img_id in chunk])
    stacked_evals = np.concatenate(list(gathered_evals), 2)

    # keep only unique (and in sorted order) images
    unique_ids, first_positions = np.unique(flat_ids, return_index=True)
    return unique_ids, stacked_evals[..., first_positions]


def create_common_coco_eval(coco_eval, img_ids, eval_imgs):
    """Install the merged per-image results and image ids on ``coco_eval``.

    The merged evaluation array is flattened into ``coco_eval.evalImgs`` and a
    deep copy of the updated params is stored as ``_paramsEval``.
    """
    merged_ids, merged_evals = merge(img_ids, eval_imgs)

    coco_eval.evalImgs = list(merged_evals.flatten())
    coco_eval.params.imgIds = list(merged_ids)
    coco_eval._paramsEval = copy.deepcopy(coco_eval.params)


#################################################################
# From pycocotools, just removed the prints and fixed
# a Python3 bug about unicode not defined
#################################################################

# Ideally, pycocotools wouldn't have hard-coded prints
# so that we could avoid copy-pasting those two functions

def createIndex(self):
    """Build the lookup tables of a COCO-style object from ``self.dataset``.

    Sets ``anns``, ``imgs`` and ``cats`` (id -> record), ``imgToAnns``
    (image id -> annotations) and ``catToImgs`` (category id -> image ids)
    on ``self``. Sections missing from the dataset yield empty tables.
    """
    dataset = self.dataset
    has_annotations = 'annotations' in dataset
    has_categories = 'categories' in dataset

    anns = {}
    imgToAnns = defaultdict(list)
    if has_annotations:
        for ann in dataset['annotations']:
            imgToAnns[ann['image_id']].append(ann)
            anns[ann['id']] = ann

    imgs = {img['id']: img for img in dataset['images']} if 'images' in dataset else {}
    cats = {cat['id']: cat for cat in dataset['categories']} if has_categories else {}

    # The category -> images table is only filled when both sections exist.
    catToImgs = defaultdict(list)
    if has_annotations and has_categories:
        for ann in dataset['annotations']:
            catToImgs[ann['category_id']].append(ann['image_id'])

    # create class members
    self.anns = anns
    self.imgToAnns = imgToAnns
    self.catToImgs = catToImgs
    self.imgs = imgs
    self.cats = cats


# Alias so the functions copied from pycocotools below can keep using the
# name ``maskUtils`` for the mask helper module.
maskUtils = mask_util


def loadRes(self, resFile):
    """
    Load results and return a result api object.

    :param   self              : ground-truth COCO object the results refer to
    :param   resFile           : path of a JSON result file (str or bytes), a
                                 numpy array accepted by
                                 ``self.loadNumpyAnnotations``, or an
                                 already-loaded list of result dicts
    :return: res (obj)         : result api object
    :raises  AssertionError    : if the results are not a list, or mention
                                 image ids unknown to ``self``
    """
    res = COCO()
    res.dataset['images'] = [img for img in self.dataset['images']]

    # Plain (str, bytes) check instead of the private torch._six.string_classes,
    # which newer PyTorch releases no longer ship.
    if isinstance(resFile, (str, bytes)):
        # Context manager so the file handle is always closed.
        with open(resFile) as f:
            anns = json.load(f)
    elif isinstance(resFile, np.ndarray):
        anns = self.loadNumpyAnnotations(resFile)
    else:
        anns = resFile
    assert isinstance(anns, list), 'results in not an array of objects'
    annsImgIds = [ann['image_id'] for ann in anns]
    assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
        'Results do not correspond to current coco set'

    # The first result decides which kind of results this is. An empty list
    # matches no branch and produces a result object without annotations
    # instead of raising IndexError.
    first = anns[0] if anns else {}
    if 'caption' in first:
        imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns])
        res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
        for idx, ann in enumerate(anns):
            ann['id'] = idx + 1
    elif 'bbox' in first and not first['bbox'] == []:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            bb = ann['bbox']
            x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
            if 'segmentation' not in ann:
                # Use the box outline as a polygon when no mask is given.
                ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
            ann['area'] = bb[2] * bb[3]
            ann['id'] = idx + 1
            ann['iscrowd'] = 0
    elif 'segmentation' in first:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            # now only support compressed RLE format as segmentation results
            ann['area'] = maskUtils.area(ann['segmentation'])
            if 'bbox' not in ann:
                ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
            ann['id'] = idx + 1
            ann['iscrowd'] = 0
    elif 'keypoints' in first:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            s = ann['keypoints']
            # Keypoints are read with stride 3: x at 0::3, y at 1::3.
            x = s[0::3]
            y = s[1::3]
            x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y)
            ann['area'] = (x2 - x1) * (y2 - y1)
            ann['id'] = idx + 1
            ann['bbox'] = [x1, y1, x2 - x1, y2 - y1]

    res.dataset['annotations'] = anns
    createIndex(res)
    return res


def evaluate(self):
    '''
    Run per image evaluation on the images listed in self.params.

    Normalises the params (unique image/category ids, sorted maxDets), fills
    self.ious and stores a deep copy of the params in self._paramsEval.
    :return: (imgIds, evalImgs) - the image ids used and the per-image results
             reshaped to (num categories, num area ranges, num images)
    '''
    params = self.params
    # add backward compatibility if useSegm is specified in params
    if params.useSegm is not None:
        params.iouType = 'segm' if params.useSegm == 1 else 'bbox'
        print('useSegm (deprecated) is not None. Running {} evaluation'.format(params.iouType))
    params.imgIds = list(np.unique(params.imgIds))
    if params.useCats:
        params.catIds = list(np.unique(params.catIds))
    params.maxDets = sorted(params.maxDets)
    self.params = params

    self._prepare()
    # A single pseudo-category (-1) is used when categories are ignored.
    cat_ids = params.catIds if params.useCats else [-1]

    if params.iouType in ('segm', 'bbox'):
        iou_fn = self.computeIoU
    elif params.iouType == 'keypoints':
        iou_fn = self.computeOks

    ious = {}
    for img_id in params.imgIds:
        for cat_id in cat_ids:
            ious[(img_id, cat_id)] = iou_fn(img_id, cat_id)
    self.ious = ious

    # Only the largest detection limit is evaluated per image.
    largest_max_det = params.maxDets[-1]
    per_image = []
    for cat_id in cat_ids:
        for area_rng in params.areaRng:
            for img_id in params.imgIds:
                per_image.append(self.evaluateImg(img_id, cat_id, area_rng, largest_max_det))

    # this is NOT in the pycocotools code, but could be done outside
    per_image = np.asarray(per_image).reshape(len(cat_ids), len(params.areaRng), len(params.imgIds))
    self._paramsEval = copy.deepcopy(self.params)
    return params.imgIds, per_image

#################################################################
# end of straight copy from pycocotools, just removing the prints
#################################################################


================================================
FILE: pytorch_object_detection/ssd/train_utils/coco_utils.py
================================================
from tqdm import tqdm

import torch
import torchvision
import torch.utils.data
from pycocotools.coco import COCO


def convert_to_coco_api(ds):
    """Build a pycocotools ``COCO`` object from a dataset exposing ``coco_index``.

    For every index the target returned by ``ds.coco_index`` is read. Its
    boxes are converted to (xmin, ymin, w, h) and scaled by the image width
    and height, and its areas are scaled by width * height. The box tensor is
    modified in place.
    """
    coco_ds = COCO()
    # annotation IDs need to start at 1, not 0
    next_ann_id = 1
    images = []
    annotations = []
    seen_labels = set()
    for img_idx in range(len(ds)):
        # find better way to get target
        targets = ds.coco_index(img_idx)
        image_id = targets["image_id"].item()
        height = targets["height_width"][0]
        width = targets["height_width"][1]
        images.append({'id': image_id, 'height': height, 'width': width})

        # xmin, ymin, xmax, ymax
        bboxes = targets["boxes"]

        # (xmin, ymin, xmax, ymax) to (xmin, ymin, w, h)
        bboxes[:, 2:] -= bboxes[:, :2]
        # Convert the relative box coordinates (0-1) to absolute coordinates.
        bboxes[:, [0, 2]] = bboxes[:, [0, 2]] * width
        bboxes[:, [1, 3]] = bboxes[:, [1, 3]] * height
        box_list = bboxes.tolist()
        labels = targets['labels'].tolist()
        # The areas must be rescaled too, otherwise the small/medium/large
        # buckets are computed incorrectly.
        areas = (targets['area'] * width * height).tolist()
        iscrowd = targets['iscrowd'].tolist()
        for i, box in enumerate(box_list):
            annotations.append({
                'image_id': image_id,
                'bbox': box,
                'category_id': labels[i],
                'area': areas[i],
                'iscrowd': iscrowd[i],
                'id': next_ann_id,
            })
            seen_labels.add(labels[i])
            next_ann_id += 1

    coco_ds.dataset = {
        'images': images,
        'categories': [{'id': label} for label in sorted(seen_labels)],
        'annotations': annotations,
    }
    coco_ds.createIndex()
    return coco_ds


def get_coco_api_from_dataset(dataset):
    """Return a COCO api object for ``dataset``.

    Up to ten ``Subset`` wrappers are peeled off. A ``CocoDetection`` dataset
    returns its own ``coco`` attribute; anything else is converted with
    ``convert_to_coco_api``.
    """
    attempts = 0
    while attempts < 10 and not isinstance(dataset, torchvision.datasets.CocoDetection):
        if isinstance(dataset, torch.utils.data.Subset):
            dataset = dataset.dataset
        attempts += 1

    if not isinstance(dataset, torchvision.datasets.CocoDetection):
        return convert_to_coco_api(dataset)
    return dataset.coco


================================================
FILE: pytorch_object_detection/ssd/train_utils/distributed_utils.py
================================================
from collections import defaultdict, deque
import datetime
import pickle
import time
import errno
import os

import torch
import torch.distributed as dist


class SmoothedValue(object):
    """Keep a bounded window of recent values plus a running total and count.

    The window backs ``median``, ``avg``, ``max`` and ``value``; the running
    total and count back ``global_avg``.
    """

    def __init__(self, window_size=20, fmt=None):
        self.fmt = "{value:.4f} ({global_avg:.4f})" if fmt is None else fmt
        # Bounded deque: old entries fall out once window_size is reached.
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0

    def update(self, value, n=1):
        """Record ``value`` once in the window and ``n`` times in the totals."""
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Sum count and total across processes.

        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        packed = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda")
        dist.barrier()
        dist.all_reduce(packed)
        reduced_count, reduced_total = packed.tolist()
        self.count = int(reduced_count)
        self.total = reduced_total

    @property
    def median(self):
        """Median of the values currently in the window."""
        window = torch.tensor(list(self.deque))
        return window.median().item()

    @property
    def avg(self):
        """Mean of the values currently in the window."""
        window = torch.tensor(list(self.deque), dtype=torch.float32)
        return window.mean().item()

    @property
    def global_avg(self):
        """Running total divided by running count."""
        return self.total / self.count

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    @property
    def value(self):
        """Most recently recorded value."""
        return self.deque[-1]

    def __str__(self):
        stats = dict(
            median=self.median,
            avg=self.avg,
            global_avg=self.global_avg,
            max=self.max,
            value=self.value,
        )
        return self.fmt.format(**stats)


def all_gather(data):
    """
    Run all_gather on arbitrary picklable data (not necessarily tensors)

    The object is pickled into a byte tensor on the "cuda" device, padded to
    the largest size reported by any rank, exchanged with
    ``dist.all_gather`` and unpickled again.

    Args:
        data: any picklable object
    Returns:
        list[data]: list of data gathered from each rank; ``[data]`` when the
        world size is 1
    """
    world_size = get_world_size()
    if world_size == 1:
        # Nothing to exchange; skip pickling and any CUDA work.
        return [data]

    # serialized to a Tensor
    buffer = pickle.dumps(data)
    storage = torch.ByteStorage.from_buffer(buffer)
    tensor = torch.ByteTensor(storage).to("cuda")

    # obtain Tensor size of each rank
    local_size = torch.tensor([tensor.numel()], device="cuda")
    size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)]
    dist.all_gather(size_list, local_size)
    size_list = [int(size.item()) for size in size_list]
    max_size = max(size_list)

    # receiving Tensor from all ranks
    # we pad the tensor because torch all_gather does not support
    # gathering tensors of different shapes
    tensor_list = []
    for _ in size_list:
        tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda"))
    # local_size is a one-element tensor, so this compares a tensor with an int.
    if local_size != max_size:
        # The padding content is uninitialised; it is cut off again below.
        padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda")
        tensor = torch.cat((tensor, padding), dim=0)
    dist.all_gather(tensor_list, tensor)

    data_list = []
    for size, tensor in zip(size_list, tensor_list):
        # Trim each payload back to the sender's true size before unpickling.
        # NOTE(review): pickle.loads runs on bytes received from other ranks;
        # this presumes every peer is trusted.
        buffer = tensor.cpu().numpy().tobytes()[:size]
        data_list.append(pickle.loads(buffer))

    return data_list


def reduce_dict(input_dict, average=True):
    """
    Reduce the values of a dictionary across all processes.

    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum
    Returns:
        ``input_dict`` itself when fewer than two processes are running;
        otherwise a new dict with the same keys holding the reduced values.
    """
    num_procs = get_world_size()
    if num_procs < 2:  # single-process case: nothing to reduce
        return input_dict

    with torch.no_grad():  # multi-process case
        # sort the keys so that they are consistent across processes
        keys = sorted(input_dict.keys())
        stacked = torch.stack([input_dict[key] for key in keys], dim=0)
        dist.all_reduce(stacked)
        if average:
            stacked /= num_procs

        return dict(zip(keys, stacked))


class MetricLogger(object):
    """Hold named meters and print progress lines while iterating.

    Meters are created on first use as ``SmoothedValue`` objects and can be
    read as attributes of the logger.
    """

    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Feed each keyword value (tensor, float or int) to the meter of that name."""
        for name, value in kwargs.items():
            if isinstance(value, torch.Tensor):
                value = value.item()
            assert isinstance(value, (float, int))
            self.meters[name].update(value)

    def __getattr__(self, attr):
        # Only called when normal lookup fails: try the meters, then the
        # instance dict.
        for table in (self.meters, self.__dict__):
            if attr in table:
                return table[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        return self.delimiter.join(
            "{}: {}".format(name, str(meter))
            for name, meter in self.meters.items()
        )

    def synchronize_between_processes(self):
        """Synchronise every meter across processes."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register ``meter`` under ``name``, replacing any existing one."""
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield the items of ``iterable``, printing a status line every ``print_freq`` items.

        A line is also printed for the last item, and a total-time line once
        the iterable is exhausted. ``iterable`` must support ``len()``.
        """
        if not header:
            header = ""
        step = 0
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        # Pad the step counter to the width of the total item count.
        space_fmt = ":" + str(len(str(len(iterable)))) + "d"
        fields = [header,
                  '[{0' + space_fmt + '}/{1}]',
                  'eta: {eta}',
                  '{meters}',
                  'time: {time}',
                  'data: {data}']
        if torch.cuda.is_available():
            fields.append('max mem: {memory:.0f}')
        log_msg = self.delimiter.join(fields)
        MB = 1024.0 * 1024.0
        for obj in iterable:
            # Time spent waiting for the item, measured up to the yield.
            data_time.update(time.time() - end)
            yield obj
            # Time for the whole step, including the consumer's work.
            iter_time.update(time.time() - end)
            if step % print_freq == 0 or step == len(iterable) - 1:
                eta_second = iter_time.global_avg * (len(iterable) - step)
                values = dict(eta=str(datetime.timedelta(seconds=eta_second)),
                              meters=str(self),
                              time=str(iter_time),
                              data=str(data_time))
                if torch.cuda.is_available():
                    values['memory'] = torch.cuda.max_memory_allocated() / MB
                print(log_msg.format(step, len(iterable), **values))
            step += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print('{} Total time: {} ({:.4f} s / it)'.format(header,
                                                         total_time_str,
                                                         total_time / len(iterable)))


def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
    """Return a ``LambdaLR`` whose multiplier ramps from ``warmup_factor`` to 1.

    The ramp is linear over the first ``warmup_iters`` steps; from then on the
    multiplier stays at 1.
    """

    def lr_multiplier(step):
        """Learning-rate multiplier for the given step count."""
        if step < warmup_iters:
            progress = float(step) / warmup_iters
            # Interpolate between warmup_factor (step 0) and 1 (end of warmup).
            return warmup_factor * (1 - progress) + progress
        return 1

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_multiplier)


def mkdir(path):
    """Create ``path`` with any missing parents; an already-existing path is not an error."""
    try:
        os.makedirs(path)
    except FileExistsError:
        # FileExistsError is the OSError raised for errno EEXIST; every other
        # OSError still propagates.
        pass


def setup_for_distributed(is_master):
    """Replace the built-in ``print`` so that non-master processes stay silent.

    After this call, ``print`` produces output only when ``is_master`` is true
    or when the caller passes ``force=True``. The ``force`` keyword is consumed
    here and is never forwarded to the original ``print``.
    """
    import builtins as __builtin__
    original_print = __builtin__.print

    def print(*args, **kwargs):
        forced = kwargs.pop('force', False)
        if not (is_master or forced):
            return
        original_print(*args, **kwargs)

    __builtin__.print = print


def is_dist_avail_and_initialized():
    """Return True only if torch.distributed is both available and initialized."""
    # Short-circuits: is_initialized() is only queried when the package is available.
    return dist.is_available() and dist.is_initialized()


def get_world_size():
    """Return the process-group size, or 1 when distributed mode is not active."""
    if is_dist_avail_and_initialized():
        return dist.get_world_size()
    return 1


def get_rank():
    """Return this process's rank, or 0 when distributed mode is not active."""
    if is_dist_avail_and_initialized():
        return dist.get_rank()
    return 0


def is_main_process():
    """Return True when the current process has rank 0."""
    current_rank = get_rank()
    return current_rank == 0


def save_on_master(*args, **kwargs):
    """Forward all arguments to ``torch.save``, but only on the rank-0 process."""
    if not is_main_process():
        return
    torch.save(*args, **kwargs)


def init_distributed_mode(args):
    """Fill in distributed settings on ``args`` and start the process group.

    The launch mode is detected from environment variables:

    * ``RANK`` and ``WORLD_SIZE`` present: rank, world size and GPU index are
      read from ``RANK``, ``WORLD_SIZE`` and ``LOCAL_RANK``.
    * otherwise ``SLURM_PROCID`` present: the rank comes from ``SLURM_PROCID``
      and the GPU index is the rank modulo the number of visible CUDA devices.
    * neither: ``args.distributed`` is set to False and the function returns.

    In the distributed case the NCCL process group is initialised using
    ``args.dist_url``, all processes synchronise on a barrier, and printing is
    restricted to rank 0.
    """
    env = os.environ
    launched_with_rank = 'RANK' in env and 'WORLD_SIZE' in env
    launched_with_slurm = 'SLURM_PROCID' in env

    if not (launched_with_rank or launched_with_slurm):
        print('Not using distributed mode')
        args.distributed = False
        return

    if launched_with_rank:
        args.rank = int(env["RANK"])
        args.world_size = int(env['WORLD_SIZE'])
        args.gpu = int(env['LOCAL_RANK'])
    else:
        # NOTE(review): world_size is not assigned on this path; presumably the
        # caller supplies args.world_size beforehand - confirm.
        args.rank = int(env['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()

    args.distributed = True

    # Bind this process to its GPU before creating the process group.
    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(args.rank, args.dist_url),
          flush=True)
    torch.distributed.init_process_group(
        backend=args.dist_backend,
        init_method=args.dist_url,
        world_size=args.world_size,
        rank=args.rank,
    )
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)


================================================
FILE: pytorch_object_detection/ssd/train_utils/group_by_aspect_ratio.py
================================================
import bisect
from collections import defaultdict
import copy
from itertools import repeat, chain
import math
import numpy as np

import torch
import torch.utils.data
from torch.utils.data.sampler import BatchSampler, Sampler
from torch.utils.model_zoo import tqdm
import torchvision

from PIL import Image


def _repeat_to_at_least(iterable, n):
    repeat_times = math.ceil(n / len(iterable))
    repeated = chain.from_iterable(repeat(iterable, repeat_times))
    return list(repeated)


class GroupedBatchSampler(BatchSampler):
    """
    Batch sampler that only ever puts indices of the same group in a batch.

    Indices are drawn from a wrapped sampler and collected per group; a batch
    is emitted as soon as one group has gathered ``batch_size`` indices, so the
    emission order stays close to the order of the wrapped sampler.
    Arguments:
        sampler (Sampler): Base sampler.
        group_ids (list[int]): If the sampler produces indices in range [0, N),
            `group_ids` must be a list of `N` ints which contains the group id of each sample.
            The group ids must be a continuous set of integers starting from
            0, i.e. they must be in the range [0, num_groups).
        batch_size (int): Size of mini-batch.
    """
    def __init__(self, sampler, group_ids, batch_size):
        if not isinstance(sampler, Sampler):
            raise ValueError(
                "sampler should be an instance of "
                "torch.utils.data.Sampler, but got sampler={}".format(sampler)
            )
        self.sampler = sampler
        self.group_ids = group_ids
        self.batch_size = batch_size

    def __iter__(self):
        pending = defaultdict(list)  # group id -> indices waiting for a full batch
        seen = defaultdict(list)     # group id -> every index drawn for that group

        emitted = 0
        for idx in self.sampler:
            gid = self.group_ids[idx]
            bucket = pending[gid]
            bucket.append(idx)
            seen[gid].append(idx)
            if len(bucket) == self.batch_size:
                yield bucket
                emitted += 1
                del pending[gid]
            # Looking the group up again re-creates an empty bucket after a flush.
            assert len(pending[gid]) < self.batch_size

        # The wrapped sampler is exhausted. Pad leftover buckets with indices
        # already drawn from the same group so that exactly len(self) batches
        # are produced.
        shortfall = len(self) - emitted
        if shortfall > 0:
            # Fullest buckets first: they need the fewest repeated samples.
            fullest_first = sorted(pending.items(),
                                   key=lambda entry: len(entry[1]), reverse=True)
            for gid, bucket in fullest_first:
                missing = self.batch_size - len(bucket)
                filler = _repeat_to_at_least(seen[gid], missing)
                bucket.extend(filler[:missing])
                assert len(bucket) == self.batch_size
                yield bucket
                shortfall -= 1
                if shortfall == 0:
                    break
        assert shortfall == 0

    def __len__(self):
        # Number of full batches obtainable from the wrapped sampler.
        return len(self.sampler) // self.batch_size


def _compute_aspect_ratios_slow(dataset, indices=None):
    print("Your dataset doesn't support the fast path for "
          "computing the aspect ratios, so will iterate over "
          "the full dataset and load every image instead. "
          "This might take some time...")
    if indices is None:
        indices = range(len(dataset))

    class SubsetSampler(Sampler):
        def __init__(self, indices):
            self.indices = indices

        def __iter__(self):
            return iter(self.indices)

        def __len__(self):
            return len(self.indices)

    sampler = SubsetSampler(indices)
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, sampler=sampler,
        num_workers=14,  # you might want to increase it for faster processing
        collate_fn=lambda x: x[0])
    aspect_ratios = []
    with tqdm(total=len(dataset)) as pbar:
        for _i, (img, _) in enumerate(data_loader):
            pbar.update(1)
            height, width = img.shape[-2:]
            aspect_ratio = float(width) / float(height)
            aspect_ratios.append(aspect_ratio)
    return aspect_ratios


def _compute_aspect_ratios_custom_dataset(dataset, indices=None):
    if indices is None:
        indices = range(len(dataset))
    aspect_ratios = []
    for i in indices:
        height, width = dataset.get_height_and_width(i)
        aspect_ratio = float(width) / float(height)
        aspect_ratios.append(aspect_ratio)
    return aspect_ratios


def _compute_aspect_ratios_coco_dataset(dataset, indices=None):
    if indices is None:
        indices = range(len(dataset))
    aspect_ratios = []
    for i in indices:
        img_info = dataset.coco.imgs[dataset.ids[i]]
        aspect_ratio = float(img_info["width"]) / float(img_info["height"])
        aspect_ratios.append(aspect_ratio)
    return aspect_ratios


def _compute_aspect_ratios_voc_dataset(dataset, indices=None):
    if indices is None:
        indices = range(len(dataset))
    aspect_ratios = []
    for i in indices:
        # this doesn't load the data into memory, because PIL loads it lazily
        width, height = Image.open(dataset.images[i]).size
        aspect_ratio = float(width) / float(height)
        aspect_ratios.append(aspect_ratio)
    return aspect_ratios


def _compute_aspect_ratios_subset_dataset(dataset, indices=None):
    """Compute aspect ratios for a subset wrapper by delegating to its parent.

    Positions within the subset are translated through ``dataset.indices`` to
    indices of ``dataset.dataset``, which is then handled by
    ``compute_aspect_ratios``.
    """
    positions = range(len(dataset)) if indices is None else indices

    parent_indices = []
    for pos in positions:
        parent_indices.append(dataset.indices[pos])
    return compute_aspect_ratios(dataset.dataset, parent_indices)


def compute_aspect_ratios(dataset, indices=None):
    """Compute width/height ratios using the cheapest strategy for ``dataset``.

    Strategies are tried in this order: a ``get_height_and_width`` method on
    the dataset, torchvision ``CocoDetection``, torchvision ``VOCDetection``,
    ``torch.utils.data.Subset``, and finally the slow path that loads every
    sample.
    """
    if hasattr(dataset, "get_height_and_width"):
        handler = _compute_aspect_ratios_custom_dataset
    elif isinstance(dataset, torchvision.datasets.CocoDetection):
        handler = _compute_aspect_ratios_coco_dataset
    elif isinstance(dataset, torchvision.datasets.VOCDetection):
        handler = _compute_aspect_ratios_voc_dataset
    elif isinstance(dataset, torch.utils.data.Subset):
        handler = _compute_aspect_ratios_subset_dataset
    else:
        # slow path
        handler = _compute_aspect_ratios_slow
    return handler(dataset, indices)


def _quantize(x, bins):
    bins = copy.deepcopy(bins)
    bins = sorted(bins)
    # bisect_right：寻找y元素按顺序应该排在bins中哪个元素的右边，返回的是索引
    quantized = list(map(lambda y: bisect.bisect_right(bins, y), x))
    return quantized


def create_aspect_ratio_groups(dataset, k=0):
    """Assign every sample of ``dataset`` to an aspect-ratio group.

    Args:
        dataset: dataset accepted by ``compute_aspect_ratios``.
        k: controls the number of bin edges. For ``k > 0`` there are
            ``2 * k + 1`` edges spaced evenly in log2 between 0.5 and 2;
            otherwise the single edge 1.0 is used.

    Returns:
        A list with one group index per sample.
    """
    # width / height ratio of every image in the dataset
    ratios = compute_aspect_ratios(dataset)

    if k > 0:
        bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist()
    else:
        bins = [1.0]

    # index of the bin each ratio falls into
    groups = _quantize(ratios, bins)
    # number of samples in each bin that occurs at least once
    _, counts = np.unique(groups, return_counts=True)
    fbins = [0] + bins + [np.inf]
    print("Using {} as bins for aspect ratio quantization".format(fbins))
    print("Count of instances per bin: {}".format(counts))
    return groups


================================================
FILE: pytorch_object_detection/ssd/train_utils/train_eval_utils.py
================================================
import math
import sys
import time

import torch

from train_utils import get_coco_api_from_dataset, CocoEvaluator
import train_utils.distributed_utils as utils


def train_one_epoch(model, optimizer, data_loader, device, epoch,
                    print_freq=50, warmup=False):
    """Train ``model`` for one epoch.

    Args:
        model: called as ``model(images, targets)``; must return a dict that
            contains the key ``"total_losses"``.
        optimizer: optimizer updating the model parameters.
        data_loader: yields ``(images, targets)`` pairs, where ``images`` is a
            sequence of tensors and ``targets`` a sequence of dicts with the
            keys ``"boxes"``, ``"labels"`` and ``"image_id"``.
        device: device the batch is moved to.
        epoch: epoch index, used for logging and to enable warm-up.
        print_freq: logging interval in iterations.
        warmup: when True and ``epoch == 0``, a warm-up learning-rate
            scheduler is stepped after every iteration.

    Returns:
        ``(mloss, now_lr)``: the running mean of the reduced total loss as a
        one-element tensor, and the learning rate of the first parameter
        group after the last iteration (or the current one if the loader
        yielded no batch).

    The process exits with status 1 if the reduced loss is not finite.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if epoch == 0 and warmup is True:  # warm-up is only applied during the first epoch
        warmup_factor = 5.0 / 10000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    # Read the learning rate up front so the return value is defined even when
    # the loader yields no batch at all.
    now_lr = optimizer.param_groups[0]["lr"]

    mloss = torch.zeros(1).to(device)  # mean losses
    for i, [images, targets] in enumerate(metric_logger.log_every(data_loader, print_freq, header)):
        # Stack the per-sample entries into batched tensors.
        images = torch.stack(images, dim=0)
        targets = {"boxes": torch.stack([t['boxes'] for t in targets], dim=0),
                   "labels": torch.stack([t['labels'] for t in targets], dim=0),
                   "image_id": torch.as_tensor([t["image_id"] for t in targets])}

        images = images.to(device)

        targets = {k: v.to(device) for k, v in targets.items()}
        losses_dict = model(images, targets)
        losses = losses_dict["total_losses"]

        # reduce losses over all GPUs for logging purpose
        losses_dict_reduced = utils.reduce_dict(losses_dict)
        losses_reduce = losses_dict_reduced["total_losses"]

        loss_value = losses_reduce.detach()
        # track the running mean of the training loss
        mloss = (mloss * i + loss_value) / (i + 1)  # update mean losses

        if not math.isfinite(loss_value):  # stop training when the loss is inf or NaN
            print("Loss is {}, stopping training".format(loss_value))
            print(losses_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if lr_scheduler is not None:  # only set during the warm-up epoch
            lr_scheduler.step()

        metric_logger.update(**losses_dict_reduced)
        now_lr = optimizer.param_groups[0]["lr"]
        metric_logger.update(lr=now_lr)

    return mloss, now_lr


@torch.no_grad()
def evaluate(model, data_loader, device, data_set=None):
    """Run ``model`` over ``data_loader`` and compute COCO metrics.

    Args:
        model: called as ``model(images, targets=None)``; must return, per
            image, a ``(boxes, labels, scores)`` triple.
        data_loader: yields ``(images, targets)``; every target provides
            ``"height_width"`` and ``"image_id"``.
        device: device the images are moved to.
        data_set: COCO API object for the ground truth; built from
            ``data_loader.dataset`` when omitted.

    Returns:
        The ``stats`` of the evaluator for the first IoU type, as a list.
    """
    cpu = torch.device("cpu")
    model.eval()
    logger = utils.MetricLogger(delimiter="  ")
    header = "Test: "

    if data_set is None:
        data_set = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(data_set, iou_types)

    for images, targets in logger.log_every(data_loader, 100, header):
        images = torch.stack(images, dim=0).to(device)

        if device != torch.device("cpu"):
            torch.cuda.synchronize(device)

        started = time.time()
        # list of (bboxes_out, labels_out, scores_out), one entry per image
        results = model(images, targets=None)
        model_time = time.time() - started

        outputs = []
        for index, (bboxes_out, labels_out, scores_out) in enumerate(results):
            # Predicted boxes are scaled by the original image size:
            # x coordinates by the width, y coordinates by the height.
            height_width = targets[index]["height_width"]
            bboxes_out[:, [0, 2]] = bboxes_out[:, [0, 2]] * height_width[1]
            bboxes_out[:, [1, 3]] = bboxes_out[:, [1, 3]] * height_width[0]

            outputs.append({"boxes": bboxes_out.to(cpu),
                            "labels": labels_out.to(cpu),
                            "scores": scores_out.to(cpu)})

        # image id -> detections of that image
        res = {targets[i]["image_id"].item(): detections
               for i, detections in enumerate(outputs)}

        started = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - started
        logger.update(model_time=model_time, evaluator_time=evaluator_time)

    # gather the stats from all processes
    logger.synchronize_between_processes()
    print("Averaged stats:", logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()

    first_type_eval = coco_evaluator.coco_eval[iou_types[0]]
    return first_type_eval.stats.tolist()  # numpy to list


def _get_iou_types(model):
    model_without_ddp = model
    if isinstance(model, torch.nn.parallel.DistributedDataParallel):
        model_without_ddp = model.module
    iou_types = ["bbox"]
    return iou_types


================================================
FILE: pytorch_object_detection/ssd/transforms.py
================================================
import random

import torch
import torchvision.transforms as t
from torchvision.transforms import functional as F

from src import dboxes300_coco, calc_iou_tensor, Encoder


class Compose(object):
    """Apply several transforms in sequence to an (image, target) pair."""
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target=None):
        current_image, current_target = image, target
        # Each step receives the output of the previous one.
        for step in self.transforms:
            current_image, current_target = step(current_image, current_target)
        return current_image, current_target


class ToTensor(object):
    """Convert the image with ``F.to_tensor``; the target is returned unchanged."""
    def __call__(self, image, target):
        tensor_image = F.to_tensor(image)
        return tensor_image.contiguous(), target


class RandomHorizontalFlip(object):
    """Randomly mirror the image and its boxes left-right; use after ToTensor."""
    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, image, target):
        if not random.random() < self.prob:
            return image, target

        mirrored = image.flip(-1)  # reverse the last (width) dimension
        boxes = target["boxes"]
        # Boxes are (xmin, ymin, xmax, ymax) and x is mirrored around 1.0:
        # the new xmin is 1 - old xmax and the new xmax is 1 - old xmin.
        boxes[:, [0, 2]] = 1.0 - boxes[:, [2, 0]]
        target["boxes"] = boxes
        return mirrored, target


# This function is from https://github.com/chauhan-utk/ssd.DomainAdaptation.
class SSDCropping(object):
    """
    Random cropping for SSD, following the original paper. This transform is
    meant to be placed before ToTensor (it calls ``image.crop``).
    Choose between following 3 conditions:
    1. Preserve the original image
    2. Random crop minimum IoU is among 0.1, 0.3, 0.5, 0.7, 0.9
    3. Random crop
    Reference to https://github.com/chauhan-utk/src.DomainAdaptation

    ``target`` must provide ``'height_width'``, ``'boxes'`` and ``'labels'``.
    Box coordinates are treated as relative to the image size: the crop window
    is sampled inside [0, 1] and compared against the boxes directly.
    """
    def __init__(self):
        self.sample_options = (
            # Do nothing
            None,
            # min IoU, max IoU
            (0.1, None),
            (0.3, None),
            (0.5, None),
            (0.7, None),
            (0.9, None),
            # no IoU requirements
            (None, None),
        )
        # NOTE(review): not referenced anywhere else in this class.
        self.dboxes = dboxes300_coco()

    def __call__(self, image, target):
        # Keep drawing a sampling mode until one of them returns a result
        while True:
            mode = random.choice(self.sample_options)
            if mode is None:  # return the sample without any cropping
                return image, target

            htot, wtot = target['height_width']

            min_iou, max_iou = mode
            # A missing bound is replaced by an infinite one, i.e. no constraint
            min_iou = float('-inf') if min_iou is None else min_iou
            max_iou = float('+inf') if max_iou is None else max_iou

            # Implementation use 5 iteration to find possible candidate
            for _ in range(5):
                # 0.3*0.3 approx. 0.1
                w = random.uniform(0.3, 1.0)
                h = random.uniform(0.3, 1.0)

                if w/h < 0.5 or w/h > 2:  # keep the aspect ratio within [0.5, 2]
                    continue

                # left in [0, 1 - w], top in [0, 1 - h] (relative coordinates)
                left = random.uniform(0, 1.0 - w)
                top = random.uniform(0, 1.0 - h)

                right = left + w
                bottom = top + h

                # box coordinates lie in the 0-1 range
                bboxes = target["boxes"]
                ious = calc_iou_tensor(bboxes, torch.tensor([[left, top, right, bottom]]))

                # Accept the patch only if every IoU lies strictly between the bounds
                # all(): Returns True if all elements in the tensor are True, False otherwise.
                if not ((ious > min_iou) & (ious < max_iou)).all():
                    continue

                # discard any bboxes whose center not in the cropped image
                xc = 0.5 * (bboxes[:, 0] + bboxes[:, 2])
                yc = 0.5 * (bboxes[:, 1] + bboxes[:, 3])

                # mark the gt boxes whose center lies inside the sampled patch
                masks = (xc > left) & (xc < right) & (yc > top) & (yc < bottom)

                # if no such boxes, continue searching again
                # (no gt box has its center inside the patch, so sample a new one)
                if not masks.any():
                    continue

                # Clamp the box coordinates to the patch borders so they do not
                # stick out of it. This writes into the tensor held by
                # target["boxes"], before the boxes are filtered below.
                bboxes[bboxes[:, 0] < left, 0] = left
                bboxes[bboxes[:, 1] < top, 1] = top
                bboxes[bboxes[:, 2] > right, 2] = right
                bboxes[bboxes[:, 3] > bottom, 3] = bottom

                # drop the gt boxes whose center is outside the patch
                bboxes = bboxes[masks, :]
                # keep the labels of the remaining gt boxes
                labels = target['labels']
                labels = labels[masks]

                # crop the patch, converting relative coordinates to pixel indices
                left_idx = int(left * wtot)
                top_idx = int(top * htot)
                right_idx = int(right * wtot)
                bottom_idx = int(bottom * htot)
                image = image.crop((left_idx, top_idx, right_idx, bottom_idx))

                # re-express the boxes relative to the cropped patch
                bboxes[:, 0] = (bboxes[:, 0] - left) / w
                bboxes[:, 1] = (bboxes[:, 1] - top) / h
                bboxes[:, 2] = (bboxes[:, 2] - left) / w
                bboxes[:, 3] = (bboxes[:, 3] - top) / h

                # store the cropped gt boxes and their labels back into the target
                target['boxes'] = bboxes
                target['labels'] = labels

                return image, target


class Resize(object):
    """Resize the image to a fixed size; intended to be used before ToTensor."""
    def __init__(self, size=(300, 300)):
        self.resize = t.Resize(size)

    def __call__(self, image, target):
        # Only the image is resized; the target is passed through untouched.
        resized = self.resize(image)
        return resized, target


class ColorJitter(object):
    """Randomly perturb the image colours; intended to be used before ToTensor."""
    def __init__(self, brightness=0.125, contrast=0.5, saturation=0.5, hue=0.05):
        self.trans = t.ColorJitter(brightness, contrast, saturation, hue)

    def __call__(self, image, target):
        # Only the image is changed; the target is passed through untouched.
        jittered = self.trans(image)
        return jittered, target


class Normalization(object):
    """Normalise the image per channel; intended to be used after ToTensor."""
    def __init__(self, mean=None, std=None):
        # Fall back to the built-in per-channel statistics when none are given.
        mean = [0.485, 0.456, 0.406] if mean is None else mean
        std = [0.229, 0.224, 0.225] if std is None else std
        self.normalize = t.Normalize(mean=mean, std=std)

    def __call__(self, image, target):
        normalized = self.normalize(image)
        return normalized, target


class AssignGTtoDefaultBox(object):
    """Match the ground-truth boxes and labels of a target to the default boxes."""
    def __init__(self):
        self.default_box = dboxes300_coco()
        self.encoder = Encoder(self.default_box)

    def __call__(self, image, target):
        # The original note gives the encoder output as 8732 x 4 boxes and
        # 8732 labels - confirm against Encoder.encode.
        encoded_boxes, encoded_labels = self.encoder.encode(target['boxes'],
                                                            target["labels"])
        target['boxes'] = encoded_boxes
        target['labels'] = encoded_labels

        return image, target


================================================
FILE: pytorch_object_detection/ssd/validation.py
================================================
"""
该脚本用于调用训练好的模型权重去计算验证集/测试集的COCO指标
以及每个类别的mAP(IoU=0.5)
"""

import os
import json

import torch
from tqdm import tqdm
import numpy as np

import transforms
from src import Backbone, SSD300
from my_dataset import VOCDataSet
from train_utils import get_coco_api_from_dataset, CocoEvaluator


def summarize(self, catId=None):
    """
    Compute summary metrics for accumulated evaluation results.
    Note this function can *only* be applied on the default parameter setting

    Args:
        self: evaluation object exposing ``params`` (with ``iouThrs``,
            ``areaRngLbl`` and ``maxDets``) and ``eval`` (a dict holding the
            ``'precision'`` and ``'recall'`` arrays).
        catId: when an ``int``, restrict the metrics to that index along the
            category axis; otherwise average over all categories.

    Returns:
        ``(stats, print_info)``: the 12 summary values and the formatted
        report, one line per value joined by newlines.

    Raises:
        Exception: if ``self.eval`` is empty, i.e. results were not accumulated.
    """
    # Everything below indexes into self.eval, so this has to be checked
    # before any metric is computed.
    if not self.eval:
        raise Exception('Please run accumulate() first')

    def _summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):
        """Return (mean value, report line) for one AP (ap=1) or AR (ap=0) setting."""
        p = self.params
        iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
        titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
        typeStr = '(AP)' if ap == 1 else '(AR)'
        iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
            if iouThr is None else '{:0.2f}'.format(iouThr)

        # positions of the requested area range and detection limit
        aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
        mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]

        if ap == 1:
            # dimension of precision: [TxRxKxAxM]
            s = self.eval['precision']
            # IoU
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]

            if isinstance(catId, int):
                s = s[:, :, catId, aind, mind]
            else:
                s = s[:, :, :, aind, mind]

        else:
            # dimension of recall: [TxKxAxM]
            s = self.eval['recall']
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]

            if isinstance(catId, int):
                s = s[:, catId, aind, mind]
            else:
                s = s[:, :, aind, mind]

        # Entries that are not greater than -1 are excluded from the mean;
        # if nothing remains, the metric is reported as -1.
        if len(s[s > -1]) == 0:
            mean_s = -1
        else:
            mean_s = np.mean(s[s > -1])

        print_string = iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)
        return mean_s, print_string

    stats, print_list = [0] * 12, [""] * 12
    stats[0], print_list[0] = _summarize(1)
    stats[1], print_list[1] = _summarize(1, iouThr=.5, maxDets=self.params.maxDets[2])
    stats[2], print_list[2] = _summarize(1, iouThr=.75, maxDets=self.params.maxDets[2])
    stats[3], print_list[3] = _summarize(1, areaRng='small', maxDets=self.params.maxDets[2])
    stats[4], print_list[4] = _summarize(1, areaRng='medium', maxDets=self.params.maxDets[2])
    stats[5], print_list[5] = _summarize(1, areaRng='large', maxDets=self.params.maxDets[2])
    stats[6], print_list[6] = _summarize(0, maxDets=self.params.maxDets[0])
    stats[7], print_list[7] = _summarize(0, maxDets=self.params.maxDets[1])
    stats[8], print_list[8] = _summarize(0, maxDets=self.params.maxDets[2])
    stats[9], print_list[9] = _summarize(0, areaRng='small', maxDets=self.params.maxDets[2])
    stats[10], print_list[10] = _summarize(0, areaRng='medium', maxDets=self.params.maxDets[2])
    stats[11], print_list[11] = _summarize(0, areaRng='large', maxDets=self.params.maxDets[2])

    print_info = "\n".join(print_list)

    return stats, print_info


def main(parser_data):
    """Evaluate trained SSD weights on the VOC2012 ``val.txt`` split.

    Reads ``device``, ``data_path``, ``batch_size``, ``num_classes`` and
    ``weights`` from ``parser_data``, runs the model over the validation
    images, prints the COCO metrics and the per-category mAP at IoU=0.5, and
    writes both to ``record_mAP.txt`` in the current working directory.
    """
    device = torch.device(parser_data.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    data_transform = {
        "val": transforms.Compose([transforms.Resize(),
                                   transforms.ToTensor(),
                                   transforms.Normalization()])
    }

    # read class_indict
    label_json_path = './pascal_voc_classes.json'
    assert os.path.exists(label_json_path), "json file {} dose not exist.".format(label_json_path)
    with open(label_json_path, 'r') as f:
        class_dict = json.load(f)

    # invert the mapping read from the json file (value -> key)
    category_index = {v: k for k, v in class_dict.items()}

    VOC_root = parser_data.data_path
    # check voc root
    if os.path.exists(os.path.join(VOC_root, "VOCdevkit")) is False:
        raise FileNotFoundError("VOCdevkit dose not in path:'{}'.".format(VOC_root))

    # Note: a custom collate_fn is used because each sample consists of an image
    # and its targets, which the default collate function cannot batch directly.
    batch_size = parser_data.batch_size
    # NOTE(review): os.cpu_count() can return None, which would make min() raise - confirm
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using %g dataloader workers' % nw)

    # load validation data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt
    val_dataset = VOCDataSet(VOC_root, "2012", transforms=data_transform["val"], train_set="val.txt")
    val_dataset_loader = torch.utils.data.DataLoader(val_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=nw,
                                                     pin_memory=True,
                                                     collate_fn=val_dataset.collate_fn)

    # create model num_classes equal background + 20 classes
    backbone = Backbone()
    model = SSD300(backbone=backbone, num_classes=parser_data.num_classes + 1)

    # load your own trained model weights
    weights_path = parser_data.weights
    assert os.path.exists(weights_path), "not found {} file.".format(weights_path)
    weights_dict = torch.load(weights_path, map_location='cpu')
    # accept either a checkpoint dict with a "model" entry or a bare state dict
    weights_dict = weights_dict["model"] if "model" in weights_dict else weights_dict
    model.load_state_dict(weights_dict)
    # print(model)

    model.to(device)

    # evaluate on the test dataset
    coco = get_coco_api_from_dataset(val_dataset)
    iou_types = ["bbox"]
    coco_evaluator = CocoEvaluator(coco, iou_types)
    cpu_device = torch.device("cpu")

    model.eval()
    with torch.no_grad():
        for images, targets in tqdm(val_dataset_loader, desc="validation..."):
            # move the images to the selected device
            images = torch.stack(images, dim=0).to(device)

            # inference
            results = model(images)

            outputs = []
            for index, (bboxes_out, labels_out, scores_out) in enumerate(results):
                # convert relative box coordinates (0-1) to absolute (xmin, ymin, xmax, ymax)
                height_width = targets[index]["height_width"]
                # scale back to the original image size
                bboxes_out[:, [0, 2]] = bboxes_out[:, [0, 2]] * height_width[1]
                bboxes_out[:, [1, 3]] = bboxes_out[:, [1, 3]] * height_width[0]

                info = {"boxes": bboxes_out.to(cpu_device),
                        "labels": labels_out.to(cpu_device),
                        "scores": scores_out.to(cpu_device)}
                outputs.append(info)

            # image id -> detections of that image
            res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
            coco_evaluator.update(res)

    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()

    coco_eval = coco_evaluator.coco_eval["bbox"]
    # calculate COCO info for all classes
    coco_stats, print_coco = summarize(coco_eval)

    # calculate voc info for every classes(IoU=0.5)
    voc_map_info_list = []
    # NOTE(review): category index i is paired with name key i + 1, which
    # presumably assumes class ids in the json file start at 1 - confirm
    for i in range(len(category_index)):
        stats, _ = summarize(coco_eval, catId=i)
        voc_map_info_list.append(" {:15}: {}".format(category_index[i + 1], stats[1]))

    print_voc = "\n".join(voc_map_info_list)
    print(print_voc)

    # save the validation results to a txt file
    with open("record_mAP.txt", "w") as f:
        record_lines = ["COCO results:",
                        print_coco,
                        "",
                        "mAP(IoU=0.5) for each category:",
                        print_voc]
        f.write("\n".join(record_lines))


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description=__doc__)

    # Command-line options as (flags, keyword arguments), registered in this order.
    option_specs = (
        # device type to run on
        (('--device',), {'default': 'cuda', 'help': 'device'}),
        # number of object classes to detect
        (('--num-classes',), {'type': int, 'default': '20', 'help': 'number of classes'}),
        # dataset root directory (the directory that contains VOCdevkit)
        (('--data-path',), {'default': '/data/', 'help': 'dataset root'}),
        # trained weights file
        (('--weights',), {'default': './save_weights/model.pth', 'type': str,
                          'help': 'training weights'}),
        # batch size
        (('--batch_size',), {'default': 1, 'type': int, 'metavar': 'N',
                             'help': 'batch size when validation.'}),
    )
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)

    args = parser.parse_args()

    main(args)


================================================
FILE: pytorch_object_detection/train_coco_dataset/README.md
================================================
# 训练COCO2017数据集

## 该项目参考自pytorch官方torchvision模块中的源码(使用pycocotools处略有不同)
* https://github.com/pytorch/vision/tree/master/references/detection

## 环境配置：
* Python3.6/3.7/3.8
* Pytorch1.10.0
* pycocotools(Linux:```pip install pycocotools```; Windows:```pip install pycocotools-windows```(不需要额外安装vs))
* Ubuntu或Centos(不建议Windows)
* 最好使用GPU训练
* 详细环境配置见```requirements.txt```

## 文件结构：
```
  ├── backbone: 特征提取网络，可以根据自己的要求选择，这里是以VGG16为例
  ├── network_files: Faster R-CNN网络（包括Fast R-CNN以及RPN等模块）
  ├── train_utils: 训练验证相关模块（包括pycocotools）
  ├── my_dataset.py: 自定义dataset用于读取COCO2017数据集
  ├── train.py: 以resnet50做为backbone进行训练
  ├── train_multi_GPU.py: 针对使用多GPU的用户使用
  ├── predict.py: 简易的预测脚本，使用训练好的权重进行预测测试
  ├── validation.py: 利用训练好的权重验证/测试数据的COCO指标，并生成record_mAP.txt文件
  └── transforms.py: 数据预处理（随机水平翻转图像以及bboxes、将PIL图像转为Tensor）
```

## 预训练权重下载地址（下载后放入项目根目录）：
* Resnet50 https://download.pytorch.org/models/resnet50-19c8e357.pth
* 注意，下载的预训练权重记得要重命名，比如在train.py中读取的是`resnet50.pth`文件，
  不是`resnet50-19c8e357.pth`
 
 
## 数据集，本例程使用的是COCO2017数据集
* COCO官网地址：https://cocodataset.org/
* 对数据集不了解的可以看下我写的博文：https://blog.csdn.net/qq_37541097/article/details/113247318
* 这里以下载coco2017数据集为例，主要下载三个文件：
    * `2017 Train images [118K/18GB]`：训练过程中使用到的所有图像文件
    * `2017 Val images [5K/1GB]`：验证过程中使用到的所有图像文件
    * `2017 Train/Val annotations [241MB]`：对应训练集和验证集的标注json文件
* 都解压到`coco2017`文件夹下，可得到如下文件结构：
```
├── coco2017: 数据集根目录
     ├── train2017: 所有训练图像文件夹(118287张)
     ├── val2017: 所有验证图像文件夹(5000张)
     └── annotations: 对应标注文件夹
              ├── instances_train2017.json: 对应目标检测、分割任务的训练集标注文件
              ├── instances_val2017.json: 对应目标检测、分割任务的验证集标注文件
              ├── captions_train2017.json: 对应图像描述的训练集标注文件
              ├── captions_val2017.json: 对应图像描述的验证集标注文件
              ├── person_keypoints_train2017.json: 对应人体关键点检测的训练集标注文件
              └── person_keypoints_val2017.json: 对应人体关键点检测的验证集标注文件
```

## 训练方法
* 确保提前准备好数据集
* 确保提前下载好对应预训练模型权重
* 若要使用单GPU训练直接使用train.py训练脚本
* 若要使用多GPU训练，使用`torchrun --nproc_per_node=8 train_multi_GPU.py`指令,`nproc_per_node`参数为使用GPU数量
* 如果想指定使用哪些GPU设备可在指令前加上`CUDA_VISIBLE_DEVICES=0,3`(例如我只要使用设备中的第1块和第4块GPU设备)
* `CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py`

## 注意事项
* 在使用训练脚本时，注意要将`--data-path`设置为自己存放`coco2017`文件夹所在的**根目录**
* 训练过程中保存的`results.txt`是每个epoch在验证集上的COCO指标，前12个值是COCO指标，后面两个值是训练平均损失以及学习率
* 在使用预测脚本时，要将`weights_path`设置为你自己生成的权重路径。
* 使用validation文件时，注意确保你的验证集或者测试集中必须包含每个类别的目标，并且使用时只需要修改`--num-classes`、`--data-path`和`--weights-path`即可，其他代码尽量不要改动

## 本项目训练得到的权重(Faster R-CNN + Resnet50)
* 链接: https://pan.baidu.com/s/1iF-Yl_9TkFFeAy-JysfGSw  密码: d2d8
* COCO2017验证集mAP：
```
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.277
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.453
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.290
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.126
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.308
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.378
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.243
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.358
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.366
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.169
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.402
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.512
```

## 如果对Faster RCNN原理不是很理解可参考我的bilibili
* https://b23.tv/sXcBSP

## Faster RCNN框架图
![Faster R-CNN](https://github.com/WZMIAOMIAO/deep-learning-for-image-processing/raw/master/pytorch_object_detection/faster_rcnn/fasterRCNN.png) 


================================================
FILE: pytorch_object_detection/train_coco_dataset/backbone/__init__.py
================================================
from .resnet50_fpn_model import resnet50_fpn_backbone
from .mobilenetv2_model import MobileNetV2
from .vgg_model import vgg
from .resnet import *
from .feature_pyramid_network import BackboneWithFPN, LastLevelMaxPool


================================================
FILE: pytorch_object_detection/train_coco_dataset/backbone/feature_pyramid_network.py
================================================
from collections import OrderedDict

import torch.nn as nn
import torch
from torch import Tensor
import torch.nn.functional as F

from torch.jit.annotations import Tuple, List, Dict


class IntermediateLayerGetter(nn.ModuleDict):
    """
    Wrap a model so that a forward pass yields selected intermediate outputs.

    The direct children of ``model`` are copied, in registration order, up to
    and including the last child named in ``return_layers``; later children
    are dropped. ``forward`` then applies the kept children one after another,
    so this only works when the children were registered in the order they
    are used and no child is reused inside the model's own forward.
    Only direct children can be selected (``model.feature1`` works,
    ``model.feature1.layer2`` does not).

    Arguments:
        model (nn.Module): model whose intermediate activations are wanted
        return_layers (Dict[name, new_name]): maps the name of a direct child
            of ``model`` to the key under which its output is returned.

    Raises:
        ValueError: if a key of ``return_layers`` is not a direct child of
            ``model``.
    """
    __annotations__ = {
        "return_layers": Dict[str, str],
    }

    def __init__(self, model, return_layers):
        child_names = [child_name for child_name, _ in model.named_children()]
        if not set(return_layers).issubset(child_names):
            raise ValueError("return_layers are not present in model")

        # Working copy of the requested names; entries are removed as they are found.
        pending = {str(src): str(dst) for src, dst in return_layers.items()}

        # Keep children in order and stop right after the last requested one,
        # so that unused trailing layers are discarded.
        kept = OrderedDict()
        for child_name, child in model.named_children():
            kept[child_name] = child
            pending.pop(child_name, None)
            if not pending:
                break

        super().__init__(kept)
        self.return_layers = return_layers

    def forward(self, x):
        collected = OrderedDict()
        # Run the kept children sequentially and record the requested outputs
        # under their user-chosen names.
        for child_name, child in self.items():
            x = child(x)
            if child_name not in self.return_layers:
                continue
            collected[self.return_layers[child_name]] = x
        return collected


class BackboneWithFPN(nn.Module):
    """
    Combine a feature-extracting backbone with a FeaturePyramidNetwork.

    When ``re_getter`` is True the backbone is wrapped in this module's
    IntermediateLayerGetter so that it returns the feature maps named in
    ``return_layers`` (the limitations of IntermediateLayerGetter apply).
    When ``re_getter`` is False the backbone is used as given and its output
    is passed straight to the FPN.

    Arguments:
        backbone (nn.Module)
        return_layers (Dict[name, new_name]): maps the name of a backbone
            child module to the key under which its activation is returned.
            Must not be None when ``re_getter`` is True.
        in_channels_list (List[int]): number of channels of each feature map
            handed to the FPN, in the order they appear in the backbone output
        out_channels (int): number of channels in the FPN.
        extra_blocks: extra block applied by the FPN; a LastLevelMaxPool is
            created when None.
        re_getter (bool): whether to wrap ``backbone`` in IntermediateLayerGetter.
    Attributes:
        out_channels (int): the number of channels in the FPN
    """

    def __init__(self,
                 backbone: nn.Module,
                 return_layers=None,
                 in_channels_list=None,
                 out_channels=256,
                 extra_blocks=None,
                 re_getter=True):
        super().__init__()

        if re_getter:
            assert return_layers is not None
            body = IntermediateLayerGetter(backbone, return_layers=return_layers)
        else:
            body = backbone
        self.body = body

        self.fpn = FeaturePyramidNetwork(
            in_channels_list=in_channels_list,
            out_channels=out_channels,
            extra_blocks=LastLevelMaxPool() if extra_blocks is None else extra_blocks,
        )

        self.out_channels = out_channels

    def forward(self, x):
        # backbone features first, then the pyramid on top of them
        return self.fpn(self.body(x))


class FeaturePyramidNetwork(nn.Module):
    """
    Module that adds a FPN from on top of a set of feature maps. This is based on
    `"Feature Pyramid Network for Object Detection" <https://arxiv.org/abs/1612.03144>`_.
    The feature maps are currently supposed to be in increasing depth
    order.
    The input to the model is expected to be an OrderedDict[Tensor], containing
    the feature maps on top of which the FPN will be added.
    Arguments:
        in_channels_list (list[int]): number of channels for each feature map that
            is passed to the module. Entries equal to 0 are skipped (no conv
            layers are created for them).
        out_channels (int): number of channels of the FPN representation
        extra_blocks (ExtraFPNBlock or None): if provided, extra operations will
            be performed. It is expected to take the fpn features, the original
            features and the names of the original features as input, and returns
            a new list of feature maps and their corresponding names
    """

    def __init__(self, in_channels_list, out_channels, extra_blocks=None):
        super().__init__()
        # 1x1 convolutions that bring every incoming feature map to out_channels
        self.inner_blocks = nn.ModuleList()
        # 3x3 convolutions applied to each merged map to produce the output feature map
        self.layer_blocks = nn.ModuleList()
        for in_channels in in_channels_list:
            if in_channels == 0:
                continue
            inner_block_module = nn.Conv2d(in_channels, out_channels, 1)
            layer_block_module = nn.Conv2d(out_channels, out_channels, 3, padding=1)
            self.inner_blocks.append(inner_block_module)
            self.layer_blocks.append(layer_block_module)

        # Initialize parameters now (before extra_blocks is attached) to avoid
        # modifying the initialization of the extra blocks.
        # self.modules() is required here: the direct children are the two
        # ModuleList containers, so iterating self.children() never reaches a
        # Conv2d and would leave the convolutions with their default init.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_uniform_(m.weight, a=1)
                nn.init.constant_(m.bias, 0)

        self.extra_blocks = extra_blocks

    def get_result_from_inner_blocks(self, x: Tensor, idx: int) -> Tensor:
        """
        This is equivalent to self.inner_blocks[idx](x),
        but torchscript doesn't support this yet
        """
        num_blocks = len(self.inner_blocks)
        if idx < 0:
            idx += num_blocks
        i = 0
        out = x
        for module in self.inner_blocks:
            if i == idx:
                out = module(x)
            i += 1
        return out

    def get_result_from_layer_blocks(self, x: Tensor, idx: int) -> Tensor:
        """
        This is equivalent to self.layer_blocks[idx](x),
        but torchscript doesn't support this yet
        """
        num_blocks = len(self.layer_blocks)
        if idx < 0:
            idx += num_blocks
        i = 0
        out = x
        for module in self.layer_blocks:
            if i == idx:
                out = module(x)
            i += 1
        return out

    def forward(self, x: Dict[str, Tensor]) -> Dict[str, Tensor]:
        """
        Computes the FPN for a set of feature maps.
        Arguments:
            x (OrderedDict[Tensor]): feature maps for each feature level.
        Returns:
            results (OrderedDict[Tensor]): feature maps after FPN layers.
                They are ordered from highest resolution first.
        """
        # unpack OrderedDict into two lists for easier handling
        names = list(x.keys())
        x = list(x.values())

        # Project the last (deepest) feature map to out_channels with its 1x1 conv
        last_inner = self.get_result_from_inner_blocks(x[-1], -1)
        # results holds one output feature map per level
        results = []
        # The deepest output is the projected map passed through its 3x3 conv
        results.append(self.get_result_from_layer_blocks(last_inner, -1))

        # Walk from the second-deepest level up to the first: upsample the running
        # top-down map to the lateral map's spatial size and add the two.
        for idx in range(len(x) - 2, -1, -1):
            inner_lateral = self.get_result_from_inner_blocks(x[idx], idx)
            feat_shape = inner_lateral.shape[-2:]
            inner_top_down = F.interpolate(last_inner, size=feat_shape, mode="nearest")
            last_inner = inner_lateral + inner_top_down
            results.insert(0, self.get_result_from_layer_blocks(last_inner, idx))

        # Let the optional extra block extend the outputs (and their names)
        if self.extra_blocks is not None:
            results, names = self.extra_blocks(results, x, names)

        # make it back an OrderedDict
        out = OrderedDict([(k, v) for k, v in zip(names, results)])

        return out


class LastLevelMaxPool(torch.nn.Module):
    """
    Extend a list of feature maps with a stride-2 subsampling of the last one.

    The new map is ``max_pool2d`` of the last entry of ``x`` with kernel size 1,
    stride 2 and no padding; it is appended to ``x`` and the name ``"pool"`` is
    appended to ``names``. Both lists are modified in place and returned.
    ``y`` is accepted but not used.
    """

    def forward(self, x: List[Tensor], y: List[Tensor], names: List[str]) -> Tuple[List[Tensor], List[str]]:
        pooled = F.max_pool2d(x[-1], kernel_size=1, stride=2, padding=0)
        x.append(pooled)
        names.append("pool")
        return x, names


================================================
FILE: pytorch_object_detection/train_coco_dataset/backbone/mobilenetv2_model.py
================================================
from torch import nn
import torch


def _make_divisible(ch, divisor=8, min_ch=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    """
    if min_ch is None:
        min_ch = divisor
    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_ch < 0.9 * ch:
        new_ch += divisor
    return new_ch


class ConvBNReLU(nn.Sequential):
    """
    Bias-free convolution followed by a normalization layer and ReLU6.

    Padding is ``(kernel_size - 1) // 2``. ``norm_layer`` is called with the
    number of output channels to build the normalization layer and defaults to
    ``nn.BatchNorm2d``.
    """

    def __init__(self, in_channel, out_channel, kernel_size=3, stride=1, groups=1, norm_layer=None):
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        conv = nn.Conv2d(in_channel, out_channel,
                         kernel_size=kernel_size,
                         stride=stride,
                         padding=(kernel_size - 1) // 2,
                         groups=groups,
                         bias=False)
        super().__init__(conv, make_norm(out_channel), nn.ReLU6(inplace=True))


class InvertedResidual(nn.Module):
    """
    Inverted residual block: optional 1x1 expansion, 3x3 depthwise convolution,
    then a linear 1x1 projection followed by a normalization layer.

    The input is added to the output only when ``stride == 1`` and the input
    and output channel counts match. The expansion stage is omitted when
    ``expand_ratio == 1``. ``norm_layer`` defaults to ``nn.BatchNorm2d``.
    """

    def __init__(self, in_channel, out_channel, stride, expand_ratio, norm_layer=None):
        super().__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d

        hidden_channel = in_channel * expand_ratio
        self.use_shortcut = stride == 1 and in_channel == out_channel

        stages = []
        if expand_ratio != 1:
            # 1x1 pointwise conv (expansion)
            stages.append(ConvBNReLU(in_channel, hidden_channel, kernel_size=1, norm_layer=norm_layer))
        # 3x3 depthwise conv
        stages.append(ConvBNReLU(hidden_channel, hidden_channel, stride=stride,
                                 groups=hidden_channel, norm_layer=norm_layer))
        # 1x1 pointwise conv (linear: no activation afterwards)
        stages.append(nn.Conv2d(hidden_channel, out_channel, kernel_size=1, bias=False))
        stages.append(norm_layer(out_channel))

        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        if not self.use_shortcut:
            return self.conv(x)
        return x + self.conv(x)


class MobileNetV2(nn.Module):
    """
    MobileNetV2 classifier: a stack of ConvBNReLU / InvertedResidual feature
    layers, global average pooling, and a dropout + linear classifier.

    Args:
        num_classes: output size of the final linear layer.
        alpha: multiplier applied to every stage's channel count.
        round_nearest: channel counts are rounded to a multiple of this value.
        weights_path: if given, a state dict is loaded from this path with
            ``torch.load``; otherwise the layers are initialized in place.
        norm_layer: callable building a normalization layer from a channel
            count; defaults to ``nn.BatchNorm2d``.
    """

    def __init__(self, num_classes=1000, alpha=1.0, round_nearest=8, weights_path=None, norm_layer=None):
        super().__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d

        # One row per stage: t (expand ratio), c (output channels),
        # n (number of repeated blocks), s (stride of the first block)
        stage_settings = [
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1],
        ]

        current_channel = _make_divisible(32 * alpha, round_nearest)
        head_channel = _make_divisible(1280 * alpha, round_nearest)

        # stem convolution
        stack = [ConvBNReLU(3, current_channel, stride=2, norm_layer=norm_layer)]
        # inverted residual stages; only the first block of a stage uses its stride
        for expand, width, repeats, first_stride in stage_settings:
            stage_channel = _make_divisible(width * alpha, round_nearest)
            for position in range(repeats):
                block_stride = first_stride if position == 0 else 1
                stack.append(InvertedResidual(current_channel, stage_channel, block_stride,
                                              expand_ratio=expand, norm_layer=norm_layer))
                current_channel = stage_channel
        # final 1x1 convolution
        stack.append(ConvBNReLU(current_channel, head_channel, 1, norm_layer=norm_layer))
        self.features = nn.Sequential(*stack)

        # classifier head
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(head_channel, num_classes)
        )

        if weights_path is not None:
            self.load_state_dict(torch.load(weights_path))
            return

        # No checkpoint given: initialize every layer explicitly.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        pooled = self.avgpool(self.features(x))
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)


================================================
FILE: pytorch_object_detection/train_coco_dataset/backbone/resnet.py
================================================
import torch.nn as nn
import torch


class BasicBlock(nn.Module):
    """
    Residual block made of two 3x3 convolutions, each followed by BatchNorm.

    ``stride`` is applied by the first convolution. ``downsample``, when given,
    transforms the input before it is added to the main branch. Extra keyword
    arguments are accepted and ignored.
    """
    expansion = 1

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, **kwargs):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample

    def forward(self, x):
        # shortcut branch: identity unless a downsample module was supplied
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        out += shortcut
        return self.relu(out)


class Bottleneck(nn.Module):
    """
    Bottleneck residual block: 1x1 reduce, 3x3 (optionally grouped), 1x1 expand.

    Note: in the original paper the first 1x1 convolution of the downsampling
    block has stride 2 and the 3x3 convolution has stride 1. The official
    PyTorch implementation instead uses stride 1 for the first 1x1 convolution
    and stride 2 for the 3x3 convolution, which is reported to improve top-1
    accuracy by roughly 0.5%.
    See ResNet v1.5: https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch
    """
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None,
                 groups=1, width_per_group=64):
        super().__init__()

        # channel count of the inner 3x3 convolution
        width = int(out_channel * (width_per_group / 64.)) * groups
        expanded = out_channel * self.expansion

        # 1x1: squeeze channels
        self.conv1 = nn.Conv2d(in_channel, width, kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(width)
        # 3x3: carries the stride and the grouping
        self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride,
                               padding=1, groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(width)
        # 1x1: unsqueeze channels
        self.conv3 = nn.Conv2d(width, expanded, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(expanded)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        # shortcut branch: identity unless a downsample module was supplied
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out += shortcut
        return self.relu(out)


class ResNet(nn.Module):
    """
    ResNet / ResNeXt built from a configurable residual block.

    Args:
        block: residual block class; must expose an ``expansion`` attribute and
            accept ``(in_channel, channel, stride=, downsample=, groups=,
            width_per_group=)``.
        blocks_num: number of blocks in each of the four stages.
        num_classes: output size of the classifier (used when ``include_top``).
        include_top: when False, no pooling/classifier is created and
            ``forward`` returns the output of the last stage.
        groups: forwarded to every block.
        width_per_group: forwarded to every block.
    """

    def __init__(self,
                 block,
                 blocks_num,
                 num_classes=1000,
                 include_top=True,
                 groups=1,
                 width_per_group=64):
        super().__init__()
        self.include_top = include_top
        # running input channel count, updated by _make_layer as stages are built
        self.in_channel = 64

        self.groups = groups
        self.width_per_group = width_per_group

        # stem
        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # four residual stages; all but the first halve the resolution
        self.layer1 = self._make_layer(block, 64, blocks_num[0])
        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)
        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)
        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)

        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` blocks and update ``self.in_channel``."""
        stage_out = channel * block.expansion

        # A projection shortcut is needed whenever the first block changes the
        # resolution or the channel count.
        shortcut = None
        if stride != 1 or self.in_channel != stage_out:
            shortcut = nn.Sequential(
                nn.Conv2d(self.in_channel, stage_out, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(stage_out))

        shared = dict(groups=self.groups, width_per_group=self.width_per_group)

        stage = [block(self.in_channel, channel, downsample=shortcut, stride=stride, **shared)]
        self.in_channel = stage_out
        stage.extend(block(self.in_channel, channel, **shared) for _ in range(1, block_num))

        return nn.Sequential(*stage)

    def forward(self, x):
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        if not self.include_top:
            return x

        x = torch.flatten(self.avgpool(x), 1)
        return self.fc(x)


def resnet34(num_classes=1000, include_top=True):
    """
    Build a ResNet from BasicBlock with stage depths [3, 4, 6, 3].

    Weights URL noted by the original author:
    https://download.pytorch.org/models/resnet34-333f7ec4.pth
    """
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths, num_classes=num_classes, include_top=include_top)


def resnet50(num_classes=1000, include_top=True):
    """
    Build a ResNet from Bottleneck with stage depths [3, 4, 6, 3].

    Weights URL noted by the original author:
    https://download.pytorch.org/models/resnet50-19c8e357.pth
    """
    stage_depths = [3, 4, 6, 3]
    return ResNet(Bottleneck, stage_depths, num_classes=num_classes, include_top=include_top)


def resnet101(num_classes=1000, include_top=True):
    """
    Build a ResNet from Bottleneck with stage depths [3, 4, 23, 3].

    Weights URL noted by the original author:
    https://download.pytorch.org/models/resnet101-5d3b4d8f.pth
    """
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths, num_classes=num_classes, include_top=include_top)


def resnext50_32x4d(num_classes=1000, include_top=True):
    """
    Build a ResNet from Bottleneck with stage depths [3, 4, 6, 3],
    32 groups and a width of 4 per group.

    Weights URL noted by the original author:
    https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth
    """
    return ResNet(Bottleneck, [3, 4, 6, 3],
                  num_classes=num_classes,
                  include_top=include_top,
                  groups=32,
                  width_per_group=4)


def resnext101_32x8d(num_classes=1000, include_top=True):
    """
    Build a ResNet from Bottleneck with stage depths [3, 4, 23, 3],
    32 groups and a width of 8 per group.

    Weights URL noted by the original author:
    https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth
    """
    return ResNet(Bottleneck, [3, 4, 23, 3],
                  num_classes=num_classes,
                  include_top=include_top,
                  groups=32,
                  width_per_group=8)


================================================
FILE: pytorch_object_detection/train_coco_dataset/backbone/resnet50_fpn_model.py
================================================
import os

import torch
import torch.nn as nn
from torchvision.ops.misc import FrozenBatchNorm2d

from .feature_pyramid_network import BackboneWithFPN, LastLevelMaxPool


class Bottleneck(nn.Module):
    """
    Bottleneck residual block (1x1 reduce, 3x3, 1x1 expand) with a pluggable
    normalization layer.

    ``stride`` is applied by the 3x3 convolution. ``norm_layer`` is called with
    a channel count to build each normalization layer and defaults to
    ``nn.BatchNorm2d``. ``downsample``, when given, transforms the input before
    it is added to the main branch.
    """
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, norm_layer=None):
        super().__init__()
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        expanded = out_channel * self.expansion

        # 1x1: squeeze channels
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=1, bias=False)
        self.bn1 = make_norm(out_channel)
        # 3x3: carries the stride
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = make_norm(out_channel)
        # 1x1: unsqueeze channels
        self.conv3 = nn.Conv2d(out_channel, expanded, kernel_size=1, stride=1, bias=False)
        self.bn3 = make_norm(expanded)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        # shortcut branch: identity unless a downsample module was supplied
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out += shortcut
        return self.relu(out)


class ResNet(nn.Module):
    """
    ResNet built from a configurable residual block and normalization layer.

    Args:
        block: residual block class; must expose an ``expansion`` attribute and
            accept ``(in_channel, channel, stride=, downsample=, norm_layer=)``.
        blocks_num: number of blocks in each of the four stages.
        num_classes: output size of the classifier (used when ``include_top``).
        include_top: when False, no pooling/classifier is created and
            ``forward`` returns the output of the last stage.
        norm_layer: callable building a normalization layer from a channel
            count; defaults to ``nn.BatchNorm2d``.
    """

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True, norm_layer=None):
        super().__init__()
        self._norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer

        self.include_top = include_top
        # running input channel count, updated by _make_layer as stages are built
        self.in_channel = 64

        # stem
        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = self._norm_layer(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # four residual stages; all but the first halve the resolution
        self.layer1 = self._make_layer(block, 64, blocks_num[0])
        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)
        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)
        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)

        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` blocks and update ``self.in_channel``."""
        make_norm = self._norm_layer
        stage_out = channel * block.expansion

        # A projection shortcut is needed whenever the first block changes the
        # resolution or the channel count.
        shortcut = None
        if stride != 1 or self.in_channel != stage_out:
            shortcut = nn.Sequential(
                nn.Conv2d(self.in_channel, stage_out, kernel_size=1, stride=stride, bias=False),
                make_norm(stage_out))

        stage = [block(self.in_channel, channel, downsample=shortcut,
                       stride=stride, norm_layer=make_norm)]
        self.in_channel = stage_out
        stage.extend(block(self.in_channel, channel, norm_layer=make_norm)
                     for _ in range(1, block_num))

        return nn.Sequential(*stage)

    def forward(self, x):
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        if not self.include_top:
            return x

        x = torch.flatten(self.avgpool(x), 1)
        return self.fc(x)


def overwrite_eps(model, eps):
    """
    Set ``eps`` on every FrozenBatchNorm2d layer found in ``model``.

    This is necessary to address the BC-breaking change introduced
    by the bug-fix at pytorch/vision#2933. The overwrite is applied
    only when the pretrained weights are loaded to maintain compatibility
    with previous versions.

    Args:
        model (nn.Module): The model on which we perform the overwrite.
        eps (float): The new value of eps.
    """
    frozen_norms = (layer for layer in model.modules() if isinstance(layer, FrozenBatchNorm2d))
    for layer in frozen_norms:
        layer.eps = eps


def resnet50_fpn_backbone(pretrain_path="",
                          norm_layer=FrozenBatchNorm2d,  # FrozenBatchNorm2d works like BatchNorm2d, but its parameters cannot be updated
                          trainable_layers=3,
                          returned_layers=None,
                          extra_blocks=None):
    """
    Build a ResNet-50 backbone topped with an FPN.

    Args:
        pretrain_path: path to pretrained ResNet-50 weights; leave as the empty
                    string to skip loading. The weights are loaded with
                    ``strict=False`` and the load result is printed.
        norm_layer: the official default is FrozenBatchNorm2d, i.e. a BN layer whose
                    parameters are not updated (with a very small batch_size, BN makes
                    results worse than not using it at all).
                    If your GPU memory allows a large batch_size, you can pass the
                    regular BatchNorm2d instead.
                    (https://github.com/facebookresearch/maskrcnn-benchmark/issues/267)
        trainable_layers: how many layers stay trainable (0 to 5), counted from the
                    end of the network: layer4, layer3, layer2, layer1, conv1.
                    With 5, bn1 is made trainable as well.
        returned_layers: which ResNet stages (values in 1..4) feed the FPN;
                    defaults to [1, 2, 3, 4].
        extra_blocks: extra block applied on top of the FPN outputs;
                    defaults to LastLevelMaxPool().

    Returns:
        A BackboneWithFPN whose FPN outputs have 256 channels.

    """
    resnet_backbone = ResNet(Bottleneck, [3, 4, 6, 3],
                             include_top=False,
                             norm_layer=norm_layer)

    # NOTE(review): norm_layer is used by ResNet as a callable that builds layers,
    # and the default argument here is the FrozenBatchNorm2d class itself. For that
    # default, isinstance(<class>, FrozenBatchNorm2d) is False, so overwrite_eps is
    # not called. Confirm whether `norm_layer is FrozenBatchNorm2d` was intended;
    # enabling it would change the eps used by every frozen BN layer.
    if isinstance(norm_layer, FrozenBatchNorm2d):
        overwrite_eps(resnet_backbone, 0.0)

    if pretrain_path != "":
        assert os.path.exists(pretrain_path), "{} is not exist.".format(pretrain_path)
        # load the pretrained weights
        print(resnet_backbone.load_state_dict(torch.load(pretrain_path), strict=False))

    # select layers that wont be frozen
    assert 0 <= trainable_layers <= 5
    layers_to_train = ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'][:trainable_layers]

    # when training every layer, do not forget the bn1 that follows conv1
    if trainable_layers == 5:
        layers_to_train.append("bn1")

    # freeze layers
    for name, parameter in resnet_backbone.named_parameters():
        # freeze every parameter whose name does not start with an entry of layers_to_train
        if all([not name.startswith(layer) for layer in layers_to_train]):
            parameter.requires_grad_(False)

    if extra_blocks is None:
        extra_blocks = LastLevelMaxPool()

    if returned_layers is None:
        returned_layers = [1, 2, 3, 4]
    # every returned layer index must be greater than 0 and smaller than 5
    assert min(returned_layers) > 0 and max(returned_layers) < 5

    # with the default returned_layers this is
    # return_layers = {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'}
    return_layers = {f'layer{k}': str(v) for v, k in enumerate(returned_layers)}

    # in_channel is the channel count of layer4's output feature map = 2048
    in_channels_stage2 = resnet_backbone.in_channel // 8  # 256
    # channel count of each ResNet-50 feature map handed to the FPN
    in_channels_list = [in_channels_stage2 * 2 ** (i - 1) for i in returned_layers]
    # channel count of every feature map produced by the FPN
    out_channels = 256
    return BackboneWithFPN(resnet_backbone, return_layers, in_channels_list, out_channels, extra_blocks=extra_blocks)


================================================
FILE: pytorch_object_detection/train_coco_dataset/backbone/vgg_model.py
================================================
import torch.nn as nn
import torch


class VGG(nn.Module):
    """
    VGG classifier: a caller-supplied feature extractor followed by three
    fully connected layers.

    Args:
        features: module producing the feature map; its flattened output must
            have 512*7*7 values per sample.
        class_num: output size of the last linear layer.
        init_weights: when True (and no ``weights_path`` is given) the conv and
            linear layers are re-initialized with Xavier-uniform weights.
        weights_path: if given, a state dict is loaded from this path with
            ``torch.load``.
    """

    def __init__(self, features, class_num=1000, init_weights=False, weights_path=None):
        super().__init__()
        self.features = features

        head = [
            nn.Linear(512*7*7, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, class_num),
        ]
        self.classifier = nn.Sequential(*head)

        # A checkpoint takes precedence over explicit initialization.
        if weights_path is not None:
            self.load_state_dict(torch.load(weights_path))
        elif init_weights:
            self._initialize_weights()

    def forward(self, x):
        # N x 3 x 224 x 224 -> N x 512 x 7 x 7
        feature_map = self.features(x)
        # N x 512 x 7 x 7 -> N x 512*7*7
        flat = torch.flatten(feature_map, start_dim=1)
        return self.classifier(flat)

    def _initialize_weights(self):
        """Xavier-uniform weights and zero biases for all conv and linear layers."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is None:
                    continue
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                nn.init.constant_(m.bias, 0)


def make_features(cfg: list):
    """
    Turn a layer specification into a sequential feature extractor.

    Each entry of ``cfg`` is either the string ``"M"``, which adds a 2x2
    max-pooling layer with stride 2, or a channel count, which adds a 3x3
    convolution (padding 1) with that many output channels followed by an
    in-place ReLU. The first convolution expects 3 input channels.
    """
    stack = []
    prev_channels = 3
    for entry in cfg:
        if entry == "M":
            stack.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        stack.append(nn.Conv2d(prev_channels, entry, kernel_size=3, padding=1))
        stack.append(nn.ReLU(True))
        prev_channels = entry
    return nn.Sequential(*stack)


# VGG feature-extractor layouts keyed by model name, consumed by make_features:
# an integer is the output channel count of a 3x3 convolution (followed by ReLU),
# and 'M' inserts a 2x2 max-pooling layer with stride 2.
cfgs = {
    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


def vgg(model_name="vgg16", weights_path=None):
    """
    Build the VGG variant named ``model_name`` (a key of ``cfgs``).

    ``weights_path`` is forwarded to VGG. An AssertionError is raised when
    ``model_name`` is not a key of ``cfgs``.
    """
    assert model_name in cfgs, "Warning: model number {} not in cfgs dict!".format(model_name)
    feature_extractor = make_features(cfgs[model_name])
    return VGG(feature_extractor, weights_path=weights_path)


================================================
FILE: pytorch_object_detection/train_coco_dataset/change_backbone_with_fpn.py
================================================
import os
import datetime

import torch

import transforms
from network_files import FasterRCNN, AnchorsGenerator
from my_dataset import CocoDetection
from train_utils import GroupedBatchSampler, create_aspect_ratio_groups
from train_utils import train_eval_utils as utils
from backbone import BackboneWithFPN, LastLevelMaxPool


def create_model(num_classes):
    """
    Assemble a Faster R-CNN detector on top of a MobileNetV3-Large + FPN backbone.

    Args:
        num_classes: number of output classes passed to ``FasterRCNN``
            (the caller in this file passes "classes + 1" to include background).

    Returns:
        The constructed ``FasterRCNN`` model.
    """
    import torchvision
    from torchvision.models.feature_extraction import create_feature_extractor

    # --- mobilenet_v3_large fpn backbone --- #
    # Backbone node name -> name of the feature map handed to the FPN.
    return_layers = {
        "features.6": "0",   # stride 8
        "features.12": "1",  # stride 16
        "features.16": "2",  # stride 32
    }
    # Channel count of each feature map that is fed to the FPN.
    in_channels_list = [40, 112, 960]
    pretrained_net = torchvision.models.mobilenet_v3_large(pretrained=True)
    # print(pretrained_net)
    new_backbone = create_feature_extractor(pretrained_net, return_layers)
    # To inspect the extracted feature shapes:
    # img = torch.randn(1, 3, 224, 224)
    # outputs = new_backbone(img)
    # [print(f"{k} shape: {v.shape}") for k, v in outputs.items()]

    # --- efficientnet_b0 fpn backbone (alternative) --- #
    # pretrained_net = torchvision.models.efficientnet_b0(pretrained=True)
    # # print(pretrained_net)
    # return_layers = {"features.3": "0",  # stride 8
    #                  "features.4": "1",  # stride 16
    #                  "features.8": "2"}  # stride 32
    # # Channel count of each feature map that is fed to the FPN.
    # in_channels_list = [40, 80, 1280]
    # new_backbone = create_feature_extractor(pretrained_net, return_layers)
    # # img = torch.randn(1, 3, 224, 224)
    # # outputs = new_backbone(img)
    # # [print(f"{k} shape: {v.shape}") for k, v in outputs.items()]

    fpn_kwargs = dict(return_layers=return_layers,
                      in_channels_list=in_channels_list,
                      out_channels=256,
                      extra_blocks=LastLevelMaxPool(),
                      re_getter=False)
    backbone_with_fpn = BackboneWithFPN(new_backbone, **fpn_kwargs)

    # One anchor size per feature level, three aspect ratios for each size.
    # NOTE(review): four sizes presumably cover the three FPN levels plus the
    # extra LastLevelMaxPool level - confirm against BackboneWithFPN.
    anchor_sizes = tuple((size,) for size in (64, 128, 256, 512))
    aspect_ratios = tuple((0.5, 1.0, 2.0) for _ in anchor_sizes)
    anchor_generator = AnchorsGenerator(sizes=anchor_sizes,
                                        aspect_ratios=aspect_ratios)

    roi_pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=['0', '1', '2'],  # feature maps on which RoIAlign pooling is performed
        output_size=[7, 7],             # spatial size of the RoIAlign output
        sampling_ratio=2)               # sampling ratio

    return FasterRCNN(backbone=backbone_with_fpn,
                      num_classes=num_classes,
                      rpn_anchor_generator=anchor_generator,
                      box_roi_pool=roi_pooler)


def main(args):
    """
    Train the detector on COCO, evaluate after every epoch and save checkpoints.

    Args:
        args: parsed command-line namespace. Fields read here: ``device``,
            ``data_path``, ``num_classes``, ``output_dir``, ``resume``,
            ``start_epoch``, ``epochs``, ``lr``, ``momentum``,
            ``weight_decay``, ``lr_steps``, ``lr_gamma``, ``batch_size``,
            ``aspect_ratio_group_factor`` and ``amp``.

    Side effects:
        Appends one line per epoch to a timestamped ``results*.txt`` file in
        the working directory, writes ``model-<epoch>.pth`` checkpoints into
        ``args.output_dir`` and plots loss / lr / mAP curves at the end.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    # File that accumulates the per-epoch COCO metrics, loss and learning rate.
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    data_transform = {
        "train": transforms.Compose([transforms.ToTensor(),
                                     transforms.RandomHorizontalFlip(0.5)]),
        "val": transforms.Compose([transforms.ToTensor()])
    }

    COCO_root = args.data_path

    # load train data set
    # coco2017 -> annotations -> instances_train2017.json
    train_dataset = CocoDetection(COCO_root, "train", data_transform["train"])
    train_sampler = None
    train_batch_sampler = None

    # Optionally build batches from images with a similar aspect ratio.
    # According to the original author this reduces the GPU memory needed
    # for training; it is enabled by default.
    if args.aspect_ratio_group_factor >= 0:
        train_sampler = torch.utils.data.RandomSampler(train_dataset)
        # Bin index of every image according to its aspect ratio.
        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)
        # Every batch is drawn from a single aspect-ratio bin.
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)

    # A custom collate_fn is required: each sample is an (image, target) pair
    # and cannot be stacked by the default collate function.
    batch_size = args.batch_size
    # os.cpu_count() may return None when the CPU count cannot be determined.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using %g dataloader workers' % nw)
    if train_sampler is not None:
        # Aspect-ratio grouping needs the DataLoader's batch_sampler argument.
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_sampler=train_batch_sampler,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)
    else:
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_size=batch_size,
                                                        shuffle=True,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)

    # load validation data set
    # coco2017 -> annotations -> instances_val2017.json
    val_dataset = CocoDetection(COCO_root, "val", data_transform["val"])
    val_data_set_loader = torch.utils.data.DataLoader(val_dataset,
                                                      batch_size=1,
                                                      shuffle=False,
                                                      pin_memory=True,
                                                      num_workers=nw,
                                                      collate_fn=val_dataset.collate_fn)

    # create model num_classes equal background + classes
    model = create_model(num_classes=args.num_classes + 1)
    # print(model)

    model.to(device)

    # define optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        milestones=args.lr_steps,
                                                        gamma=args.lr_gamma)

    # When a checkpoint path is given, restore the saved state and continue
    # training from the epoch after the one stored in the checkpoint.
    if args.resume != "":
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp and "scaler" in checkpoint:
            scaler.load_state_dict(checkpoint["scaler"])
        print("the training process from epoch{}...".format(args.start_epoch))

    # Checkpoints go to the user-selected directory (previously the path was
    # hard-coded to ./save_weights, which broke any other --output-dir).
    os.makedirs(args.output_dir, exist_ok=True)

    train_loss = []
    learning_rate = []
    val_map = []

    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch, printing every 50 iterations
        mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader,
                                              device=device, epoch=epoch,
                                              print_freq=50, warmup=True,
                                              scaler=scaler)
        train_loss.append(mean_loss.item())
        learning_rate.append(lr)

        # update the learning rate
        lr_scheduler.step()

        # evaluate on the test dataset
        coco_info = utils.evaluate(model, val_data_set_loader, device=device)

        # write into txt
        with open(results_file, "a") as f:
            # The line holds the COCO metrics followed by the loss and the learning rate.
            result_info = [f"{i:.4f}" for i in coco_info + [mean_loss.item()]] + [f"{lr:.6f}"]
            txt = "epoch:{} {}".format(epoch, '  '.join(result_info))
            f.write(txt + "\n")

        val_map.append(coco_info[1])  # pascal mAP

        # save weights
        save_files = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler': lr_scheduler.state_dict(),
            'epoch': epoch}
        if args.amp:
            save_files["scaler"] = scaler.state_dict()
        torch.save(save_files, os.path.join(args.output_dir, "model-{}.pth".format(epoch)))

    # plot loss and lr curve
    if train_loss and learning_rate:
        from plot_curve import plot_loss_and_lr
        plot_loss_and_lr(train_loss, learning_rate)

    # plot mAP curve
    if val_map:
        from plot_curve import plot_map
        plot_map(val_map)


if __name__ == "__main__":
    import argparse

    def _str2bool(value):
        """Convert a command-line token such as 'true'/'False'/'1'/'0' to bool.

        Without an explicit converter argparse stores the raw string, and any
        non-empty string (including "False") is truthy.
        """
        if isinstance(value, bool):
            return value
        token = value.strip().lower()
        if token in ("true", "t", "yes", "y", "1"):
            return True
        if token in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError("boolean value expected, got '{}'".format(value))

    parser = argparse.ArgumentParser(
        description=__doc__)

    # device used for training
    parser.add_argument('--device', default='cuda:0', help='device')
    # root directory of the training data set
    parser.add_argument('--data-path', default='/data/coco2017', help='dataset')
    # number of object classes (background not included)
    parser.add_argument('--num-classes', default=90, type=int, help='num_classes')
    # directory where checkpoints are saved
    parser.add_argument('--output-dir', default='./save_weights', help='path where to save')
    # checkpoint to resume from; leave empty to start a fresh run
    parser.add_argument('--resume', default='', type=str, help='resume from checkpoint')
    # epoch index to start training from
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # total number of training epochs
    parser.add_argument('--epochs', default=26, type=int, metavar='N',
                        help='number of total epochs to run')
    # learning rate
    parser.add_argument('--lr', default=0.005, type=float,
                        help='initial learning rate, 0.02 is the default value for training '
                             'on 8 gpus and 2 images_per_gpu')
    # momentum of SGD
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # weight decay of SGD
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # milestones for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int,
                        help='decrease lr every step-size epochs')
    # gamma for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')
    # training batch size
    parser.add_argument('--batch_size', default=4, type=int, metavar='N',
                        help='batch size when training.')
    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
    # Mixed-precision training (needs a GPU that supports it).
    # Accepts "--amp", "--amp True" and "--amp False"; the value is parsed as
    # a real boolean so that "--amp False" actually disables AMP.
    parser.add_argument("--amp", default=False, type=_str2bool, nargs="?", const=True,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()
    print(args)

    # Create the checkpoint directory if it does not exist yet.
    os.makedirs(args.output_dir, exist_ok=True)

    main(args)


================================================
FILE: pytorch_object_detection/train_coco_dataset/coco91_indices.json
================================================
{
    "1": "person",
    "2": "bicycle",
    "3": "car",
    "4": "motorcycle",
    "5": "airplane",
    "6": "bus",
    "7": "train",
    "8": "truck",
    "9": "boat",
    "10": "traffic light",
    "11": "fire hydrant",
    "12": "N/A",
    "13": "stop sign",
    "14": "parking meter",
    "15": "bench",
    "16": "bird",
    "17": "cat",
    "18": "dog",
    "19": "horse",
    "20": "sheep",
    "21": "cow",
    "22": "elephant",
    "23": "bear",
    "24": "zebra",
    "25": "giraffe",
    "26": "N/A",
    "27": "backpack",
    "28": "umbrella",
    "29": "N/A",
    "30": "N/A",
    "31": "handbag",
    "32": "tie",
    "33": "suitcase",
    "34": "frisbee",
    "35": "skis",
    "36": "snowboard",
    "37": "sports ball",
    "38": "kite",
    "39": "baseball bat",
    "40": "baseball glove",
    "41": "skateboard",
    "42": "surfboard",
    "43": "tennis racket",
    "44": "bottle",
    "45": "N/A",
    "46": "wine glass",
    "47": "cup",
    "48": "fork",
    "49": "knife",
    "50": "spoon",
    "51": "bowl",
    "52": "banana",
    "53": "apple",
    "54": "sandwich",
    "55": "orange",
    "56": "broccoli",
    "57": "carrot",
    "58": "hot dog",
    "59": "pizza",
    "60": "donut",
    "61": "cake",
    "62": "chair",
    "63": "couch",
    "64": "potted plant",
    "65": "bed",
    "66": "N/A",
    "67": "dining table",
    "68": "N/A",
    "69": "N/A",
    "70": "toilet",
    "71": "N/A",
    "72": "tv",
    "73": "laptop",
    "74": "mouse",
    "75": "remote",
    "76": "keyboard",
    "77": "cell phone",
    "78": "microwave",
    "79": "oven",
    "80": "toaster",
    "81": "sink",
    "82": "refrigerator",
    "83": "N/A",
    "84": "book",
    "85": "clock",
    "86": "vase",
    "87": "scissors",
    "88": "teddy bear",
    "89": "hair drier",
    "90": "toothbrush"
}

================================================
FILE: pytorch_object_detection/train_coco_dataset/compute_receptive_field.py
================================================
# vgg16(D): one [kernel_size, stride] pair per layer, listed from the input
# side to the output side.
model = [
    [3, 1],
    [3, 1],
    [2, 2],  # maxpool
    [3, 1],
    [3, 1],
    [2, 2],  # maxpool
    [3, 1],
    [3, 1],
    [3, 1],
    [2, 2],  # maxpool
    [3, 1],
    [3, 1],
    [3, 1],
    [2, 2],  # maxpool
    [3, 1],
    [3, 1],
    [3, 1],
]

# The walk is seeded with the kernel size of the last listed layer and then
# still visits that layer in the loop below.
# NOTE(review): presumably the seed stands for an extra 3x3 window applied on
# top of the listed layers (e.g. a sliding-window head) - confirm.
field = model[-1][0]
# Propagate the field from the output back to the input, layer by layer.
for layer in reversed(model):
    kernel, stride = layer
    field = stride * (field - 1) + kernel
print(field)  # 228


================================================
FILE: pytorch_object_detection/train_coco_dataset/draw_box_utils.py
================================================
from PIL.Image import Image, fromarray
import PIL.ImageDraw as ImageDraw
import PIL.ImageFont as ImageFont
from PIL import ImageColor
import numpy as np

# Colour names used when drawing detections. draw_objs() picks the entry at
# index ``cls % len(STANDARD_COLORS)`` and resolves it with ImageColor.getrgb().
STANDARD_COLORS = [
    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',
    'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',
    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',
    'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
    'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',
    'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
    'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
    'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
    'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
    'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
    'WhiteSmoke', 'Yellow', 'YellowGreen'
]


def draw_text(draw,
              box: list,
              cls: int,
              score: float,
              category_index: dict,
              color: str,
              font: str = 'arial.ttf',
              font_size: int = 24):
    """
    Draw the class name and confidence of one detection next to its box.

    Args:
        draw: drawing context providing ``rectangle`` and ``text``.
        box: ``[left, top, right, bottom]`` of the detection.
        cls: class id; ``str(cls)`` is looked up in ``category_index``.
        score: confidence, rendered as an integer percentage.
        category_index: mapping from class id (as str) to class name.
        color: fill colour of the label background.
        font: TrueType font file name; the PIL default font is used when the
            file cannot be loaded.
        font_size: font size passed to ``ImageFont.truetype``.
    """
    try:
        font = ImageFont.truetype(font, font_size)
    except IOError:
        font = ImageFont.load_default()

    def _text_size(text):
        # ImageFont.getsize() was removed in Pillow 10; getbbox() is the
        # replacement. The right/bottom edges of the bbox are used so the
        # layout matches what getsize() reported. Older Pillow versions
        # without getbbox() fall back to getsize().
        if hasattr(font, "getbbox"):
            bbox = font.getbbox(text)
            return bbox[2], bbox[3]
        return font.getsize(text)

    left, top, right, bottom = box
    # If the total height of the display strings added to the top of the bounding
    # box exceeds the top of the image, stack the strings below the bounding box
    # instead of above.
    display_str = f"{category_index[str(cls)]}: {int(100 * score)}%"
    # The label is measured and drawn one character at a time.
    display_str_heights = [_text_size(ch)[1] for ch in display_str]
    # Each display_str has a top and bottom margin of 0.05x.
    display_str_height = (1 + 2 * 0.05) * max(display_str_heights)

    if top > display_str_height:
        text_top = top - display_str_height
        text_bottom = top
    else:
        text_top = bottom
        text_bottom = bottom + display_str_height

    for ch in display_str:
        text_width = _text_size(ch)[0]
        margin = np.ceil(0.05 * text_width)
        # Filled background behind the character, then the character itself.
        draw.rectangle([(left, text_top),
                        (left + text_width + 2 * margin, text_bottom)], fill=color)
        draw.text((left + margin, text_top),
                  ch,
                  fill='black',
                  font=font)
        # Advance the pen to the start of the next character.
        left += text_width


def draw_masks(image, masks, colors, thresh: float = 0.7, alpha: float = 0.5):
    """
    Overlay instance masks on an image and return the blended result.

    Args:
        image: image convertible with ``np.array`` (a PIL image in this file).
        masks: per-instance mask scores; values above ``thresh`` count as
            foreground. Assumes one mask per entry of ``colors`` with the
            same spatial size as the image - TODO confirm against caller.
        colors: one colour per mask, assigned to the mask's foreground pixels.
        thresh: binarisation threshold for the masks.
        alpha: blending weight of the painted copy.

    Returns:
        A new PIL image (uint8) created with ``fromarray``.
    """
    base = np.array(image)
    binary_masks = np.where(masks > thresh, True, False)

    # colors = np.array(colors)
    painted = np.copy(base)
    # TODO: There might be a way to vectorize this
    for single_mask, single_color in zip(binary_masks, colors):
        painted[single_mask] = single_color

    # Alpha-blend the untouched image with the painted copy.
    blended = base * (1 - alpha) + painted * alpha
    return fromarray(blended.astype(np.uint8))


def draw_objs(image: Image,
              boxes: np.ndarray = None,
              classes: np.ndarray = None,
              scores: np.ndarray = None,
              masks: np.ndarray = None,
              category_index: dict = None,
              box_thresh: float = 0.1,
              mask_thresh: float = 0.5,
              line_thickness: int = 8,
              font: str = 'arial.ttf',
              font_size: int = 24,
              draw_boxes_on_image: bool = True,
              draw_masks_on_image: bool = False):
    """
    Draw bounding boxes, class labels and (optionally) masks on an image.

    Args:
        image: image to draw on
        boxes: bounding boxes of the detections
        classes: class ids of the detections
        scores: confidence scores of the detections
        masks: masks of the detections (optional)
        category_index: mapping from class id to class name
        box_thresh: detections whose score is not greater than this are dropped
        mask_thresh: threshold forwarded to ``draw_masks``
        line_thickness: width of the box outline
        font: font type
        font_size: font size
        draw_boxes_on_image: draw box outlines and labels when True
        draw_masks_on_image: overlay masks when True and ``masks`` is given

    Returns:
        The image with the drawings; the input image itself when no
        detection passes ``box_thresh``.
    """

    # Keep only detections that score above the threshold.
    keep = np.greater(scores, box_thresh)
    kept_boxes = boxes[keep]
    kept_classes = classes[keep]
    kept_scores = scores[keep]
    kept_masks = masks[keep] if masks is not None else masks
    if len(kept_boxes) == 0:
        return image

    palette_size = len(STANDARD_COLORS)
    colors = [ImageColor.getrgb(STANDARD_COLORS[cls % palette_size]) for cls in kept_classes]

    if draw_boxes_on_image:
        # Draw all boxes onto image.
        canvas = ImageDraw.Draw(image)
        for box, cls, score, color in zip(kept_boxes, kept_classes, kept_scores, colors):
            left, top, right, bottom = box
            # Closed outline of the bounding box.
            outline = [(left, top), (left, bottom), (right, bottom),
                       (right, top), (left, top)]
            canvas.line(outline, width=line_thickness, fill=color)
            # Class name and confidence label.
            draw_text(canvas, box.tolist(), int(cls), float(score), category_index, color, font, font_size)

    if draw_masks_on_image and (kept_masks is not None):
        # Draw all mask onto image.
        image = draw_masks(image, kept_masks, colors, mask_thresh)

    return image


================================================
FILE: pytorch_object_detection/train_coco_dataset/my_dataset.py
================================================
import os
import json

import torch
from PIL import Image
import torch.utils.data as data
from pycocotools.coco import COCO


def _coco_remove_images_without_annotations(dataset, ids):
    """
    Return the image ids that have at least one usable annotation.

    An image is dropped when it has no annotations at all, or when every
    annotation has a box whose width or height is <= 1.
    refer to:
    https://github.com/pytorch/vision/blob/master/references/detection/coco_utils.py

    :param dataset: object providing ``getAnnIds(imgIds=..., iscrowd=...)``
        and ``loadAnns(ann_ids)`` (a pycocotools ``COCO`` instance in this file)
    :param ids: iterable of image ids to check
    :return: list of the ids that passed the check, in their original order
    """
    def _is_degenerate(obj):
        # bbox is [x, y, w, h]; the last two entries are the side lengths.
        return any(side <= 1 for side in obj["bbox"][2:])

    def _has_valid_annotation(anno):
        # Needs at least one annotation, and not all of them degenerate.
        return len(anno) != 0 and not all(_is_degenerate(obj) for obj in anno)

    def _annotations_of(img_id):
        return dataset.loadAnns(dataset.getAnnIds(imgIds=img_id, iscrowd=None))

    return [img_id for img_id in ids if _has_valid_annotation(_annotations_of(img_id))]


class CocoDetection(data.Dataset):
    """`MS Coco Detection <https://cocodataset.org/>`_ Dataset (2017 split).

    Args:
        root (string): Dataset root. It must contain the image folder
            ``<dataset>2017`` and ``annotations/instances_<dataset>2017.json``.
        dataset (string): Either ``"train"`` or ``"val"``.
        transforms (callable, optional): Called as ``transforms(img, target)``
            and expected to return the transformed ``(img, target)`` pair.
    """

    def __init__(self, root, dataset="train", transforms=None):
        super().__init__()
        assert dataset in ["train", "val"], 'dataset must be in ["train", "val"]'
        assert os.path.exists(root), "file '{}' does not exist.".format(root)

        self.img_root = os.path.join(root, "{}2017".format(dataset))
        assert os.path.exists(self.img_root), "path '{}' does not exist.".format(self.img_root)

        self.anno_path = os.path.join(root, "annotations", "instances_{}2017.json".format(dataset))
        assert os.path.exists(self.anno_path), "file '{}' does not exist.".format(self.anno_path)

        self.mode = dataset
        self.transforms = transforms
        self.coco = COCO(self.anno_path)

        # Map category id -> category name.
        # The ids of the 80 object categories are not contiguous; according to
        # the original author they follow the 91-entry "stuff91" ordering.
        id_to_name = {cat["id"]: cat["name"] for cat in self.coco.cats.values()}
        max_index = max(id_to_name.keys())  # 90
        # Ids that do not occur in the annotation file are named "N/A".
        coco_classes = {idx: id_to_name.get(idx, "N/A") for idx in range(1, max_index + 1)}

        is_train = dataset == "train"
        if is_train:
            # Dump the id -> name table into the working directory.
            with open("coco91_indices.json", "w") as f:
                f.write(json.dumps(coco_classes, indent=4))

        self.coco_classes = coco_classes

        all_ids = list(sorted(self.coco.imgs.keys()))
        if is_train:
            # Drop images without objects or with only very small objects.
            self.ids = _coco_remove_images_without_annotations(self.coco, all_ids)
        else:
            self.ids = all_ids

    def parse_targets(self,
                      img_id: int,
                      coco_targets: list,
                      w: int = None,
                      h: int = None):
        """Convert the COCO annotations of one image into a target dict.

        Args:
            img_id: id of the image, stored under ``"image_id"``.
            coco_targets: annotation dicts with the keys ``bbox``
                (``[xmin, ymin, w, h]``), ``category_id``, ``area`` and ``iscrowd``.
            w: image width, upper clamp for x coordinates (must be > 0).
            h: image height, upper clamp for y coordinates (must be > 0).

        Returns:
            dict with the tensors ``boxes`` (``[xmin, ymin, xmax, ymax]``),
            ``labels``, ``image_id``, ``area`` and ``iscrowd``.
        """
        assert w > 0
        assert h > 0

        # Keep only annotations of single objects (iscrowd == 0).
        objs = [obj for obj in coco_targets if obj['iscrowd'] == 0]

        # reshape guards against the "no boxes" case
        boxes = torch.as_tensor([obj["bbox"] for obj in objs], dtype=torch.float32).reshape(-1, 4)
        # [xmin, ymin, w, h] -> [xmin, ymin, xmax, ymax]
        boxes[:, 2:] += boxes[:, :2]
        # Clamp coordinates to the image area (in place on the strided views).
        boxes[:, 0::2].clamp_(min=0, max=w)
        boxes[:, 1::2].clamp_(min=0, max=h)

        labels = torch.tensor([obj["category_id"] for obj in objs], dtype=torch.int64)
        area = torch.tensor([obj["area"] for obj in objs])
        iscrowd = torch.tensor([obj["iscrowd"] for obj in objs])

        # Keep only legal boxes, i.e. x_max > x_min and y_max > y_min.
        valid = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])

        return {
            "boxes": boxes[valid],
            "labels": labels[valid],
            "image_id": torch.tensor([img_id]),
            # for conversion to coco api
            "area": area[valid],
            "iscrowd": iscrowd[valid],
        }

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: Tuple (image, target). ``target`` is the dict built by
            ``parse_targets``; both go through ``self.transforms`` when set.
        """
        img_id = self.ids[index]
        annotations = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))

        file_name = self.coco.loadImgs(img_id)[0]['file_name']
        img = Image.open(os.path.join(self.img_root, file_name)).convert('RGB')

        width, height = img.size
        target = self.parse_targets(img_id, annotations, width, height)
        if self.transforms is None:
            return img, target

        img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        return len(self.ids)

    def get_height_and_width(self, index):
        """Return ``(height, width)`` of the image at ``index`` from the annotation metadata."""
        info = self.coco.loadImgs(self.ids[index])[0]
        return info["height"], info["width"]

    @staticmethod
    def collate_fn(batch):
        """Regroup a list of ``(img, target)`` samples into ``(imgs, targets)`` tuples."""
        return tuple(zip(*batch))


# train = CocoDetection("/data/coco_data/", dataset="train")
# print(len(train))
# t = train[0]
# print(t)

================================================
FILE: pytorch_object_detection/train_coco_dataset/network_files/__init__.py
================================================
from .faster_rcnn_framework import FasterRCNN, FastRCNNPredictor
from .rpn_function import AnchorsGenerator


================================================
FILE: pytorch_object_detection/train_coco_dataset/network_files/boxes.py
================================================
import torch
from typing import Tuple
from torch import Tensor
import torchvision


def nms(boxes, scores, iou_threshold):
    # type: (Tensor, Tensor, float) -> Tensor
    """
    Performs non-maximum suppression (NMS) on the boxes according
    to their intersection-over-union (IoU).

    NMS iteratively removes lower scoring boxes which have an
    IoU greater than iou_threshold with another (higher scoring)
    box.

    This function only forwards its arguments to the
    ``torch.ops.torchvision.nms`` operator.

    Parameters
    ----------
    boxes : Tensor[N, 4]
        boxes to perform NMS on. They
        are expected to be in (x1, y1, x2, y2) format
    scores : Tensor[N]
        scores for each one of the boxes
    iou_threshold : float
        discards all overlapping
        boxes with IoU > iou_threshold

    Returns
    -------
    keep : Tensor
        int64 tensor with the indices
        of the elements that have been kept
        by NMS, sorted in decreasing order of scores
    """
    return torch.ops.torchvision.nms(boxes, scores, iou_threshold)


def batched_nms(boxes, scores, idxs, iou_threshold):
    # type: (Tensor, Tensor, Tensor, float) -> Tensor
    """
    Performs non-maximum suppression in a batched fashion.

    Boxes that carry different values in ``idxs`` never suppress each
    other: NMS is applied independently per index value.

    Parameters
    ----------
    boxes : Tensor[N, 4]
        boxes where NMS will be performed. They
        are expected to be in (x1, y1, x2, y2) format
    scores : Tensor[N]
        scores for each one of the boxes
    idxs : Tensor[N]
        indices of the categories for each one of the boxes.
    iou_threshold : float
        discards all overlapping boxes
        with IoU > iou_threshold

    Returns
    -------
    keep : Tensor
        int64 tensor with the indices of
        the elements that have been kept by NMS, sorted
        in decreasing order of scores
    """
    # Nothing to suppress: return an empty index tensor on the same device.
    if boxes.numel() == 0:
        return torch.empty((0,), dtype=torch.int64, device=boxes.device)

    # Trick for per-category NMS with a single call: shift the boxes of every
    # category by an offset that depends only on the category index and is
    # larger than any coordinate, so boxes of different categories cannot overlap.
    shift_unit = boxes.max() + 1  # largest coordinate over all boxes, plus one

    # idxs.to(boxes) only aligns dtype and device with ``boxes``.
    per_box_shift = idxs.to(boxes) * shift_unit
    shifted_boxes = boxes + per_box_shift[:, None]
    return nms(shifted_boxes, scores, iou_threshold)


def remove_small_boxes(boxes, min_size):
    # type: (Tensor, float) -> Tensor
    """
    Return the indices of the boxes whose width and height are both at
    least ``min_size``; boxes with any smaller side are dropped.

    Arguments:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format
        min_size (float): minimum size

    Returns:
        keep (Tensor[K]): indices of the boxes that have both sides
            larger than or equal to min_size
    """
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    # Equivalent to (widths >= min_size) & (heights >= min_size).
    wide_enough = torch.ge(widths, min_size)
    tall_enough = torch.ge(heights, min_size)
    big_enough = torch.logical_and(wide_enough, tall_enough)
    # torch.where on a 1-D mask yields a one-element tuple holding the indices.
    return torch.where(big_enough)[0]


def clip_boxes_to_image(boxes, size):
    # type: (Tensor, Tuple[int, int]) -> Tensor
    """
    Clip boxes so that they lie inside an image of size `size`:
    coordinates outside the image are moved onto the image border.

    Arguments:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format
        size (Tuple[height, width]): size of the image

    Returns:
        clipped_boxes (Tensor[N, 4])
    """
    height, width = size
    n_dims = boxes.dim()
    xs = boxes[..., 0::2]  # x1, x2
    ys = boxes[..., 1::2]  # y1, y2

    if not torchvision._is_tracing():
        xs = xs.clamp(min=0, max=width)    # x coordinates limited to [0, width]
        ys = ys.clamp(min=0, max=height)   # y coordinates limited to [0, height]
    else:
        # While tracing, the bounds are expressed with torch.max / torch.min
        # against tensor constants instead of clamp().
        xs = torch.max(xs, torch.tensor(0, dtype=boxes.dtype, device=boxes.device))
        xs = torch.min(xs, torch.tensor(width, dtype=boxes.dtype, device=boxes.device))
        ys = torch.max(ys, torch.tensor(0, dtype=boxes.dtype, device=boxes.device))
        ys = torch.min(ys, torch.tensor(height, dtype=boxes.dtype, device=boxes.device))

    # Interleave x and y again, then restore the original layout.
    interleaved = torch.stack((xs, ys), dim=n_dims)
    return interleaved.reshape(boxes.shape)


def box_area(boxes):
    """
    Compute width * height for every box of a set given in
    (x1, y1, x2, y2) coordinates.

    Arguments:
        boxes (Tensor[N, 4]): boxes for which the area will be computed. They
            are expected to be in (x1, y1, x2, y2) format

    Returns:
        area (Tensor[N]): area for each box
    """
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    return widths * heights


def box_iou(boxes1, boxes2):
    """
    Return the pairwise intersection-over-union (Jaccard index) of two
    sets of boxes.

    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.

    Arguments:
        boxes1 (Tensor[N, 4])
        boxes2 (Tensor[M, 4])

    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in boxes1 and boxes2
    """
    area1 = box_area(boxes1)
    area2 = box_area(boxes2)

    # boxes1 gets an extra axis so that it broadcasts against boxes2.
    top_left = torch.max(boxes1[:, None, :2], boxes2[:, :2])      # [N,M,2]
    bottom_right = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # [N,M,2]

    # Negative extents mean "no overlap" and are clamped to zero.
    overlap = (bottom_right - top_left).clamp(min=0)  # [N,M,2]
    inter = overlap[:, :, 0] * overlap[:, :, 1]  # [N,M]

    union = area1[:, None] + area2 - inter
    return inter / union


================================================
FILE: pytorch_object_detection/train_coco_dataset/network_files/det_utils.py
================================================
import torch
import math
from typing import List, Tuple
from torch import Tensor


class BalancedPositiveNegativeSampler(object):
    """
    Samples a fixed-size batch of elements per image while capping the
    share of positives at a configured fraction.
    """

    def __init__(self, batch_size_per_image, positive_fraction):
        # type: (int, float) -> None
        """
        Arguments:
            batch_size_per_image (int): number of elements to be selected per image
            positive_fraction (float): target fraction of positive elements per batch
        """
        self.batch_size_per_image = batch_size_per_image
        self.positive_fraction = positive_fraction

    def __call__(self, matched_idxs):
        # type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
        """
        Arguments:
            matched idxs: list of tensors, one per image. Entries >= 1 are
                treated as positives, entries == 0 as negatives, and every
                other value (e.g. -1) is ignored.

        Returns:
            pos_idx (list[tensor])
            neg_idx (list[tensor])

        Both lists hold one uint8 mask per image, shaped like the matching
        input tensor: the first marks the sampled positives, the second the
        sampled negatives.
        """
        pos_masks = []
        neg_masks = []
        for labels in matched_idxs:
            # candidate indices: positives are >= 1, negatives are exactly 0
            pos_candidates = torch.where(torch.ge(labels, 1))[0]
            neg_candidates = torch.where(torch.eq(labels, 0))[0]

            # positive quota, capped by how many positives actually exist
            pos_quota = min(pos_candidates.numel(),
                            int(self.batch_size_per_image * self.positive_fraction))
            # negatives fill the rest of the batch, capped the same way
            neg_quota = min(neg_candidates.numel(),
                            self.batch_size_per_image - pos_quota)

            # shuffle the candidates and keep the first `quota` of each kind
            pos_order = torch.randperm(pos_candidates.numel(), device=pos_candidates.device)
            neg_order = torch.randperm(neg_candidates.numel(), device=neg_candidates.device)
            chosen_pos = pos_candidates[pos_order[:pos_quota]]
            chosen_neg = neg_candidates[neg_order[:neg_quota]]

            # turn the chosen indices into binary masks
            pos_mask = torch.zeros_like(labels, dtype=torch.uint8)
            neg_mask = torch.zeros_like(labels, dtype=torch.uint8)
            pos_mask[chosen_pos] = 1
            neg_mask[chosen_neg] = 1

            pos_masks.append(pos_mask)
            neg_masks.append(neg_mask)

        return pos_masks, neg_masks


@torch.jit._script_if_tracing
def encode_boxes(reference_boxes, proposals, weights):
    # type: (torch.Tensor, torch.Tensor, torch.Tensor) -> torch.Tensor
    """
    Compute the regression targets that map proposals onto reference boxes.

    Arguments:
        reference_boxes (Tensor): reference boxes (gt), columns x1, y1, x2, y2
        proposals (Tensor): boxes to be encoded (anchors), columns x1, y1, x2, y2
        weights (Tensor): four scale factors applied to dx, dy, dw, dh in that order

    Returns:
        Tensor: columns (dx, dy, dw, dh), one row per box pair
    """
    # ground-truth corners, each kept as an [N, 1] column
    gt_x1 = reference_boxes[:, 0].unsqueeze(1)
    gt_y1 = reference_boxes[:, 1].unsqueeze(1)
    gt_x2 = reference_boxes[:, 2].unsqueeze(1)
    gt_y2 = reference_boxes[:, 3].unsqueeze(1)

    # anchor/proposal corners, same layout
    anchor_x1 = proposals[:, 0].unsqueeze(1)
    anchor_y1 = proposals[:, 1].unsqueeze(1)
    anchor_x2 = proposals[:, 2].unsqueeze(1)
    anchor_y2 = proposals[:, 3].unsqueeze(1)

    # size and centre of the anchors
    anchor_w = anchor_x2 - anchor_x1
    anchor_h = anchor_y2 - anchor_y1
    anchor_cx = anchor_x1 + 0.5 * anchor_w
    anchor_cy = anchor_y1 + 0.5 * anchor_h

    # size and centre of the ground-truth boxes
    gt_w = gt_x2 - gt_x1
    gt_h = gt_y2 - gt_y1
    gt_cx = gt_x1 + 0.5 * gt_w
    gt_cy = gt_y1 + 0.5 * gt_h

    # centre offsets are normalised by anchor size; sizes are encoded as log-ratios
    delta_x = weights[0] * (gt_cx - anchor_cx) / anchor_w
    delta_y = weights[1] * (gt_cy - anchor_cy) / anchor_h
    delta_w = weights[2] * torch.log(gt_w / anchor_w)
    delta_h = weights[3] * torch.log(gt_h / anchor_h)

    return torch.cat((delta_x, delta_y, delta_w, delta_h), dim=1)


class BoxCoder(object):
    """
    Converts between absolute (x1, y1, x2, y2) boxes and the relative
    (dx, dy, dw, dh) representation used to train the box regressors.
    """

    def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)):
        # type: (Tuple[float, float, float, float], float) -> None
        """
        Arguments:
            weights (4-element tuple): scale factors for (dx, dy, dw, dh)
            bbox_xform_clip (float): upper bound applied to dw/dh before exp()
        """
        self.weights = weights
        self.bbox_xform_clip = bbox_xform_clip

    def encode(self, reference_boxes, proposals):
        # type: (List[Tensor], List[Tensor]) -> List[Tensor]
        """
        Compute regression parameters from anchors and their matched gt boxes.

        Args:
            reference_boxes: List[Tensor], the gt box matched to each proposal/anchor
            proposals: List[Tensor], anchors/proposals

        Returns: regression parameters, split back into one chunk per image

        """
        # remember the per-image counts so the concatenated result can be split again;
        # reference_boxes and proposals share the same per-image layout
        counts = [len(b) for b in reference_boxes]

        # targets_dx, targets_dy, targets_dw, targets_dh for all images at once
        all_targets = self.encode_single(
            torch.cat(reference_boxes, dim=0),
            torch.cat(proposals, dim=0),
        )
        return all_targets.split(counts, 0)

    def encode_single(self, reference_boxes, proposals):
        """
        Encode one tensor of proposals against one tensor of reference boxes.

        Arguments:
            reference_boxes (Tensor): reference boxes
            proposals (Tensor): boxes to be encoded
        """
        # the weights follow the dtype/device of the reference boxes
        weights = torch.as_tensor(
            self.weights,
            dtype=reference_boxes.dtype,
            device=reference_boxes.device,
        )
        return encode_boxes(reference_boxes, proposals, weights)

    def decode(self, rel_codes, boxes):
        # type: (Tensor, List[Tensor]) -> Tensor
        """
        Apply predicted regression parameters to anchors/proposals.

        Args:
            rel_codes: bbox regression parameters
            boxes: anchors/proposals, one tensor per image

        Returns:
            decoded boxes; reshaped to [total_boxes, -1, 4] when there is at
            least one box, otherwise returned as produced by decode_single

        """
        assert isinstance(boxes, (list, tuple))
        assert isinstance(rel_codes, torch.Tensor)

        # total number of boxes over all images
        total = 0
        for count in [b.size(0) for b in boxes]:
            total += count

        # apply the predicted regression parameters to the matching anchors
        decoded = self.decode_single(rel_codes, torch.cat(boxes, dim=0))

        # skip the reshape for an empty result, where it would raise
        if total > 0:
            decoded = decoded.reshape(total, -1, 4)

        return decoded

    def decode_single(self, rel_codes, boxes):
        """
        Turn encoded relative offsets back into absolute boxes.

        Arguments:
            rel_codes (Tensor): encoded boxes (bbox regression parameters)
            boxes (Tensor): reference boxes (anchors/proposals)
        """
        boxes = boxes.to(rel_codes.dtype)

        # reference geometry from (xmin, ymin, xmax, ymax)
        ref_w = boxes[:, 2] - boxes[:, 0]
        ref_h = boxes[:, 3] - boxes[:, 1]
        ref_cx = boxes[:, 0] + 0.5 * ref_w
        ref_cy = boxes[:, 1] + 0.5 * ref_h

        # every 4th column belongs to the same parameter; undo the encoding weights
        wx, wy, ww, wh = self.weights
        dx = rel_codes[:, 0::4] / wx
        dy = rel_codes[:, 1::4] / wy
        dw = rel_codes[:, 2::4] / ww
        dh = rel_codes[:, 3::4] / wh

        # cap the size deltas so torch.exp() never sees very large values
        dw = torch.clamp(dw, max=self.bbox_xform_clip)
        dh = torch.clamp(dh, max=self.bbox_xform_clip)

        out_cx = dx * ref_w[:, None] + ref_cx[:, None]
        out_cy = dy * ref_h[:, None] + ref_cy[:, None]
        out_w = torch.exp(dw) * ref_w[:, None]
        out_h = torch.exp(dh) * ref_h[:, None]

        # half extents, built from an explicit 0.5 tensor with matching dtype/device
        half_w = torch.tensor(0.5, dtype=out_cx.dtype, device=out_w.device) * out_w
        half_h = torch.tensor(0.5, dtype=out_cy.dtype, device=out_h.device) * out_h

        x_min = out_cx - half_w
        y_min = out_cy - half_h
        x_max = out_cx + half_w
        y_max = out_cy + half_h

        # interleave as (xmin, ymin, xmax, ymax) per class, then flatten per box
        return torch.stack((x_min, y_min, x_max, y_max), dim=2).flatten(1)


class Matcher(object):
    BELOW_LOW_THRESHOLD = -1
    BETWEEN_THRESHOLDS = -2

    __annotations__ = {
        'BELOW_LOW_THRESHOLD': int,
        'BETWEEN_THRESHOLDS': int,
    }

    def __init__(self, high_threshold, low_threshold, allow_low_quality_matches=False):
        # type: (float, float, bool) -> None
        """
        Args:
            high_threshold (float): quality values greater than or equal to
                this value are candidate matches.
            low_threshold (float): a lower quality threshold used to stratify
                matches into three levels:
                1) matches >= high_threshold
                2) BETWEEN_THRESHOLDS matches in [low_threshold, high_threshold)
                3) BELOW_LOW_THRESHOLD matches in [0, low_threshold)
            allow_low_quality_matches (bool): if True, produce additional matches
                for predictions that have only low-quality match candidates. See
                set_low_quality_matches_ for more details.
        """
        self.BELOW_LOW_THRESHOLD = -1
        self.BETWEEN_THRESHOLDS = -2
        assert low_threshold <= high_threshold
        self.high_threshold = high_threshold
        self.low_threshold = low_threshold
        self.allow_low_quality_matches = allow_low_quality_matches

    def __call__(self, match_quality_matrix):
        """
        For every prediction pick the ground-truth element with the highest
        quality (IoU) and record its index; predictions whose best quality is
        below low_threshold get -1, those in [low_threshold, high_threshold)
        get -2.

        Args:
            match_quality_matrix (Tensor[float]): an MxN tensor, containing the
            pairwise quality between M ground-truth elements and N predicted elements.

        Returns:
            matches (Tensor[int64]): an N tensor where N[i] is a matched gt in
            [0, M - 1] or a negative value indicating that prediction i could not
            be matched.
        """
        if match_quality_matrix.numel() == 0:
            # empty targets or proposals not supported during training
            if match_quality_matrix.shape[0] == 0:
                raise ValueError(
                    "No ground-truth boxes available for one of the images "
                    "during training")
            raise ValueError(
                "No proposal boxes available for one of the images "
                "during training")

        # The matrix is M (gt) x N (predicted): each column holds one prediction's
        # quality against every gt. Reducing over dim 0 yields, per prediction,
        # the best quality and the index of the gt that achieved it.
        best_quality, matches = match_quality_matrix.max(dim=0)
        if self.allow_low_quality_matches:
            # keep the raw argmax; `matches` is overwritten in place below
            all_matches = matches.clone()
        else:
            all_matches = None

        # classify each prediction by its best quality
        too_low = best_quality < self.low_threshold
        in_between = (best_quality >= self.low_threshold) & (
            best_quality < self.high_threshold
        )
        matches[too_low] = self.BELOW_LOW_THRESHOLD      # -1
        matches[in_between] = self.BETWEEN_THRESHOLDS    # -2

        if self.allow_low_quality_matches:
            assert all_matches is not None
            self.set_low_quality_matches_(matches, all_matches, match_quality_matrix)

        return matches

    def set_low_quality_matches_(self, matches, all_matches, match_quality_matrix):
        """
        Produce additional matches for predictions that have only low-quality matches.
        Specifically, for each ground-truth find the set of predictions that have
        maximum overlap with it (including ties); for each prediction in that set, if
        it is unmatched, then match it to the ground-truth with which it has the highest
        quality value.

        `matches` is modified in place; nothing is returned.
        """
        # best quality reached by each gt over all predictions (reduce over dim 1)
        best_per_gt, _ = match_quality_matrix.max(dim=1)

        # Locate every (gt, prediction) cell equal to that gt's best quality.
        # Ties are kept, so one gt can contribute several predictions.
        # torch.where returns a tuple (gt indices, prediction indices), e.g.
        #   gt:   [0,     1,     1,     2,     2    ]
        #   pred: [39796, 32055, 32070, 39190, 40255]
        # where gt 1 and gt 2 each tie on two predictions.
        tied_pairs = torch.where(
            torch.eq(match_quality_matrix, best_per_gt[:, None])
        )

        # only the prediction indices are needed; the gt indices are discarded
        pred_inds = tied_pairs[1]
        # restore the raw argmax for those predictions, even if it is below the thresholds
        matches[pred_inds] = all_matches[pred_inds]


def smooth_l1_loss(input, target, beta: float = 1. / 9, size_average: bool = True):
    """
    Smooth L1 loss, very similar to the smooth_l1_loss from pytorch but with
    the extra beta parameter.

    With n = |input - target|, each element contributes
    0.5 * n ** 2 / beta where n < beta, and n - 0.5 * beta elsewhere.

    Arguments:
        input (Tensor): predicted values
        target (Tensor): target values, broadcastable against input
        beta (float): width of the quadratic zone around zero; beta == 0
            gives the plain L1 loss
        size_average (bool): return the mean over all elements when True,
            otherwise the sum

    Returns:
        Tensor: scalar loss
    """
    n = torch.abs(input - target)
    if beta == 0:
        # The quadratic zone is empty, so the loss is plain L1. Evaluating
        # 0.5 * n ** 2 / beta here would divide by zero: the forward value
        # would still be correct (torch.where never selects that branch),
        # but the backward pass through it yields NaN gradients.
        loss = n
    else:
        cond = torch.lt(n, beta)
        loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta)
    if size_average:
        return loss.mean()
    return loss.sum()


================================================
FILE: pytorch_object_detection/train_coco_dataset/network_files/faster_rcnn_framework.py
================================================
import warnings
from collections import OrderedDict
from typing import Tuple, List, Dict, Optional, Union

import torch
from torch import nn, Tensor
import torch.nn.functional as F
from torchvision.ops import MultiScaleRoIAlign

from .roi_head import RoIHeads
from .transform import GeneralizedRCNNTransform
from .rpn_function import AnchorsGenerator, RPNHead, RegionProposalNetwork


class FasterRCNNBase(nn.Module):
    """
    Main class for Generalized R-CNN.

    Chains four sub-modules: transform -> backbone -> rpn -> roi_heads, then
    lets the transform post-process the detections.

    Arguments:
        backbone (nn.Module):
        rpn (nn.Module):
        roi_heads (nn.Module): takes the features + the proposals from the RPN and computes
            detections / masks from it.
        transform (nn.Module): performs the data transformation from the inputs to feed into
            the model
    """

    def __init__(self, backbone, rpn, roi_heads, transform):
        super(FasterRCNNBase, self).__init__()
        self.transform = transform
        self.backbone = backbone
        self.rpn = rpn
        self.roi_heads = roi_heads
        # used only on torchscript mode
        self._has_warned = False

    @torch.jit.unused
    def eager_outputs(self, losses, detections):
        # type: (Dict[str, Tensor], List[Dict[str, Tensor]]) -> Union[Dict[str, Tensor], List[Dict[str, Tensor]]]
        """Return the losses in training mode and the detections otherwise."""
        if self.training:
            return losses

        return detections

    def forward(self, images, targets=None):
        # type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]
        """
        Arguments:
            images (list[Tensor]): images to be processed
            targets (list[Dict[Tensor]]): ground-truth boxes present in the image (optional)

        Returns:
            result (list[BoxList] or dict[Tensor]): the output from the model.
                During training, it returns a dict[Tensor] which contains the losses.
                During testing, it returns list[BoxList] contains additional fields
                like `scores`, `labels` and `mask` (for Mask R-CNN models).
                When scripted, a (losses, detections) tuple is always returned.

        Raises:
            ValueError: in training mode, if targets is None or if a target's
                "boxes" entry is not a Tensor of shape [N, 4].

        """
        if self.training and targets is None:
            raise ValueError("In training mode, targets should be passed")

        if self.training:
            assert targets is not None
            for target in targets:         # validate the "boxes" entry of every target
                boxes = target["boxes"]
                if isinstance(boxes, torch.Tensor):
                    if len(boxes.shape) != 2 or boxes.shape[-1] != 4:
                        raise ValueError("Expected target boxes to be a tensor "
                                         "of shape [N, 4], got {:}.".format(
                                          boxes.shape))
                else:
                    raise ValueError("Expected target boxes to be of type "
                                     "Tensor, got {:}.".format(type(boxes)))

        # record every image's (height, width) before the transform resizes it
        original_image_sizes = torch.jit.annotate(List[Tuple[int, int]], [])
        for img in images:
            val = img.shape[-2:]
            assert len(val) == 2  # guards against a 1-D input
            original_image_sizes.append((val[0], val[1]))

        images, targets = self.transform(images, targets)  # pre-process the images
        features = self.backbone(images.tensors)  # run the backbone to get feature maps
        if isinstance(features, torch.Tensor):
            # a single feature map is wrapped in an ordered dict under key '0';
            # otherwise the backbone output is passed on unchanged
            features = OrderedDict([('0', features)])

        # feed the feature maps and the target annotations to the RPN
        # proposals: List[Tensor], Tensor_shape: [num_proposals, 4],
        # each proposal holds absolute coordinates in (x1, y1, x2, y2) format
        proposals, proposal_losses = self.rpn(images, features, targets)

        # feed the RPN output and the target annotations to the Fast R-CNN heads
        detections, detector_losses = self.roi_heads(features, proposals, images.image_sizes, targets)

        # post-process the predictions (mainly mapping bboxes back to the original image scale)
        detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)

        losses = {}
        losses.update(detector_losses)
        losses.update(proposal_losses)

        if torch.jit.is_scripting():
            if not self._has_warned:
                warnings.warn("RCNN always returns a (Losses, Detections) tuple in scripting")
                self._has_warned = True
            return losses, detections
        else:
            return self.eager_outputs(losses, detections)


class TwoMLPHead(nn.Module):
    """
    Standard heads for FPN-based models: two fully connected layers,
    each followed by a ReLU.

    Arguments:
        in_channels (int): number of input channels
        representation_size (int): size of the intermediate representation
    """

    def __init__(self, in_channels, representation_size):
        super().__init__()

        self.fc6 = nn.Linear(in_channels, representation_size)
        self.fc7 = nn.Linear(representation_size, representation_size)

    def forward(self, x):
        # collapse everything after the batch dimension into one feature vector
        flat = x.flatten(start_dim=1)

        hidden = F.relu(self.fc6(flat))
        return F.relu(self.fc7(hidden))


class FastRCNNPredictor(nn.Module):
    """
    Standard classification + bounding box regression layers
    for Fast R-CNN.

    Arguments:
        in_channels (int): number of input channels
        num_classes (int): number of output classes (including background)
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.cls_score = nn.Linear(in_channels, num_classes)
        # four regression values per class
        self.bbox_pred = nn.Linear(in_channels, num_classes * 4)

    def forward(self, x):
        # a 4-D input is only accepted when its two trailing dimensions are 1x1
        if x.dim() == 4:
            trailing = list(x.shape[2:])
            assert trailing == [1, 1]
        features = x.flatten(start_dim=1)

        return self.cls_score(features), self.bbox_pred(features)


class FasterRCNN(FasterRCNNBase):
    """
    Implements Faster R-CNN.

    The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
    image, and should be in 0-1 range. Different images can have different sizes.

    The behavior of the model changes depending if it is in training or evaluation mode.

    During training, the model expects both the input tensors, as well as a targets (list of dictionary),
    containing:
        - boxes (FloatTensor[N, 4]): the ground-truth boxes in [x1, y1, x2, y2] format, with values
          between 0 and H and 0 and W
        - labels (Int64Tensor[N]): the class label for each ground-truth box

    The model returns a Dict[Tensor] during training, containing the classification and regression
    losses for both the RPN and the R-CNN.

    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as
    follows:
        - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values between
          0 and H and 0 and W
        - labels (Int64Tensor[N]): the predicted labels for each image
        - scores (Tensor[N]): the scores of each prediction

    Arguments:
        backbone (nn.Module): the network used to compute the features for the model.
            It should contain a out_channels attribute, which indicates the number of output
            channels that each feature map has (and it should be the same for all feature maps).
            The backbone should return a single Tensor or an OrderedDict[Tensor].
        num_classes (int): number of output classes of the model (including the background).
            If box_predictor is specified, num_classes should be None.
        min_size (int): minimum size of the image to be rescaled before feeding it to the backbone
        max_size (int): maximum size of the image to be rescaled before feeding it to the backbone
        image_mean (Tuple[float, float, float]): mean values used for input normalization.
            They are generally the mean values of the dataset on which the backbone has been trained
            on
        image_std (Tuple[float, float, float]): std values used for input normalization.
            They are generally the std values of the dataset on which the backbone has been trained on
        rpn_anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature
            maps.
        rpn_head (nn.Module): module that computes the objectness and regression deltas from the RPN
        rpn_pre_nms_top_n_train (int): number of proposals to keep before applying NMS during training
        rpn_pre_nms_top_n_test (int): number of proposals to keep before applying NMS during testing
        rpn_post_nms_top_n_train (int): number of proposals to keep after applying NMS during training
        rpn_post_nms_top_n_test (int): number of proposals to keep after applying NMS during testing
        rpn_nms_thresh (float): NMS threshold used for postprocessing the RPN proposals
        rpn_fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be
            considered as positive during training of the RPN.
        rpn_bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be
            considered as negative during training of the RPN.
        rpn_batch_size_per_image (int): number of anchors that are sampled during training of the RPN
            for computing the loss
        rpn_positive_fraction (float): proportion of positive anchors in a mini-batch during training
            of the RPN
        rpn_score_thresh (float): during inference, only return proposals with a classification score
            greater than rpn_score_thresh
        box_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in
            the locations indicated by the bounding boxes
        box_head (nn.Module): module that takes the cropped feature maps as input
        box_predictor (nn.Module): module that takes the output of box_head and returns the
            classification logits and box regression deltas.
        box_score_thresh (float): during inference, only return proposals with a classification score
            greater than box_score_thresh
        box_nms_thresh (float): NMS threshold for the prediction head. Used during inference
        box_detections_per_img (int): maximum number of detections per image, for all classes.
        box_fg_iou_thresh (float): minimum IoU between the proposals and the GT box so that they can be
            considered as positive during training of the classification head
        box_bg_iou_thresh (float): maximum IoU between the proposals and the GT box so that they can be
            considered as negative during training of the classification head
        box_batch_size_per_image (int): number of proposals that are sampled during training of the
            classification head
        box_positive_fraction (float): proportion of positive proposals in a mini-batch during training
            of the classification head
        bbox_reg_weights (Tuple[float, float, float, float]): weights for the encoding/decoding of the
            bounding boxes

    Raises:
        ValueError: if backbone has no out_channels attribute, or if not exactly one of
            num_classes and box_predictor is given.

    """

    def __init__(self, backbone, num_classes=None,
                 # transform parameter
                 min_size=800, max_size=1333,      # min/max image size enforced by the resize step
                 image_mean=None, image_std=None,  # mean and std used by the normalize step
                 # RPN parameters
                 rpn_anchor_generator=None, rpn_head=None,
                 rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,    # proposals kept before NMS (by score)
                 rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,  # proposals kept after NMS
                 rpn_nms_thresh=0.7,  # IoU threshold used by NMS inside the RPN
                 rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,  # IoU thresholds for positive/negative RPN samples
                 rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,  # RPN loss sample count and positive share
                 rpn_score_thresh=0.0,
                 # Box parameters
                 box_roi_pool=None, box_head=None, box_predictor=None,
                 # drop low-score detections / NMS threshold of the box head / keep the top detections by score
                 box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
                 box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,   # IoU thresholds for positive/negative box-head samples
                 box_batch_size_per_image=512, box_positive_fraction=0.25,  # box-head loss sample count and positive share
                 bbox_reg_weights=None):
        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)"
            )

        assert isinstance(rpn_anchor_generator, (AnchorsGenerator, type(None)))
        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

        # exactly one of num_classes / box_predictor must be supplied
        if num_classes is not None:
            if box_predictor is not None:
                raise ValueError("num_classes should be None when box_predictor "
                                 "is specified")
        else:
            if box_predictor is None:
                raise ValueError("num_classes should not be None when box_predictor "
                                 "is not specified")

        # channel count of the prediction feature maps
        out_channels = backbone.out_channels

        # without an anchor generator, build the default one intended for resnet50_fpn
        if rpn_anchor_generator is None:
            anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
            aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
            rpn_anchor_generator = AnchorsGenerator(
                anchor_sizes, aspect_ratios
            )

        # the sliding-window prediction head of the RPN
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels, rpn_anchor_generator.num_anchors_per_location()[0]
            )

        # defaults: rpn_pre_nms_top_n_train = 2000, rpn_pre_nms_top_n_test = 1000,
        # defaults: rpn_post_nms_top_n_train = 2000, rpn_post_nms_top_n_test = 1000,
        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)

        # assemble the complete RPN
        rpn = RegionProposalNetwork(
            rpn_anchor_generator, rpn_head,
            rpn_fg_iou_thresh, rpn_bg_iou_thresh,
            rpn_batch_size_per_image, rpn_positive_fraction,
            rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh,
            score_thresh=rpn_score_thresh)

        #  Multi-scale RoIAlign pooling
        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(
                featmap_names=['0', '1', '2', '3'],  # feature maps on which roi pooling is performed
                output_size=[7, 7],
                sampling_ratio=2)

        # the two fully connected layers applied to the flattened roi-pooled features
        if box_head is None:
            resolution = box_roi_pool.output_size[0]  # 7 for the default pooler built above
            representation_size = 1024
            box_head = TwoMLPHead(
                out_channels * resolution ** 2,
                representation_size
            )

        # the prediction layers applied to the box_head output
        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(
                representation_size,
                num_classes)

        # combine roi pooling, box_head and box_predictor
        roi_heads = RoIHeads(
            # box
            box_roi_pool, box_head, box_predictor,
            box_fg_iou_thresh, box_bg_iou_thresh,  # 0.5  0.5
            box_batch_size_per_image, box_positive_fraction,  # 512  0.25
            bbox_reg_weights,
            box_score_thresh, box_nms_thresh, box_detections_per_img)  # 0.05  0.5  100

        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]

        # normalizes, resizes and batches the input data
        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)

        super(FasterRCNN, self).__init__(backbone, rpn, roi_heads, transform)


================================================
FILE: pytorch_object_detection/train_coco_dataset/network_files/image_list.py
================================================
from typing import List, Tuple
from torch import Tensor


class ImageList(object):
    """
    Container for a batch of images that may have different sizes.

    The images are padded to a common size so they fit in a single tensor,
    and the size each image had before padding is stored next to it.
    """

    def __init__(self, tensors, image_sizes):
        # type: (Tensor, List[Tuple[int, int]]) -> None
        """
        Arguments:
            tensors (tensor): image data after padding
            image_sizes (list[tuple[int, int]]): image sizes before padding
        """
        self.image_sizes = image_sizes
        self.tensors = tensors

    def to(self, device):
        # type: (Device) -> ImageList # noqa
        """
        Return a new ImageList whose tensor has been moved with
        ``Tensor.to(device)``; the list of image sizes is passed through as is.
        """
        return ImageList(self.tensors.to(device), self.image_sizes)


================================================
FILE: pytorch_object_detection/train_coco_dataset/network_files/roi_head.py
================================================
from typing import Optional, List, Dict, Tuple

import torch
from torch import Tensor
import torch.nn.functional as F

from . import det_utils
from . import boxes as box_ops


def fastrcnn_loss(class_logits, box_regression, labels, regression_targets):
    # type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]
    """
    Compute the classification loss and the box regression loss of the
    Faster R-CNN detection head.

    Arguments:
        class_logits : predicted class scores, shape=[num_proposals, num_classes]
        box_regression : predicted bounding-box regression parameters
        labels : ground-truth class labels, one tensor per image
        regression_targets : ground-truth regression targets, one tensor per image

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    """

    # merge the per-image lists into single tensors
    flat_labels = torch.cat(labels, dim=0)
    flat_targets = torch.cat(regression_targets, dim=0)

    # classification loss over every sampled proposal
    classification_loss = F.cross_entropy(class_logits, flat_labels)

    # indices of the proposals whose label is > 0; only these
    # take part in the box regression loss (used for advanced indexing below)
    positive_idx = torch.where(torch.gt(flat_labels, 0))[0]

    # class label of each of those proposals
    positive_labels = flat_labels[positive_idx]

    # class_logits is [num_proposals, num_classes]; regroup the regression
    # output so that every class owns a group of 4 values
    num_rows, _num_classes = class_logits.shape
    per_class_regression = box_regression.reshape(num_rows, -1, 4)

    # for each positive proposal pick the 4 values predicted for its own class,
    # sum the smooth-L1 loss and normalise by the total number of labels
    summed_box_loss = det_utils.smooth_l1_loss(
        per_class_regression[positive_idx, positive_labels],
        flat_targets[positive_idx],
        beta=1 / 9,
        size_average=False,
    )
    box_loss = summed_box_loss / flat_labels.numel()

    return classification_loss, box_loss


class RoIHeads(torch.nn.Module):
    """
    Box branch that runs on top of the proposals: RoI pooling, box head and
    box predictor. In training mode it samples proposals and returns the
    classification / box regression losses; otherwise it post-processes the
    predictions into per-image detections.
    """
    __annotations__ = {
        'box_coder': det_utils.BoxCoder,
        'proposal_matcher': det_utils.Matcher,
        'fg_bg_sampler': det_utils.BalancedPositiveNegativeSampler,
    }

    def __init__(self,
                 box_roi_pool,   # Multi-scale RoIAlign pooling
                 box_head,       # TwoMLPHead
                 box_predictor,  # FastRCNNPredictor
                 # Faster R-CNN training
                 fg_iou_thresh, bg_iou_thresh,  # default: 0.5, 0.5
                 batch_size_per_image, positive_fraction,  # default: 512, 0.25
                 bbox_reg_weights,  # None
                 # Faster R-CNN inference
                 score_thresh,        # default: 0.05
                 nms_thresh,          # default: 0.5
                 detection_per_img):  # default: 100
        super(RoIHeads, self).__init__()

        self.box_similarity = box_ops.box_iou

        # matcher that assigns a ground-truth box to every proposal
        self.proposal_matcher = det_utils.Matcher(
            fg_iou_thresh,  # default: 0.5
            bg_iou_thresh,  # default: 0.5
            allow_low_quality_matches=False)

        # sampler that picks positive / negative proposals
        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image,  # default: 512
            positive_fraction)     # default: 0.25

        # fall back to the standard weights when none are given
        if bbox_reg_weights is None:
            bbox_reg_weights = (10., 10., 5., 5.)
        self.box_coder = det_utils.BoxCoder(bbox_reg_weights)

        # sub-modules of the box branch
        self.box_roi_pool = box_roi_pool    # Multi-scale RoIAlign pooling
        self.box_head = box_head            # TwoMLPHead
        self.box_predictor = box_predictor  # FastRCNNPredictor

        # inference-time settings
        self.score_thresh = score_thresh  # default: 0.05
        self.nms_thresh = nms_thresh      # default: 0.5
        self.detection_per_img = detection_per_img  # default: 100

    def assign_targets_to_proposals(self, proposals, gt_boxes, gt_labels):
        # type: (List[Tensor], List[Tensor], List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
        """
        Match every proposal with a gt box and label it as positive,
        background or ignored.

        Args:
            proposals: per-image proposal boxes
            gt_boxes: per-image ground-truth boxes
            gt_labels: per-image ground-truth labels

        Returns:
            (matched_idxs, labels): per-image matched gt index (clamped to be
            >= 0) and per-image label (0 = background, -1 = ignored)
        """
        all_matched_idxs = []
        all_labels = []
        # walk over the proposals / gt boxes / gt labels of each image
        for img_proposals, img_gt_boxes, img_gt_labels in zip(proposals, gt_boxes, gt_labels):
            if img_gt_boxes.numel() == 0:
                # image without any gt box: everything is background
                num_proposals = img_proposals.shape[0]
                img_device = img_proposals.device
                clamped_idxs = torch.zeros(
                    (num_proposals,), dtype=torch.int64, device=img_device
                )
                img_labels = torch.zeros(
                    (num_proposals,), dtype=torch.int64, device=img_device
                )
            else:
                #  set to self.box_similarity when https://github.com/pytorch/pytorch/issues/27495 lands
                # IoU between every gt box and every proposal
                iou_matrix = box_ops.box_iou(img_gt_boxes, img_proposals)

                # best-matching gt index per proposal; the matcher marks
                # iou < low_threshold with -1 and
                # low_threshold <= iou < high_threshold with -2
                raw_idxs = self.proposal_matcher(iou_matrix)

                # clamp so the label lookup below cannot index out of range.
                # Entries that were -1 / -2 now point at gt 0 and pick up its
                # label, which is wrong on purpose and corrected right after.
                clamped_idxs = raw_idxs.clamp(min=0)
                # label of the gt each proposal was matched to
                img_labels = img_gt_labels[clamped_idxs].to(dtype=torch.int64)

                # below the low threshold (-1): background, i.e. negative sample
                is_background = raw_idxs == self.proposal_matcher.BELOW_LOW_THRESHOLD  # -1
                img_labels[is_background] = 0

                # between the thresholds (-2): discarded sample
                is_ignored = raw_idxs == self.proposal_matcher.BETWEEN_THRESHOLDS  # -2
                img_labels[is_ignored] = -1  # -1 is ignored by sampler

            all_matched_idxs.append(clamped_idxs)
            all_labels.append(img_labels)
        return all_matched_idxs, all_labels

    def subsample(self, labels):
        # type: (List[Tensor]) -> List[Tensor]
        """
        Run the BalancedPositiveNegativeSampler on the labels and return, for
        every image, the indices of all sampled proposals (positive and negative).
        """
        pos_masks, neg_masks = self.fg_bg_sampler(labels)
        kept_per_image = []
        for pos_mask, neg_mask in zip(pos_masks, neg_masks):
            # a proposal is kept when it was sampled as positive or as negative
            # (equivalent to torch.nonzero(pos_mask | neg_mask).squeeze(1))
            chosen = pos_mask | neg_mask
            kept_per_image.append(torch.where(chosen)[0])
        return kept_per_image

    def add_gt_proposals(self, proposals, gt_boxes):
        # type: (List[Tensor], List[Tensor]) -> List[Tensor]
        """
        Append the gt boxes of every image to that image's proposals.

        Args:
            proposals: boxes predicted by the rpn for each image of the batch
            gt_boxes:  ground-truth boxes for each image of the batch

        Returns:
            a new list with one concatenated tensor per image
        """
        extended = []
        for img_proposals, img_gt_boxes in zip(proposals, gt_boxes):
            extended.append(torch.cat((img_proposals, img_gt_boxes)))
        return extended

    def check_targets(self, targets):
        # type: (Optional[List[Dict[str, Tensor]]]) -> None
        """Assert that targets exist and that each one has "boxes" and "labels"."""
        assert targets is not None
        has_boxes = ["boxes" in t for t in targets]
        assert all(has_boxes)
        has_labels = ["labels" in t for t in targets]
        assert all(has_labels)

    def select_training_samples(self,
                                proposals,  # type: List[Tensor]
                                targets     # type: Optional[List[Dict[str, Tensor]]]
                                ):
        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
        """
        Split the proposals into positive / negative samples and collect the
        matching gt labels and box regression targets.
        Every returned list has batch_size elements.

        Args:
            proposals: boxes predicted by the rpn
            targets: per-image dicts holding "boxes" and "labels"

        Returns:
            (proposals, labels, regression_targets) restricted to the sampled
            proposals of each image
        """

        # make sure the targets are present and well formed
        self.check_targets(targets)
        # jit.script needs this explicit assert to narrow the Optional type
        assert targets is not None

        first_proposals = proposals[0]
        dtype = first_proposals.dtype
        device = first_proposals.device

        # annotated boxes (cast to the proposal dtype) and labels
        gt_boxes = [t["boxes"].to(dtype) for t in targets]
        gt_labels = [t["labels"] for t in targets]

        # append ground-truth bboxes to proposal
        proposals = self.add_gt_proposals(proposals, gt_boxes)

        # get matching gt indices for each proposal
        matched_idxs, labels = self.assign_targets_to_proposals(proposals, gt_boxes, gt_labels)
        # sample a fixed proportion of positive-negative proposals
        sampled_inds = self.subsample(labels)

        matched_gt_boxes = []
        for img_id in range(len(proposals)):
            # indices of the sampled proposals of this image
            keep = sampled_inds[img_id]
            # keep only the sampled proposals, their labels and their gt indices
            proposals[img_id] = proposals[img_id][keep]
            labels[img_id] = labels[img_id][keep]
            matched_idxs[img_id] = matched_idxs[img_id][keep]

            img_gt_boxes = gt_boxes[img_id]
            if img_gt_boxes.numel() == 0:
                # no gt in this image: use a single all-zero box as placeholder
                img_gt_boxes = torch.zeros((1, 4), dtype=dtype, device=device)
            # gt box matched to each sampled proposal
            matched_gt_boxes.append(img_gt_boxes[matched_idxs[img_id]])

        # encode the gt boxes relative to the proposals (regression targets)
        regression_targets = self.box_coder.encode(matched_gt_boxes, proposals)
        return proposals, labels, regression_targets

    def postprocess_detections(self,
                               class_logits,    # type: Tensor
                               box_regression,  # type: Tensor
                               proposals,       # type: List[Tensor]
                               image_shapes     # type: List[Tuple[int, int]]
                               ):
        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
        """
        Post-process the raw network predictions:
        (1) decode the final bbox coordinates from proposals and regression parameters
        (2) apply softmax to the class predictions
        (3) clip the boxes so that they lie inside the image
        (4) drop all background predictions
        (5) drop low-score predictions
        (6) drop small boxes
        (7) run nms
        (8) keep the first topk results

        Args:
            class_logits: class predictions of the network
            box_regression: box regression parameters predicted by the network
            proposals: proposals produced by the rpn
            image_shapes: size of each image before it was packed into the batch

        Returns:
            (all_boxes, all_scores, all_labels), one entry per image
        """
        device = class_logits.device
        # number of predicted classes
        num_classes = class_logits.shape[-1]

        # number of proposals belonging to each image
        boxes_per_image = [img_proposals.shape[0] for img_proposals in proposals]
        # decode the final bbox coordinates
        pred_boxes = self.box_coder.decode(box_regression, proposals)

        # class probabilities
        pred_scores = F.softmax(class_logits, -1)

        # split boxes and scores per image
        boxes_by_image = pred_boxes.split(boxes_per_image, 0)
        scores_by_image = pred_scores.split(boxes_per_image, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        for img_boxes, img_scores, img_shape in zip(boxes_by_image, scores_by_image, image_shapes):
            # clip the boxes so that out-of-range coordinates sit on the image border
            img_boxes = box_ops.clip_boxes_to_image(img_boxes, img_shape)

            # create labels for each prediction
            class_ids = torch.arange(num_classes, device=device)
            class_ids = class_ids.view(1, -1).expand_as(img_scores)

            # remove prediction with the background label (index 0)
            img_boxes = img_boxes[:, 1:]
            img_scores = img_scores[:, 1:]
            class_ids = class_ids[:, 1:]

            # batch everything, by making every class prediction be a separate instance
            img_boxes = img_boxes.reshape(-1, 4)
            img_scores = img_scores.reshape(-1)
            class_ids = class_ids.reshape(-1)

            # remove low scoring boxes: keep scores > self.score_thresh
            # (equivalent to torch.nonzero(torch.gt(...)).squeeze(1))
            above_thresh = torch.where(torch.gt(img_scores, self.score_thresh))[0]
            img_boxes = img_boxes[above_thresh]
            img_scores = img_scores[above_thresh]
            class_ids = class_ids[above_thresh]

            # remove empty / tiny boxes
            big_enough = box_ops.remove_small_boxes(img_boxes, min_size=1.)
            img_boxes = img_boxes[big_enough]
            img_scores = img_scores[big_enough]
            class_ids = class_ids[big_enough]

            # non-maximum suppression, independently done per class;
            # the original notes that the result comes back sorted by score (descending)
            survivors = box_ops.batched_nms(img_boxes, img_scores, class_ids, self.nms_thresh)

            # keep only topk scoring predictions
            survivors = survivors[:self.detection_per_img]
            img_boxes = img_boxes[survivors]
            img_scores = img_scores[survivors]
            class_ids = class_ids[survivors]

            all_boxes.append(img_boxes)
            all_scores.append(img_scores)
            all_labels.append(class_ids)

        return all_boxes, all_scores, all_labels

    def forward(self,
                features,       # type: Dict[str, Tensor]
                proposals,      # type: List[Tensor]
                image_shapes,   # type: List[Tuple[int, int]]
                targets=None    # type: Optional[List[Dict[str, Tensor]]]
                ):
        # type: (...) -> Tuple[List[Dict[str, Tensor]], Dict[str, Tensor]]
        """
        Arguments:
            features (Dict[str, Tensor])
            proposals (List[Tensor[N, 4]])
            image_shapes (List[Tuple[H, W]])
            targets (List[Dict])

        Returns:
            (result, losses): ``result`` holds one dict with "boxes", "labels"
            and "scores" per image and is only filled outside training;
            ``losses`` holds "loss_classifier" and "loss_box_reg" and is only
            filled in training.
        """

        # validate the dtypes of the targets
        if targets is not None:
            floating_point_types = (torch.float, torch.double, torch.half)
            for t in targets:
                assert t["boxes"].dtype in floating_point_types, "target boxes must of float type"
                assert t["labels"].dtype == torch.int64, "target labels must of int64 type"

        if self.training:
            # sample positive / negative proposals and build their labels and regression targets
            proposals, labels, regression_targets = self.select_training_samples(proposals, targets)
        else:
            labels = None
            regression_targets = None

        # Multi-scale RoIAlign pooling
        # box_features_shape: [num_proposals, channel, height, width]
        box_features = self.box_roi_pool(features, proposals, image_shapes)

        # the two fully connected layers that follow the roi pooling
        # box_features_shape: [num_proposals, representation_size]
        box_features = self.box_head(box_features)

        # predict the class scores and the box regression parameters
        class_logits, box_regression = self.box_predictor(box_features)

        result = torch.jit.annotate(List[Dict[str, torch.Tensor]], [])
        losses = {}
        if self.training:
            assert labels is not None and regression_targets is not None
            loss_classifier, loss_box_reg = fastrcnn_loss(
                class_logits, box_regression, labels, regression_targets)
            losses = {
                "loss_classifier": loss_classifier,
                "loss_box_reg": loss_box_reg
            }
        else:
            boxes, scores, labels = self.postprocess_detections(class_logits, box_regression, proposals, image_shapes)
            for i in range(len(boxes)):
                detections = {
                    "boxes": boxes[i],
                    "labels": labels[i],
                    "scores": scores[i],
                }
                result.append(detections)

        return result, losses


================================================
FILE: pytorch_object_detection/train_coco_dataset/network_files/rpn_function.py
================================================
from typing import List, Optional, Dict, Tuple

import torch
from torch import nn, Tensor
from torch.nn import functional as F
import torchvision

from . import det_utils
from . import boxes as box_ops
from .image_list import ImageList


@torch.jit.unused
def _onnx_get_num_anchors_and_pre_nms_top_n(ob, orig_pre_nms_top_n):
    # type: (Tensor, int) -> Tuple[int, int]
    """
    Return the size of dimension 1 of ``ob`` (as a 1-element tensor) together
    with the smaller of that size and ``orig_pre_nms_top_n``.

    The shape is read through ``torch.onnx.operators.shape_as_tensor`` so the
    values stay tensors instead of Python ints.
    """
    from torch.onnx import operators

    # size of dim 1, kept as a tensor of shape [1]
    num_anchors = operators.shape_as_tensor(ob)[1].unsqueeze(0)

    # min(orig_pre_nms_top_n, num_anchors), computed on tensors
    requested = torch.tensor([orig_pre_nms_top_n], dtype=num_anchors.dtype)
    candidates = torch.cat((requested, num_anchors), 0)
    pre_nms_top_n = torch.min(candidates)

    return num_anchors, pre_nms_top_n


class AnchorsGenerator(nn.Module):
    """
    Anchor generator.

    Module that generates anchors for a set of feature maps and
    image sizes.

    The module support computing anchors at multiple sizes and aspect ratios
    per feature map.

    sizes and aspect_ratios should have the same number of elements, and it should
    correspond to the number of feature maps.

    sizes[i] and aspect_ratios[i] can have an arbitrary number of elements,
    and AnchorGenerator will output a set of sizes[i] * aspect_ratios[i] anchors
    per spatial location for feature map i.

    Arguments:
        sizes (Tuple[Tuple[int]]): anchor sizes, one inner tuple per feature map.
            A flat tuple of numbers is expanded to one single-element tuple
            per entry.
        aspect_ratios (Tuple[Tuple[float]]): h/w ratios, one inner tuple per
            feature map. A flat tuple of numbers is reused for every entry
            of ``sizes``.
    """

    # The docstring above must stay the first statement of the class body,
    # otherwise it is not stored as the class ``__doc__``.
    __annotations__ = {
        "cell_anchors": Optional[List[torch.Tensor]],
        "_cache": Dict[str, List[torch.Tensor]]
    }

    def __init__(self, sizes=(128, 256, 512), aspect_ratios=(0.5, 1.0, 2.0)):
        super(AnchorsGenerator, self).__init__()

        if not isinstance(sizes[0], (list, tuple)):
            # TODO change this
            sizes = tuple((s,) for s in sizes)
        if not isinstance(aspect_ratios[0], (list, tuple)):
            aspect_ratios = (aspect_ratios,) * len(sizes)

        assert len(sizes) == len(aspect_ratios)

        self.sizes = sizes
        self.aspect_ratios = aspect_ratios
        # anchor templates, built lazily by set_cell_anchors
        self.cell_anchors = None
        # grid anchors keyed by str(grid_sizes) + str(strides)
        self._cache = {}

    def generate_anchors(self, scales, aspect_ratios, dtype=torch.float32, device=torch.device("cpu")):
        # type: (List[int], List[float], torch.dtype, torch.device) -> Tensor
        """
        Compute the anchor templates for one feature map.

        Arguments:
            scales: sqrt(anchor_area)
            aspect_ratios: h/w ratios
            dtype: dtype of the returned tensor (float32 by default)
            device: device of the returned tensor (cpu by default)

        Returns:
            Tensor of shape [len(aspect_ratios) * len(scales), 4] holding
            rounded (xmin, ymin, xmax, ymax) boxes centred on (0, 0).
        """
        scales = torch.as_tensor(scales, dtype=dtype, device=device)
        aspect_ratios = torch.as_tensor(aspect_ratios, dtype=dtype, device=device)
        # h/w == ratio and h*w == 1  =>  h = sqrt(ratio), w = 1 / sqrt(ratio)
        h_ratios = torch.sqrt(aspect_ratios)
        w_ratios = 1.0 / h_ratios

        # [r1, r2, r3]' * [s1, s2, s3]
        # number of elements is len(ratios)*len(scales)
        ws = (w_ratios[:, None] * scales[None, :]).view(-1)
        hs = (h_ratios[:, None] * scales[None, :]).view(-1)

        # left-top, right-bottom coordinate relative to anchor center(0, 0)
        base_anchors = torch.stack([-ws, -hs, ws, hs], dim=1) / 2

        return base_anchors.round()

    def set_cell_anchors(self, dtype, device):
        # type: (torch.dtype, torch.device) -> None
        """
        Build the anchor templates for every feature map and store them in
        ``self.cell_anchors``.

        Existing templates are reused when they already live on ``device``.
        NOTE: only the device is compared, so a change of ``dtype`` alone does
        not rebuild them.
        """
        if self.cell_anchors is not None:
            cell_anchors = self.cell_anchors
            assert cell_anchors is not None
            # suppose that all anchors have the same device
            # which is a valid assumption in the current state of the codebase
            if cell_anchors[0].device == device:
                return

        # one template tensor per (sizes, aspect_ratios) pair, all centred on (0, 0)
        cell_anchors = [
            self.generate_anchors(sizes, aspect_ratios, dtype, device)
            for sizes, aspect_ratios in zip(self.sizes, self.aspect_ratios)
        ]
        self.cell_anchors = cell_anchors

    def num_anchors_per_location(self):
        """Return, per feature map, the number of anchors at each sliding-window position."""
        return [len(s) * len(a) for s, a in zip(self.sizes, self.aspect_ratios)]

    # For every combination of (a, (g, s), i) in (self.cell_anchors, zip(grid_sizes, strides), 0:2),
    # output g[i] anchors that are s[i] distance apart in direction i, with the same dimensions as a.
    def grid_anchors(self, grid_sizes, strides):
        # type: (List[List[int]], List[List[Tensor]]) -> List[Tensor]
        """
        Map the anchor templates onto every position of every feature map,
        expressed in coordinates of the original image.

        Args:
            grid_sizes: height and width of each feature map
            strides: step in the original image that corresponds to one step
                on each feature map

        Returns:
            One tensor of shape [grid_height * grid_width * num_templates, 4]
            per feature map.
        """
        anchors = []
        cell_anchors = self.cell_anchors
        assert cell_anchors is not None

        # one iteration per feature map
        for size, stride, base_anchors in zip(grid_sizes, strides, cell_anchors):
            grid_height, grid_width = size
            stride_height, stride_width = stride
            device = base_anchors.device

            # For output anchor, compute [x_center, y_center, x_center, y_center]
            # shape: [grid_width], x coordinates (columns) in the original image
            shifts_x = torch.arange(0, grid_width, dtype=torch.float32, device=device) * stride_width
            # shape: [grid_height], y coordinates (rows) in the original image
            shifts_y = torch.arange(0, grid_height, dtype=torch.float32, device=device) * stride_height

            # coordinate of every feature-map cell in the original image;
            # meshgrid is given (rows, cols) and both outputs have
            # shape [grid_height, grid_width]
            shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x)
            shift_x = shift_x.reshape(-1)
            shift_y = shift_y.reshape(-1)

            # offset applied to (xmin, ymin, xmax, ymax)
            # shape: [grid_width*grid_height, 4]
            shifts = torch.stack([shift_x, shift_y, shift_x, shift_y], dim=1)

            # For every (base anchor, output anchor) pair,
            # offset each zero-centered base anchor by the center of the output anchor.
            # (broadcast add of [cells, 1, 4] and [1, templates, 4])
            shifts_anchor = shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)
            anchors.append(shifts_anchor.reshape(-1, 4))

        return anchors  # List[Tensor(all_num_anchors, 4)]

    def cached_grid_anchors(self, grid_sizes, strides):
        # type: (List[List[int]], List[List[Tensor]]) -> List[Tensor]
        """Return the grid anchors for this configuration, computing and caching them on a miss."""
        key = str(grid_sizes) + str(strides)
        if key in self._cache:
            return self._cache[key]
        anchors = self.grid_anchors(grid_sizes, strides)
        self._cache[key] = anchors
        return anchors

    def forward(self, image_list, feature_maps):
        # type: (ImageList, List[Tensor]) -> List[Tensor]
        """
        Generate the anchors for every image of the batch.

        Args:
            image_list: batch of padded images (``tensors``) with the
                per-image sizes (``image_sizes``)
            feature_maps: feature maps the anchors are generated for

        Returns:
            One tensor per image; each is the concatenation of the anchors of
            all feature maps.
        """
        # (height, width) of every feature map
        grid_sizes = [feature_map.shape[-2:] for feature_map in feature_maps]

        # height and width of the batched input images
        image_size = image_list.tensors.shape[-2:]

        dtype, device = feature_maps[0].dtype, feature_maps[0].device

        # one step in feature map equate n pixel stride in origin image
        strides = [[torch.tensor(image_size[0] // g[0], dtype=torch.int64, device=device),
                    torch.tensor(image_size[1] // g[1], dtype=torch.int64, device=device)] for g in grid_sizes]

        # build the anchor templates from sizes and aspect_ratios
        self.set_cell_anchors(dtype, device)

        # anchors mapped onto the original image (not the templates),
        # one tensor per feature map
        anchors_over_all_feature_maps = self.cached_grid_anchors(grid_sizes, strides)

        # every image of the batch receives the same per-feature-map anchors
        anchors = torch.jit.annotate(List[List[torch.Tensor]], [])
        for _ in image_list.image_sizes:
            anchors_in_image = []
            for anchors_per_feature_map in anchors_over_all_feature_maps:
                anchors_in_image.append(anchors_per_feature_map)
            anchors.append(anchors_in_image)

        # concatenate the anchors of all feature maps for each image
        anchors = [torch.cat(anchors_per_image) for anchors_per_image in anchors]
        # Clear the cache in case that memory leaks.
        self._cache.clear()
        return anchors


class RPNHead(nn.Module):
    """
    RPN head with a classification branch and a regression branch.
    A 3x3 convolution slides over each feature map; two 1x1 convolutions then
    predict the objectness score and the bbox regression parameters.

    Arguments:
        in_channels: number of channels of the input feature
        num_anchors: number of anchors to be predicted
    """

    def __init__(self, in_channels, num_anchors):
        super(RPNHead, self).__init__()
        # 3x3 sliding window, keeps the channel count and the spatial size
        self.conv = nn.Conv2d(in_channels, in_channels, 3, stride=1, padding=1)
        # objectness score per anchor (object vs. background only)
        self.cls_logits = nn.Conv2d(in_channels, num_anchors, 1, stride=1)
        # 4 bbox regression parameters per anchor
        self.bbox_pred = nn.Conv2d(in_channels, num_anchors * 4, 1, stride=1)

        # re-initialise every conv layer: N(0, 0.01) weights, zero bias
        for module in self.children():
            if not isinstance(module, nn.Conv2d):
                continue
            torch.nn.init.normal_(module.weight, std=0.01)
            torch.nn.init.constant_(module.bias, 0)

    def forward(self, x):
        # type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
        """
        Apply the head to every feature map in ``x`` and return the list of
        objectness logits and the list of bbox regression outputs.
        """
        objectness = []
        box_deltas = []
        for feature_map in x:
            hidden = F.relu(self.conv(feature_map))
            objectness.append(self.cls_logits(hidden))
            box_deltas.append(self.bbox_pred(hidden))
        return objectness, box_deltas


def permute_and_flatten(layer, N, A, C, H, W):
    # type: (Tensor, int, int, int, int, int) -> Tensor
    """
    Reorder the dimensions of a prediction tensor and reshape it.

    Args:
        layer: objectness scores or bbox regression parameters predicted on
            one feature map, [N, anchors_per_position * C, H, W]
        N: batch_size
        A: anchors_num_per_position
        C: classes_num or 4(bbox coordinate)
        H: height
        W: width

    Returns:
        layer: the reordered tensor reshaped to [N, -1, C]
    """
    # view needs a tensor that is contiguous in memory, which holds for the
    # input here; after permute that is no longer the case, so the final step
    # uses reshape, which does not depend on contiguity.
    grouped = layer.view(N, -1, C, H, W)          # [N, -1, C, H, W]
    channels_last = grouped.permute(0, 3, 4, 1, 2)  # [N, H, W, -1, C]
    return channels_last.reshape(N, -1, C)


def concat_box_prediction_layers(box_cls, box_regression):
    # type: (List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]
    """
    Bring the predictions of every feature map into the layout [N, -1, C]
    and concatenate them over all feature maps.

    Args:
        box_cls: objectness / class predictions, one tensor per feature map
        box_regression: bbox regression parameters, one tensor per feature map

    Returns:
        (box_cls, box_regression): the concatenated class predictions with the
        batch and anchor dimensions flattened together, and the concatenated
        regression parameters reshaped to [-1, 4]
    """
    cls_levels = []
    reg_levels = []

    # one iteration per feature map
    for level_cls, level_reg in zip(box_cls, box_regression):
        # level_cls: [batch_size, anchors_num_per_position * classes_num, height, width]
        # (for the RPN proposals classes_num is 1: object vs. background)
        N, AxC, H, W = level_cls.shape
        # level_reg: [batch_size, anchors_num_per_position * 4, height, width]
        A = level_reg.shape[1] // 4  # anchors_num_per_position
        C = AxC // A                 # classes_num

        # both become [N, -1, C] (C == 4 for the regression branch)
        cls_levels.append(permute_and_flatten(level_cls, N, A, C, H, W))
        reg_levels.append(permute_and_flatten(level_reg, N, A, 4, H, W))

    # join the feature maps along dim 1, then merge every dim except the last
    merged_cls = torch.cat(cls_levels, dim=1).flatten(0, -2)  # start_dim, end_dim
    merged_reg = torch.cat(reg_levels, dim=1).reshape(-1, 4)
    return merged_cls, merged_reg


class RegionProposalNetwork(torch.nn.Module):
    """
    Implements Region Proposal Network (RPN).

    Arguments:
        anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature
            maps.
        head (nn.Module): module that computes the objectness and regression deltas
        fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be
            considered as positive during training of the RPN.
        bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be
            considered as negative during training of the RPN.
        batch_size_per_image (int): number of anchors that are sampled during training of the RPN
            for computing the loss
        positive_fraction (float): proportion of positive anchors in a mini-batch during training
            of the RPN
        pre_nms_top_n (Dict[str]): number of proposals to keep before applying NMS. It should
            contain two fields: training and testing, to allow for different values depending
            on training or evaluation
        post_nms_top_n (Dict[str]): number of proposals to keep after applying NMS. It should
            contain two fields: training and testing, to allow for different values depending
            on training or evaluation
        nms_thresh (float): NMS threshold used for postprocessing the RPN proposals
        score_thresh (float): proposals whose sigmoid objectness score is below this value
            are dropped before NMS (default 0.0)

    """
    __annotations__ = {
        'box_coder': det_utils.BoxCoder,
        'proposal_matcher': det_utils.Matcher,
        'fg_bg_sampler': det_utils.BalancedPositiveNegativeSampler,
        'pre_nms_top_n': Dict[str, int],
        'post_nms_top_n': Dict[str, int],
    }

    def __init__(self, anchor_generator, head,
                 fg_iou_thresh, bg_iou_thresh,
                 batch_size_per_image, positive_fraction,
                 pre_nms_top_n, post_nms_top_n, nms_thresh, score_thresh=0.0):
        super(RegionProposalNetwork, self).__init__()
        self.anchor_generator = anchor_generator
        self.head = head
        self.box_coder = det_utils.BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))

        # use during training
        # IoU function between anchors and ground-truth boxes
        self.box_similarity = box_ops.box_iou

        self.proposal_matcher = det_utils.Matcher(
            fg_iou_thresh,  # IoU above fg_iou_thresh -> positive sample (original note cites 0.7)
            bg_iou_thresh,  # IoU below bg_iou_thresh -> negative sample (original note cites 0.3)
            allow_low_quality_matches=True
        )

        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image, positive_fraction  # 256, 0.5
        )

        # use during testing
        self._pre_nms_top_n = pre_nms_top_n
        self._post_nms_top_n = post_nms_top_n
        self.nms_thresh = nms_thresh
        self.score_thresh = score_thresh
        # minimum side length passed to remove_small_boxes in filter_proposals
        self.min_size = 1.

    def pre_nms_top_n(self):
        """Return the pre-NMS proposal budget for the current mode (training / testing)."""
        if self.training:
            return self._pre_nms_top_n['training']
        return self._pre_nms_top_n['testing']

    def post_nms_top_n(self):
        """Return the post-NMS proposal budget for the current mode (training / testing)."""
        if self.training:
            return self._post_nms_top_n['training']
        return self._post_nms_top_n['testing']

    def assign_targets_to_anchors(self, anchors, targets):
        # type: (List[Tensor], List[Dict[str, Tensor]]) -> Tuple[List[Tensor], List[Tensor]]
        """
        Find the best-matching ground-truth box for every anchor and label each
        anchor as positive, background, or discarded.

        Args:
            anchors: (List[Tensor]) anchors of each image
            targets: (List[Dict[Tensor]) per-image targets; only the "boxes" entry is read
        Returns:
            labels: per-image float tensors with the anchor label
                    (1 = positive, 0 = background, -1 = discarded).
                    The RPN only separates foreground from background, so every
                    positive anchor gets label 1.
            matched_gt_boxes: per-image tensors with the gt box matched to each anchor
        """
        labels = []
        matched_gt_boxes = []
        # iterate over the anchors and targets of each image
        for anchors_per_image, targets_per_image in zip(anchors, targets):
            gt_boxes = targets_per_image["boxes"]
            if gt_boxes.numel() == 0:
                # no ground truth in this image: every anchor is background
                device = anchors_per_image.device
                matched_gt_boxes_per_image = torch.zeros(anchors_per_image.shape, dtype=torch.float32, device=device)
                labels_per_image = torch.zeros((anchors_per_image.shape[0],), dtype=torch.float32, device=device)
            else:
                # IoU between every ground-truth box and every anchor
                # set to self.box_similarity when https://github.com/pytorch/pytorch/issues/27495 lands
                match_quality_matrix = box_ops.box_iou(gt_boxes, anchors_per_image)
                # index of the best-matching gt for each anchor; the matcher marks
                # low-IoU anchors with -1 and in-between anchors with -2 (see the
                # BELOW_LOW_THRESHOLD / BETWEEN_THRESHOLDS comparisons below)
                matched_idxs = self.proposal_matcher(match_quality_matrix)
                # get the targets corresponding GT for each proposal
                # NB: need to clamp the indices because we can have a single
                # GT in the image, and matched_idxs can be -2, which goes
                # out of bounds
                matched_gt_boxes_per_image = gt_boxes[matched_idxs.clamp(min=0)]

                labels_per_image = matched_idxs >= 0
                labels_per_image = labels_per_image.to(dtype=torch.float32)

                # background (negative examples)
                bg_indices = matched_idxs == self.proposal_matcher.BELOW_LOW_THRESHOLD  # -1
                labels_per_image[bg_indices] = 0.0

                # discard indices that are between thresholds
                inds_to_discard = matched_idxs == self.proposal_matcher.BETWEEN_THRESHOLDS  # -2
                labels_per_image[inds_to_discard] = -1.0

            labels.append(labels_per_image)
            matched_gt_boxes.append(matched_gt_boxes_per_image)
        return labels, matched_gt_boxes

    def _get_top_n_idx(self, objectness, num_anchors_per_level):
        # type: (Tensor, List[int]) -> Tensor
        """
        For every feature level, pick the indices of the pre_nms_top_n anchors
        with the highest objectness.

        Args:
            objectness: Tensor with the predicted objectness of each image, one row per image
            num_anchors_per_level: List with the number of anchors predicted on each feature level
        Returns:
            Tensor of indices into dim 1 of ``objectness``; the per-level
            selections are concatenated along dim 1.
        """
        r = []  # top indices collected per feature level
        offset = 0
        # iterate over the objectness scores of each feature level
        for ob in objectness.split(num_anchors_per_level, 1):
            if torchvision._is_tracing():
                num_anchors, pre_nms_top_n = _onnx_get_num_anchors_and_pre_nms_top_n(ob, self.pre_nms_top_n())
            else:
                num_anchors = ob.shape[1]  # number of anchors on this feature level
                pre_nms_top_n = min(self.pre_nms_top_n(), num_anchors)

            # Returns the k largest elements of the given input tensor along a given dimension
            _, top_n_idx = ob.topk(pre_nms_top_n, dim=1)
            # shift level-local indices so they address the concatenated tensor
            r.append(top_n_idx + offset)
            offset += num_anchors
        return torch.cat(r, dim=1)

    def filter_proposals(self, proposals, objectness, image_shapes, num_anchors_per_level):
        # type: (Tensor, Tensor, List[Tuple[int, int]], List[int]) -> Tuple[List[Tensor], List[Tensor]]
        """
        Drop small boxes and low-score boxes, run per-level NMS, and keep the
        post_nms_top_n highest-scoring proposals of each image.

        Args:
            proposals: predicted box coordinates
            objectness: predicted objectness logits
            image_shapes: size of every image in the batch
            num_anchors_per_level: number of anchors predicted on each feature level

        Returns:
            final_boxes: list with the kept boxes of each image
            final_scores: list with the corresponding sigmoid objectness scores
        """
        num_images = proposals.shape[0]
        device = proposals.device

        # do not backprop throught objectness
        objectness = objectness.detach()
        objectness = objectness.reshape(num_images, -1)

        # Returns a tensor of size size filled with fill_value
        # levels records, for every anchor, the index of the feature level it came from
        levels = [torch.full((n, ), idx, dtype=torch.int64, device=device)
                  for idx, n in enumerate(num_anchors_per_level)]
        levels = torch.cat(levels, 0)

        # Expand this tensor to the same size as objectness
        levels = levels.reshape(1, -1).expand_as(objectness)

        # select top_n boxes independently per level before applying nms
        top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)

        image_range = torch.arange(num_images, device=device)
        batch_idx = image_range[:, None]  # [batch_size, 1]

        # gather the objectness, level id and box coordinates of the selected anchors
        objectness = objectness[batch_idx, top_n_idx]
        levels = levels[batch_idx, top_n_idx]
        proposals = proposals[batch_idx, top_n_idx]

        objectness_prob = torch.sigmoid(objectness)

        final_boxes = []
        final_scores = []
        # iterate over the predictions of each image
        for boxes, scores, lvl, img_shape in zip(proposals, objectness_prob, levels, image_shapes):
            # clip coordinates that fall outside the image onto the image border
            boxes = box_ops.clip_boxes_to_image(boxes, img_shape)

            # indices of boxes whose sides satisfy the min_size requirement
            keep = box_ops.remove_small_boxes(boxes, self.min_size)
            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]

            # remove low-score boxes, see
            # https://github.com/pytorch/vision/pull/3205
            keep = torch.where(torch.ge(scores, self.score_thresh))[0]  # ge: >=
            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]

            # non-maximum suppression, independently done per level
            keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)

            # keep only topk scoring predictions
            keep = keep[: self.post_nms_top_n()]
            boxes, scores = boxes[keep], scores[keep]

            final_boxes.append(boxes)
            final_scores.append(scores)
        return final_boxes, final_scores

    def compute_loss(self, objectness, pred_bbox_deltas, labels, regression_targets):
        # type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]
        """
        Compute the RPN losses: the objectness (foreground / background) loss
        and the box regression loss.

        Arguments:
            objectness (Tensor): predicted foreground logits
            pred_bbox_deltas (Tensor): predicted box regression parameters
            labels (List[Tensor]): target labels 1, 0, -1 (one list element per image in the batch)
            regression_targets (List[Tensor]): target box regression parameters

        Returns:
            objectness_loss (Tensor): classification loss
            box_loss (Tensor): box regression loss
        """
        # sample positive and negative anchors according to batch_size_per_image and positive_fraction
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        # concatenate the per-image masks of the batch and take the indices of the set entries
        # equivalent to: torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)
        sampled_pos_inds = torch.where(torch.cat(sampled_pos_inds, dim=0))[0]
        # equivalent to: torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)
        sampled_neg_inds = torch.where(torch.cat(sampled_neg_inds, dim=0))[0]

        # indices of all sampled anchors (positives followed by negatives)
        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)
        objectness = objectness.flatten()

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)

        # box regression loss: evaluated on positive anchors only, then divided
        # by the total number of sampled anchors
        box_loss = det_utils.smooth_l1_loss(
            pred_bbox_deltas[sampled_pos_inds],
            regression_targets[sampled_pos_inds],
            beta=1 / 9,
            size_average=False,
        ) / (sampled_inds.numel())

        # objectness loss over all sampled anchors
        objectness_loss = F.binary_cross_entropy_with_logits(
            objectness[sampled_inds], labels[sampled_inds]
        )

        return objectness_loss, box_loss

    def forward(self,
                images,        # type: ImageList
                features,      # type: Dict[str, Tensor]
                targets=None   # type: Optional[List[Dict[str, Tensor]]]
                ):
        # type: (...) -> Tuple[List[Tensor], Dict[str, Tensor]]
        """
        Arguments:
            images (ImageList): images for which we want to compute the predictions
            features (Dict[Tensor]): features computed from the images that are
                used for computing the predictions. Each tensor in the list
                correspond to different feature levels
            targets (List[Dict[Tensor]): ground-truth boxes present in the image (optional).
                If provided, each element in the dict should contain a field `boxes`,
                with the locations of the ground-truth boxes.

        Returns:
            boxes (List[Tensor]): the predicted boxes from the RPN, one Tensor per
                image.
            losses (Dict[Tensor]): the losses for the model during training. During
                testing, it is an empty dict.
        """
        # RPN uses all feature maps that are available
        # features is a dict of all prediction feature levels; keep only the tensors
        features = list(features.values())

        # objectness and box regression parameters predicted on every feature level
        # (both objectness and pred_bbox_deltas are lists)
        objectness, pred_bbox_deltas = self.head(features)

        # anchors for the whole batch; one list element per image
        anchors = self.anchor_generator(images, features)

        # batch_size
        num_images = len(anchors)

        # number of anchors on each feature level: o[0] drops the batch
        # dimension and the three remaining dimensions are multiplied together
        num_anchors_per_level_shape_tensors = [o[0].shape for o in objectness]
        num_anchors_per_level = [s[0] * s[1] * s[2] for s in num_anchors_per_level_shape_tensors]

        # rearrange and reshape the per-level predictions into flat tensors
        objectness, pred_bbox_deltas = concat_box_prediction_layers(objectness,
                                                                    pred_bbox_deltas)

        # apply pred_bbox_deltas to anchors to obtain the decoded proposals
        # note that we detach the deltas because Faster R-CNN do not backprop through
        # the proposals
        proposals = self.box_coder.decode(pred_bbox_deltas.detach(), anchors)
        proposals = proposals.view(num_images, -1, 4)

        # drop small boxes, run NMS and keep the post_nms_top_n best proposals per image
        boxes, scores = self.filter_proposals(proposals, objectness, images.image_sizes, num_anchors_per_level)

        losses = {}
        if self.training:
            assert targets is not None
            # match every anchor to a gt box and label it foreground / background / discarded
            labels, matched_gt_boxes = self.assign_targets_to_anchors(anchors, targets)
            # encode the matched gt boxes relative to the anchors to get regression targets
            regression_targets = self.box_coder.encode(matched_gt_boxes, anchors)
            loss_objectness, loss_rpn_box_reg = self.compute_loss(
                objectness, pred_bbox_deltas, labels, regression_targets
            )
            losses = {
                "loss_objectness": loss_objectness,
                "loss_rpn_box_reg": loss_rpn_box_reg
            }
        return boxes, losses


================================================
FILE: pytorch_object_detection/train_coco_dataset/network_files/transform.py
================================================
import math
from typing import List, Tuple, Dict, Optional

import torch
from torch import nn, Tensor
import torchvision

from .image_list import ImageList


@torch.jit.unused
def _resize_image_onnx(image, self_min_size, self_max_size):
    # type: (Tensor, float, float) -> Tensor
    from torch.onnx import operators
    im_shape = operators.shape_as_tensor(image)[-2:]
    min_size = torch.min(im_shape).to(dtype=torch.float32)
    max_size = torch.max(im_shape).to(dtype=torch.float32)
    scale_factor = torch.min(self_min_size / min_size, self_max_size / max_size)

    image = torch.nn.functional.interpolate(
        image[None], scale_factor=scale_factor, mode="bilinear", recompute_scale_factor=True,
        align_corners=False)[0]

    return image


def _resize_image(image, self_min_size, self_max_size):
    # type: (Tensor, float, float) -> Tensor
    im_shape = torch.tensor(image.shape[-2:])
    min_size = float(torch.min(im_shape))    # 获取高宽中的最小值
    max_size = float(torch.max(im_shape))    # 获取高宽中的最大值
    scale_factor = self_min_size / min_size  # 根据指定最小边长和图片最小边长计算缩放比例

    # 如果使用该缩放比例计算的图片最大边长大于指定的最大边长
    if max_size * scale_factor > self_max_size:
        scale_factor = self_max_size / max_size  # 将缩放比例设为指定最大边长和图片最大边长之比

    # interpolate利用插值的方法缩放图片
    # image[None]操作是在最前面添加batch维度[C, H, W] -> [1, C, H, W]
    # bilinear只支持4D Tensor
    image = torch.nn.functional.interpolate(
        image[None], scale_factor=scale_factor, mode="bilinear", recompute_scale_factor=True,
        align_corners=False)[0]

    return image


class GeneralizedRCNNTransform(nn.Module):
    """
    Performs input / target transformation before feeding the data to a GeneralizedRCNN
    model.

    The transformations it perform are:
        - input normalization (mean subtraction and std division)
        - input / target resizing to match min_size / max_size

    It returns a ImageList for the inputs, and a List[Dict[Tensor]] for the targets
    """

    def __init__(self, min_size, max_size, image_mean, image_std):
        """
        Args:
            min_size: target length(s) of the shorter image side; a scalar is
                wrapped into a 1-tuple
            max_size: upper bound for the longer image side
            image_mean: per-channel mean used for normalization
            image_std: per-channel standard deviation used for normalization
        """
        super(GeneralizedRCNNTransform, self).__init__()
        if not isinstance(min_size, (list, tuple)):
            min_size = (min_size,)
        self.min_size = min_size      # candidate lengths for the shorter side
        self.max_size = max_size      # upper bound for the longer side
        self.image_mean = image_mean  # per-channel mean for normalization
        self.image_std = image_std    # per-channel std for normalization

    def normalize(self, image):
        """Normalize ``image`` channel-wise: ``(image - mean) / std``."""
        dtype, device = image.dtype, image.device
        mean = torch.as_tensor(self.image_mean, dtype=dtype, device=device)
        std = torch.as_tensor(self.image_std, dtype=dtype, device=device)
        # [:, None, None]: shape [3] -> [3, 1, 1] so it broadcasts over H and W
        return (image - mean[:, None, None]) / std[:, None, None]

    def torch_choice(self, k):
        # type: (List[int]) -> int
        """
        Implements `random.choice` via torch ops so it can be compiled with
        TorchScript. Remove if https://github.com/pytorch/pytorch/issues/25803
        is fixed.
        """
        index = int(torch.empty(1).uniform_(0., float(len(k))).item())
        return k[index]

    def resize(self, image, target):
        # type: (Tensor, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]
        """
        Resize the image into the configured size range and scale the boxes
        of ``target`` accordingly.

        Note: ``target["boxes"]`` is overwritten on the dict that is passed in.

        Args:
            image: input image, shape [channel, height, width]
            target: per-image annotations (must contain "boxes"), or None

        Returns:
            image: the resized image
            target: the same dict with rescaled "boxes" (or None)
        """
        # image shape is [channel, height, width]
        h, w = image.shape[-2:]

        if self.training:
            # pick one of the configured shorter-side lengths at random
            size = float(self.torch_choice(self.min_size))
        else:
            # FIXME assume for now that testing uses the largest scale
            size = float(self.min_size[-1])

        if torchvision._is_tracing():
            image = _resize_image_onnx(image, size, float(self.max_size))
        else:
            image = _resize_image(image, size, float(self.max_size))

        if target is None:
            return image, target

        bbox = target["boxes"]
        # scale the boxes by the same ratio as the image
        bbox = resize_boxes(bbox, [h, w], image.shape[-2:])
        target["boxes"] = bbox

        return image, target

    # _onnx_batch_images() is an implementation of
    # batch_images() that is supported by ONNX tracing.
    @torch.jit.unused
    def _onnx_batch_images(self, images, size_divisible=32):
        # type: (List[Tensor], int) -> Tensor
        """Tracing-friendly version of ``batch_images`` built on ``torch.nn.functional.pad``."""
        max_size = []
        for i in range(images[0].dim()):
            max_size_i = torch.max(torch.stack([img.shape[i] for img in images]).to(torch.float32)).to(torch.int64)
            max_size.append(max_size_i)
        stride = size_divisible
        # round height and width up to a multiple of stride
        max_size[1] = (torch.ceil((max_size[1].to(torch.float32)) / stride) * stride).to(torch.int64)
        max_size[2] = (torch.ceil((max_size[2].to(torch.float32)) / stride) * stride).to(torch.int64)
        max_size = tuple(max_size)

        # work around for
        # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
        # which is not yet supported in onnx
        padded_imgs = []
        for img in images:
            padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))]
            padded_img = torch.nn.functional.pad(img, [0, padding[2], 0, padding[1], 0, padding[0]])
            padded_imgs.append(padded_img)

        return torch.stack(padded_imgs)

    def max_by_axis(self, the_list):
        # type: (List[List[int]]) -> List[int]
        """
        Return the element-wise maximum over all sub-lists.

        The result is a new list; ``the_list`` and its sub-lists are left
        untouched (the first sub-list is copied before being used as the
        running maximum).
        """
        maxes = list(the_list[0])
        for sublist in the_list[1:]:
            for index, item in enumerate(sublist):
                maxes[index] = max(maxes[index], item)
        return maxes

    def batch_images(self, images, size_divisible=32):
        # type: (List[Tensor], int) -> Tensor
        """
        Pack a list of images into a single batch tensor (every image in the
        batch ends up with the same shape).

        Args:
            images: the images to batch
            size_divisible: height and width of the batch are rounded up to a
                multiple of this number

        Returns:
            batched_imgs: zero-padded tensor of shape [batch, channel, height, width]
        """

        if torchvision._is_tracing():
            # batch_images() does not export well to ONNX
            # call _onnx_batch_images() instead
            return self._onnx_batch_images(images, size_divisible)

        # largest channel, height and width among all images of the batch
        max_size = self.max_by_axis([list(img.shape) for img in images])

        stride = float(size_divisible)
        # round height up to a multiple of stride
        max_size[1] = int(math.ceil(float(max_size[1]) / stride) * stride)
        # round width up to a multiple of stride
        max_size[2] = int(math.ceil(float(max_size[2]) / stride) * stride)

        # [batch, channel, height, width]
        batch_shape = [len(images)] + max_size

        # zero-filled tensor of shape batch_shape
        batched_imgs = images[0].new_full(batch_shape, 0)
        for img, pad_img in zip(images, batched_imgs):
            # copy every image into the top-left corner of its slot so that
            # box coordinates stay valid
            # copy_: Copies the elements from src into self tensor and returns self
            pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)

        return batched_imgs

    def postprocess(self,
                    result,                # type: List[Dict[str, Tensor]]
                    image_shapes,          # type: List[Tuple[int, int]]
                    original_image_sizes   # type: List[Tuple[int, int]]
                    ):
        # type: (...) -> List[Dict[str, Tensor]]
        """
        Post-process the network predictions by mapping the boxes back to the
        scale of the original images. In training mode ``result`` is returned
        unchanged.

        Args:
            result: list(dict), predictions of the network, len(result) == batch_size
            image_shapes: list(torch.Size), image sizes after resizing, len(image_shapes) == batch_size
            original_image_sizes: list(torch.Size), original image sizes, len(original_image_sizes) == batch_size

        Returns:
            ``result`` with the "boxes" of every entry rescaled
        """
        if self.training:
            return result

        # rescale the boxes of every image back to the original image scale
        for i, (pred, im_s, o_im_s) in enumerate(zip(result, image_shapes, original_image_sizes)):
            boxes = pred["boxes"]
            boxes = resize_boxes(boxes, im_s, o_im_s)
            result[i]["boxes"] = boxes
        return result

    def __repr__(self):
        """Human-readable summary of the normalization and resize settings."""
        format_string = self.__class__.__name__ + '('
        _indent = '\n    '
        format_string += "{0}Normalize(mean={1}, std={2})".format(_indent, self.image_mean, self.image_std)
        format_string += "{0}Resize(min_size={1}, max_size={2}, mode='bilinear')".format(_indent, self.min_size,
                                                                                         self.max_size)
        format_string += '\n)'
        return format_string

    def forward(self,
                images,       # type: List[Tensor]
                targets=None  # type: Optional[List[Dict[str, Tensor]]]
                ):
        # type: (...) -> Tuple[ImageList, Optional[List[Dict[str, Tensor]]]]
        """
        Normalize and resize every image (and its boxes), then pad the images
        into one batch.

        The caller's ``images`` list and ``targets`` dicts are not modified:
        both are shallow-copied first, and the transformed targets are only
        available through the return value.

        Args:
            images: list of 3-D image tensors of shape [C, H, W]
            targets: optional list with one annotation dict per image

        Returns:
            image_list: ImageList holding the batched tensor and the per-image
                sizes after resizing (before padding)
            targets: the transformed targets, or None

        Raises:
            ValueError: if an image is not a 3-D tensor
        """
        images = [img for img in images]
        if targets is not None:
            # resize() overwrites target["boxes"]; work on shallow copies so the
            # dicts owned by the caller keep their original boxes.
            # Written as explicit loops (no dict comprehension) to stay
            # TorchScript-friendly.
            targets_copy = torch.jit.annotate(List[Dict[str, Tensor]], [])
            for t in targets:
                data = torch.jit.annotate(Dict[str, Tensor], {})
                for k, v in t.items():
                    data[k] = v
                targets_copy.append(data)
            targets = targets_copy

        for i in range(len(images)):
            image = images[i]
            target_index = targets[i] if targets is not None else None

            if image.dim() != 3:
                raise ValueError("images is expected to be a list of 3d tensors "
                                 "of shape [C, H, W], got {}".format(image.shape))
            image = self.normalize(image)                           # channel-wise normalization
            image, target_index = self.resize(image, target_index)  # resize image and boxes
            images[i] = image
            if targets is not None and target_index is not None:
                targets[i] = target_index

        # remember the image sizes after resizing (before padding)
        image_sizes = [img.shape[-2:] for img in images]
        images = self.batch_images(images)  # pad and stack into one batch tensor
        image_sizes_list = torch.jit.annotate(List[Tuple[int, int]], [])

        for image_size in image_sizes:
            assert len(image_size) == 2
            image_sizes_list.append((image_size[0], image_size[1]))

        image_list = ImageList(images, image_sizes_list)
        return image_list, targets


def resize_boxes(boxes, original_size, new_size):
    # type: (Tensor, List[int], List[int]) -> Tensor
    """
    Rescale boxes from one image size to another.

    Arguments:
        boxes: tensor of shape [num_boxes, 4] in (xmin, ymin, xmax, ymax) order
        original_size: (height, width) of the image the boxes currently refer to
        new_size: (height, width) of the image the boxes should refer to

    Returns:
        Tensor of shape [num_boxes, 4] with x coordinates multiplied by the
        width ratio and y coordinates multiplied by the height ratio.
    """
    # Sizes are given as (height, width), so the first ratio is for the
    # y axis and the second for the x axis.
    scale_h, scale_w = [
        torch.tensor(dst, dtype=torch.float32, device=boxes.device) /
        torch.tensor(src, dtype=torch.float32, device=boxes.device)
        for dst, src in zip(new_size, original_size)
    ]

    # unbind(1) splits the [num_boxes, 4] tensor into its four columns
    left, top, right, bottom = boxes.unbind(1)
    scaled_columns = (left * scale_w, top * scale_h, right * scale_w, bottom * scale_h)
    return torch.stack(scaled_columns, dim=1)


================================================
FILE: pytorch_object_detection/train_coco_dataset/plot_curve.py
================================================
import datetime
import matplotlib.pyplot as plt


def plot_loss_and_lr(train_loss, learning_rate):
    """
    Plot the training loss and the learning rate against the step index on a
    shared x axis and save the figure as ``./loss_and_lr<timestamp>.png``.

    Any exception raised while plotting or saving is caught and printed, so
    a plotting failure does not propagate to the caller.

    Args:
        train_loss: sequence of loss values, one per step
        learning_rate: sequence of learning rates, one per step
    """
    try:
        num_steps = len(train_loss)
        steps = list(range(num_steps))

        # Left y axis: loss
        figure, loss_axis = plt.subplots(1, 1)
        loss_axis.plot(steps, train_loss, 'r', label='loss')
        loss_axis.set_xlabel("step")
        loss_axis.set_ylabel("loss")
        loss_axis.set_title("Train Loss and lr")
        plt.legend(loc='best')

        # Right y axis: learning rate, sharing the x axis with the loss
        lr_axis = loss_axis.twinx()
        lr_axis.plot(steps, learning_rate, label='lr')
        lr_axis.set_ylabel("learning rate")
        lr_axis.set_xlim(0, num_steps)
        plt.legend(loc='best')

        # One combined legend holding the entries of both axes
        loss_handles, loss_labels = loss_axis.get_legend_handles_labels()
        lr_handles, lr_labels = lr_axis.get_legend_handles_labels()
        plt.legend(loss_handles + lr_handles, loss_labels + lr_labels, loc='upper right')

        # Leave room on the right so the saved image is not cut off
        figure.subplots_adjust(right=0.8)
        timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        figure.savefig('./loss_and_lr{}.png'.format(timestamp))
        plt.close()
        print("successful save loss curve! ")
    except Exception as e:
        print(e)


def plot_map(mAP):
    """
    Plot the evaluation mAP per epoch and save the figure as ``./mAP.png``.

    Any exception raised while plotting or saving is caught and printed, so
    a plotting failure does not propagate to the caller.

    Args:
        mAP: sequence of mAP values, one per epoch
    """
    try:
        num_epochs = len(mAP)
        epochs = list(range(num_epochs))

        plt.plot(epochs, mAP, label='mAp')
        plt.xlabel('epoch')
        plt.ylabel('mAP')
        plt.title('Eval mAP')
        plt.xlim(0, num_epochs)
        plt.legend(loc='best')

        plt.savefig('./mAP.png')
        plt.close()
        print("successful save mAP curve!")
    except Exception as e:
        print(e)


================================================
FILE: pytorch_object_detection/train_coco_dataset/predict.py
================================================
import os
import time
import json

import torch
import torchvision
from PIL import Image
import matplotlib.pyplot as plt
from torchvision import transforms
from torchvision.models.feature_extraction import create_feature_extractor

from network_files import FasterRCNN, AnchorsGenerator
from backbone import vgg, MobileNetV2, resnet50
from draw_box_utils import draw_objs


def create_model(num_classes):
    """
    Build a Faster R-CNN detector on a ResNet-50 "layer3" feature extractor.

    Args:
        num_classes: value forwarded to ``FasterRCNN`` as ``num_classes``
            (the caller in this file passes the class count plus one for
            background)

    Returns:
        The assembled ``FasterRCNN`` model; no weights are loaded here.
    """
    # Use the output of layer3 as the single feature map, exposed under key "0"
    feature_net = create_feature_extractor(resnet50(), return_nodes={"layer3": "0"})
    feature_net.out_channels = 1024

    rpn_anchors = AnchorsGenerator(
        sizes=((32, 64, 128, 256, 512),),
        aspect_ratios=((0.5, 1.0, 2.0),),
    )

    box_pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=['0'],   # feature maps on which RoI pooling is performed
        output_size=[7, 7],    # spatial size of the pooled features
        sampling_ratio=2,      # sampling ratio
    )

    return FasterRCNN(
        backbone=feature_net,
        num_classes=num_classes,
        rpn_anchor_generator=rpn_anchors,
        box_roi_pool=box_pooler,
    )


def time_synchronized():
    """
    Return the current wall-clock time from ``time.time()``.

    When CUDA is available, ``torch.cuda.synchronize()`` is called first so
    the timestamp is taken after pending GPU work has finished.
    """
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()


def main():
    """
    Run single-image inference with a trained Faster R-CNN model.

    Loads the weights from ``./save_weights/model_25.pth`` and the category
    names from ``./coco91_indices.json``, runs the detector on ``./test.jpg``,
    prints the inference time, shows the annotated image and saves it as
    ``test_result.jpg``.

    Raises:
        AssertionError: if the weights file or the label json file is missing.
    """
    # get devices
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # create model
    num_classes = 90  # background not included
    model = create_model(num_classes=num_classes + 1)

    # load train weights
    weights_path = "./save_weights/model_25.pth"
    assert os.path.exists(weights_path), "{} file dose not exist.".format(weights_path)
    weights_dict = torch.load(weights_path, map_location='cpu')
    # checkpoints may store the state dict under the "model" key
    weights_dict = weights_dict["model"] if "model" in weights_dict else weights_dict
    model.load_state_dict(weights_dict)
    model.to(device)

    # read class_indict
    label_json_path = './coco91_indices.json'
    assert os.path.exists(label_json_path), "json file {} dose not exist.".format(label_json_path)
    with open(label_json_path, 'r') as f:
        category_index = json.load(f)

    # load image
    # Force 3-channel RGB: grayscale / RGBA / palette images would otherwise
    # yield a tensor whose channel count differs from the 3-channel input the
    # model is fed below (see the warm-up tensor).
    original_img = Image.open("./test.jpg").convert("RGB")

    # from pil image to tensor, do not normalize image
    data_transform = transforms.Compose([transforms.ToTensor()])
    img = data_transform(original_img)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)

    model.eval()  # switch to evaluation mode
    with torch.no_grad():
        # warm-up pass with an all-zero image of the same size so that the
        # timed pass below does not include one-time initialisation
        img_height, img_width = img.shape[-2:]
        init_img = torch.zeros((1, 3, img_height, img_width), device=device)
        model(init_img)

        t_start = time_synchronized()
        predictions = model(img.to(device))[0]
        t_end = time_synchronized()
        print("inference+NMS time: {}".format(t_end - t_start))

        predict_boxes = predictions["boxes"].to("cpu").numpy()
        predict_classes = predictions["labels"].to("cpu").numpy()
        predict_scores = predictions["scores"].to("cpu").numpy()

        if len(predict_boxes) == 0:
            print("没有检测到任何目标!")

        plot_img = draw_objs(original_img,
                             predict_boxes,
                             predict_classes,
                             predict_scores,
                             category_index=category_index,
                             box_thresh=0.5,
                             line_thickness=3,
                             font='arial.ttf',
                             font_size=20)
        plt.imshow(plot_img)
        plt.show()
        # save the annotated prediction result
        plot_img.save("test_result.jpg")


if __name__ == '__main__':
    main()


================================================
FILE: pytorch_object_detection/train_coco_dataset/requirements.txt
================================================
lxml
matplotlib
numpy
tqdm
pycocotools
Pillow
torch==1.10
torchvision==0.11.1


================================================
FILE: pytorch_object_detection/train_coco_dataset/results20220408-201436.txt
================================================
epoch:0 0.0504  0.1144  0.0362  0.0207  0.0601  0.0657  0.0702  0.1069  0.1087  0.0335  0.1153  0.1486  1.7430  0.005000
epoch:1 0.1138  0.2300  0.0994  0.0494  0.1279  0.1554  0.1303  0.1940  0.1980  0.0747  0.2051  0.2831  1.2282  0.005000
epoch:2 0.1461  0.2773  0.1394  0.0636  0.1635  0.1997  0.1530  0.2243  0.2288  0.0938  0.2435  0.3309  1.1391  0.005000
epoch:3 0.1669  0.3134  0.1642  0.0750  0.1843  0.2282  0.1680  0.2509  0.2561  0.1091  0.2705  0.3701  1.0902  0.005000
epoch:4 0.1857  0.3389  0.1828  0.0829  0.2074  0.2568  0.1830  0.2708  0.2756  0.1140  0.2937  0.3998  1.0581  0.005000
epoch:5 0.1908  0.3431  0.1930  0.0901  0.2128  0.2578  0.1839  0.2704  0.2753  0.1197  0.2927  0.3893  1.0337  0.005000
epoch:6 0.2044  0.3634  0.2077  0.0954  0.2247  0.2796  0.1947  0.2893  0.2956  0.1317  0.3138  0.4178  1.0127  0.005000
epoch:7 0.2068  0.3651  0.2099  0.0953  0.2269  0.2840  0.1959  0.2869  0.2926  0.1290  0.3093  0.4186  0.9945  0.005000
epoch:8 0.2171  0.3788  0.2218  0.0996  0.2470  0.2969  0.2012  0.3001  0.3071  0.1329  0.3375  0.4371  0.9806  0.005000
epoch:9 0.2146  0.3717  0.2207  0.0946  0.2315  0.3038  0.2011  0.2910  0.2962  0.1277  0.3091  0.4321  0.9691  0.005000
epoch:10 0.2280  0.3974  0.2345  0.1035  0.2535  0.3108  0.2118  0.3119  0.3182  0.1402  0.3429  0.4537  0.9567  0.005000
epoch:11 0.2332  0.3983  0.2443  0.1111  0.2534  0.3149  0.2136  0.3128  0.3190  0.1515  0.3417  0.4438  0.9450  0.005000
epoch:12 0.2400  0.4094  0.2486  0.1102  0.2622  0.3251  0.2175  0.3214  0.3289  0.1507  0.3521  0.4588  0.9369  0.005000
epoch:13 0.2449  0.4152  0.2563  0.1121  0.2741  0.3308  0.2234  0.3286  0.3363  0.1552  0.3703  0.4627  0.9286  0.005000
epoch:14 0.2466  0.4192  0.2542  0.1131  0.2765  0.3412  0.2220  0.3258  0.3322  0.1481  0.3627  0.4776  0.9203  0.005000
epoch:15 0.2492  0.4216  0.2569  0.1147  0.2781  0.3417  0.2254  0.3337  0.3402  0.1565  0.3666  0.4893  0.9116  0.005000
epoch:16 0.2689  0.4433  0.2814  0.1246  0.2963  0.3705  0.2384  0.3495  0.3569  0.1671  0.3864  0.5046  0.8616  0.000500
epoch:17 0.2719  0.4473  0.2865  0.1243  0.3021  0.3743  0.2399  0.3519  0.3593  0.1669  0.3931  0.5017  0.8515  0.000500
epoch:18 0.2738  0.4521  0.2857  0.1256  0.3048  0.3718  0.2416  0.3564  0.3645  0.1713  0.3996  0.5037  0.8472  0.000500
epoch:19 0.2759  0.4534  0.2893  0.1259  0.3094  0.3719  0.2448  0.3603  0.3681  0.1691  0.4073  0.5055  0.8439  0.000500
epoch:20 0.2720  0.4483  0.2838  0.1250  0.3021  0.3681  0.2400  0.3532  0.3613  0.1688  0.3944  0.4994  0.8417  0.000500
epoch:21 0.2748  0.4501  0.2904  0.1241  0.3019  0.3759  0.2421  0.3561  0.3641  0.1682  0.3941  0.5101  0.8378  0.000500
epoch:22 0.2754  0.4532  0.2896  0.1281  0.3064  0.3759  0.2419  0.3586  0.3660  0.1712  0.3993  0.5115  0.8304  0.000050
epoch:23 0.2757  0.4516  0.2907  0.1271  0.3068  0.3748  0.2423  0.3572  0.3650  0.1692  0.4005  0.5087  0.8307  0.000050
epoch:24 0.2750  0.4500  0.2888  0.1256  0.3017  0.3760  0.2411  0.3536  0.3611  0.1669  0.3894  0.5040  0.8299  0.000050
epoch:25 0.2769  0.4537  0.2903  0.1263  0.3082  0.3782  0.2424  0.3582  0.3663  0.1693  0.4020  0.5116  0.8281  0.000050


================================================
FILE: pytorch_object_detection/train_coco_dataset/train.py
================================================
import os
import datetime

import torch
import torchvision

import transforms
from network_files import FasterRCNN, AnchorsGenerator
from backbone import MobileNetV2, vgg, resnet50
from my_dataset import CocoDetection
from train_utils import train_eval_utils as utils
from train_utils import GroupedBatchSampler, create_aspect_ratio_groups
from torchvision.models.feature_extraction import create_feature_extractor


def create_model(num_classes):
    """Build a Faster R-CNN detector on top of a single-scale ResNet-50 backbone.

    The ResNet-50 weights are read from ``./resnet50.pth`` and the output of
    ``layer3`` (1024 channels) is used as the only feature map.

    Args:
        num_classes: number of output classes passed straight to ``FasterRCNN``
            (the caller adds 1 for the background class).

    Returns:
        The assembled ``FasterRCNN`` model.
    """
    # --- Alternative backbone: VGG16 ---------------------------------------
    # Pretrained weights: https://download.pytorch.org/models/vgg16-397923af.pth
    # vgg16 = vgg(model_name="vgg16", weights_path="./vgg16.pth")
    # Drop the final maxpool layer of ``features``:
    # backbone = create_feature_extractor(vgg16, return_nodes={"features.29": "0"})
    # backbone.out_channels = 512

    # --- Alternative backbone: MobileNetV2 ---------------------------------
    # Pretrained weights: https://download.pytorch.org/models/mobilenet_v2-b0353104.pth
    # backbone = MobileNetV2(weights_path="./mobilenet_v2.pth").features
    # backbone.out_channels = 1280  # channels of the backbone's output feature map

    # --- Active backbone: ResNet-50 ----------------------------------------
    # Pretrained weights: https://download.pytorch.org/models/resnet50-19c8e357.pth
    resnet = resnet50()
    pretrained_state = torch.load("./resnet50.pth", map_location="cpu")
    resnet.load_state_dict(pretrained_state)
    backbone = create_feature_extractor(resnet, return_nodes={"layer3": "0"})
    backbone.out_channels = 1024

    # One feature map -> one tuple of anchor sizes and one tuple of aspect ratios.
    anchors = AnchorsGenerator(
        sizes=((32, 64, 128, 256, 512),),
        aspect_ratios=((0.5, 1.0, 2.0),),
    )

    roi_align = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=["0"],   # feature maps on which RoI pooling is performed
        output_size=[7, 7],    # spatial size of the pooled RoI features
        sampling_ratio=2,      # sampling ratio
    )

    return FasterRCNN(
        backbone=backbone,
        num_classes=num_classes,
        rpn_anchor_generator=anchors,
        box_roi_pool=roi_align,
    )


def main(args):
    """Train Faster R-CNN on COCO on a single device and evaluate after every epoch.

    For each epoch the COCO metrics, mean training loss and learning rate are
    appended to a timestamped ``results*.txt`` file, and a checkpoint
    (model / optimizer / lr scheduler / epoch, plus the AMP scaler when enabled)
    is written to ``args.output_dir``.  Loss/lr and mAP curves are plotted at the end.

    Args:
        args: parsed command-line namespace.  Attributes read here: ``device``,
            ``data_path``, ``aspect_ratio_group_factor``, ``batch_size``,
            ``num_classes``, ``lr``, ``momentum``, ``weight_decay``, ``amp``,
            ``lr_steps``, ``lr_gamma``, ``resume``, ``start_epoch``, ``epochs``
            and ``output_dir``.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    # File that receives the per-epoch COCO metrics, loss and learning rate
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    data_transform = {
        "train": transforms.Compose([transforms.ToTensor(),
                                     transforms.RandomHorizontalFlip(0.5)]),
        "val": transforms.Compose([transforms.ToTensor()])
    }

    COCO_root = args.data_path

    # load train data set
    # coco2017 -> annotations -> instances_train2017.json
    train_dataset = CocoDetection(COCO_root, "train", data_transform["train"])
    train_sampler = None
    train_batch_sampler = None

    # Optionally build batches from images with similar aspect ratios.
    # This reduces the GPU memory needed for training and is enabled by default.
    if args.aspect_ratio_group_factor >= 0:
        train_sampler = torch.utils.data.RandomSampler(train_dataset)
        # Bin index of every image's aspect ratio
        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)
        # Every batch is drawn from a single aspect-ratio bin
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)

    # NOTE: collate_fn is custom because each sample is an (image, target) pair
    # that the default collate function cannot merge into a batch.
    batch_size = args.batch_size
    # os.cpu_count() may return None when the count cannot be determined.
    cpu_count = os.cpu_count() or 1
    nw = min([cpu_count, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using %g dataloader workers' % nw)

    # Test identity, not truthiness: a sampler's truth value is derived from its length.
    if train_sampler is not None:
        # Aspect-ratio grouping requires passing a batch_sampler to the DataLoader
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_sampler=train_batch_sampler,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)
    else:
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_size=batch_size,
                                                        shuffle=True,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)

    # load validation data set
    # coco2017 -> annotations -> instances_val2017.json
    val_dataset = CocoDetection(COCO_root, "val", data_transform["val"])
    val_data_loader = torch.utils.data.DataLoader(val_dataset,
                                                  batch_size=1,
                                                  shuffle=False,
                                                  pin_memory=True,
                                                  num_workers=nw,
                                                  collate_fn=train_dataset.collate_fn)

    # create model num_classes equal background + classes
    model = create_model(num_classes=args.num_classes + 1)
    # print(model)

    model.to(device)

    train_loss = []
    learning_rate = []
    val_map = []

    # define optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        milestones=args.lr_steps,
                                                        gamma=args.lr_gamma)
    # When --resume points at a previous checkpoint, continue training from it
    if args.resume:
        # If map_location is missing, torch.load will first load the module to CPU
        # and then copy each parameter to where it was saved,
        # which would result in all processes on the same machine using the same set of devices.
        # The checkpoint also carries the optimizer and lr-scheduler state.
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp and "scaler" in checkpoint:
            scaler.load_state_dict(checkpoint["scaler"])

    # Checkpoints go to the user-selected directory (previously a hard-coded
    # "./save_weights", which broke any non-default --output-dir).
    os.makedirs(args.output_dir, exist_ok=True)

    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch, printing every 50 iterations
        mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader,
                                              device, epoch, print_freq=50,
                                              warmup=True, scaler=scaler)
        train_loss.append(mean_loss.item())
        learning_rate.append(lr)

        # update the learning rate
        lr_scheduler.step()

        # evaluate on the test dataset
        coco_info = utils.evaluate(model, val_data_loader, device=device)

        # write into txt
        with open(results_file, "a") as f:
            # One line per epoch: COCO metrics, then mean loss, then learning rate
            result_info = [f"{i:.4f}" for i in coco_info + [mean_loss.item()]] + [f"{lr:.6f}"]
            txt = "epoch:{} {}".format(epoch, '  '.join(result_info))
            f.write(txt + "\n")

        val_map.append(coco_info[1])  # pascal mAP

        # save weights
        save_files = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler': lr_scheduler.state_dict(),
            'epoch': epoch}
        if args.amp:
            save_files["scaler"] = scaler.state_dict()
        torch.save(save_files, os.path.join(args.output_dir, "model_{}.pth".format(epoch)))

    # plot loss and lr curve
    if len(train_loss) != 0 and len(learning_rate) != 0:
        from plot_curve import plot_loss_and_lr
        plot_loss_and_lr(train_loss, learning_rate)

    # plot mAP curve
    if len(val_map) != 0:
        from plot_curve import plot_map
        plot_map(val_map)


if __name__ == "__main__":
    import argparse

    def _str2bool(value):
        """Parse a command-line boolean such as ``True``/``false``/``1``/``no``.

        Without a converter argparse stores the raw string, so ``--amp False``
        would yield the truthy string ``"False"`` and silently enable AMP.
        """
        if isinstance(value, bool):
            return value
        lowered = value.strip().lower()
        if lowered in ("1", "true", "t", "yes", "y", "on"):
            return True
        if lowered in ("0", "false", "f", "no", "n", "off"):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser(
        description=__doc__)

    # Training device
    parser.add_argument('--device', default='cuda:0', help='device')
    # Root directory of the training data set
    parser.add_argument('--data-path', default='/data/coco2017', help='dataset')
    # Number of object classes (background excluded)
    parser.add_argument('--num-classes', default=90, type=int, help='num_classes')
    # Directory where checkpoints are saved
    parser.add_argument('--output-dir', default='./save_weights', help='path where to save')
    # Checkpoint to resume from, if training should continue from a previous run
    parser.add_argument('--resume', default='', type=str, help='resume from checkpoint')
    # Epoch index to start from
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # Total number of training epochs
    parser.add_argument('--epochs', default=26, type=int, metavar='N',
                        help='number of total epochs to run')
    # Learning rate
    parser.add_argument('--lr', default=0.005, type=float,
                        help='initial learning rate, 0.02 is the default value for training '
                             'on 8 gpus and 2 images_per_gpu')
    # SGD momentum
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # SGD weight decay
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # Milestones for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int,
                        help='decrease lr every step-size epochs')
    # Decay factor for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')
    # Training batch size (use a larger value if RAM / GPU memory allows)
    parser.add_argument('--batch_size', default=4, type=int, metavar='N',
                        help='batch size when training.')
    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
    # Mixed-precision training (requires a GPU that supports it).
    # Parsed explicitly so that "--amp False" really disables it.
    parser.add_argument("--amp", default=False, type=_str2bool,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()
    print(args)

    # Create the checkpoint directory if it does not exist yet
    os.makedirs(args.output_dir, exist_ok=True)

    main(args)


================================================
FILE: pytorch_object_detection/train_coco_dataset/train_multi_GPU.py
================================================
import time
import os
import datetime

import torch
import torchvision

import transforms
from my_dataset import CocoDetection
from backbone import resnet50
from network_files import FasterRCNN, AnchorsGenerator
import train_utils.train_eval_utils as utils
from train_utils import GroupedBatchSampler, create_aspect_ratio_groups, init_distributed_mode, save_on_master, mkdir
from torchvision.models.feature_extraction import create_feature_extractor


def create_model(num_classes):
    """Build a Faster R-CNN detector on top of a single-scale ResNet-50 backbone.

    The ResNet-50 weights are read from ``./resnet50.pth`` and the output of
    ``layer3`` (1024 channels) is used as the only feature map.

    Args:
        num_classes: number of output classes passed straight to ``FasterRCNN``
            (the caller adds 1 for the background class).

    Returns:
        The assembled ``FasterRCNN`` model.
    """
    # ResNet-50 backbone
    # Pretrained weights: https://download.pytorch.org/models/resnet50-19c8e357.pth
    resnet = resnet50()
    pretrained_state = torch.load("./resnet50.pth", map_location="cpu")
    resnet.load_state_dict(pretrained_state)
    backbone = create_feature_extractor(resnet, return_nodes={"layer3": "0"})
    backbone.out_channels = 1024

    # One feature map -> one tuple of anchor sizes and one tuple of aspect ratios.
    anchors = AnchorsGenerator(
        sizes=((32, 64, 128, 256, 512),),
        aspect_ratios=((0.5, 1.0, 2.0),),
    )

    roi_align = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=["0"],   # feature maps on which RoI pooling is performed
        output_size=[7, 7],    # spatial size of the pooled RoI features
        sampling_ratio=2,      # sampling ratio
    )

    return FasterRCNN(
        backbone=backbone,
        num_classes=num_classes,
        rpn_anchor_generator=anchors,
        box_roi_pool=roi_align,
    )


def main(args):
    """Train Faster R-CNN on COCO, optionally distributed, evaluating after every epoch.

    Steps, in order: initialise distributed mode, build the COCO train/val
    datasets and loaders, create the model (optionally converting to
    SyncBatchNorm and wrapping in DistributedDataParallel), set up SGD with a
    MultiStepLR schedule, optionally resume from a checkpoint, then run the
    train / lr-step / evaluate / log / checkpoint loop and finally plot curves.

    Args:
        args: parsed command-line namespace.  Besides the parser options it is
            read for ``distributed``, ``gpu`` and ``rank``, which are not parser
            options in this file -- presumably populated by
            ``init_distributed_mode`` (TODO confirm).
    """
    init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)

    # File that receives the per-epoch COCO metrics, loss and learning rate
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    # Data loading code
    print("Loading data")

    data_transform = {
        "train": transforms.Compose([transforms.ToTensor(),
                                     transforms.RandomHorizontalFlip(0.5)]),
        "val": transforms.Compose([transforms.ToTensor()])
    }

    COCO_root = args.data_path

    # load train data set
    # coco2017 -> annotations -> instances_train2017.json
    train_dataset = CocoDetection(COCO_root, "train", data_transform["train"])

    # load validation data set
    # coco2017 -> annotations -> instances_val2017.json
    val_dataset = CocoDetection(COCO_root, "val", data_transform["val"])

    print("Creating data loaders")
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        test_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
    else:
        train_sampler = torch.utils.data.RandomSampler(train_dataset)
        test_sampler = torch.utils.data.SequentialSampler(val_dataset)

    if args.aspect_ratio_group_factor >= 0:
        # Bin index of every image's aspect ratio; batches are then drawn per bin
        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)
    else:
        train_batch_sampler = torch.utils.data.BatchSampler(
            train_sampler, args.batch_size, drop_last=True)

    data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_sampler=train_batch_sampler, num_workers=args.workers,
        collate_fn=train_dataset.collate_fn)

    data_loader_test = torch.utils.data.DataLoader(
        val_dataset, batch_size=1,
        sampler=test_sampler, num_workers=args.workers,
        collate_fn=train_dataset.collate_fn)

    print("Creating model")
    # create model num_classes equal background + classes
    model = create_model(num_classes=args.num_classes + 1)
    model.to(device)

    if args.distributed and args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    # Keep a handle on the unwrapped module so checkpoints store plain state-dict keys
    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(
        params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)

    # When --resume points at a previous checkpoint, continue training from it
    if args.resume:
        # If map_location is missing, torch.load will first load the module to CPU
        # and then copy each parameter to where it was saved,
        # which would result in all processes on the same machine using the same set of devices.
        # The checkpoint also carries the optimizer and lr-scheduler state.
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp and "scaler" in checkpoint:
            scaler.load_state_dict(checkpoint["scaler"])

    train_loss = []
    learning_rate = []
    val_map = []

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        mean_loss, lr = utils.train_one_epoch(model, optimizer, data_loader,
                                              device, epoch, args.print_freq,
                                              warmup=True, scaler=scaler)

        # update learning rate
        lr_scheduler.step()

        # evaluate after every epoch
        coco_info = utils.evaluate(model, data_loader_test, device=device)

        # Only the main process (rank -1 or 0) records metrics and writes the results file
        if args.rank in [-1, 0]:
            train_loss.append(mean_loss.item())
            learning_rate.append(lr)
            val_map.append(coco_info[1])  # pascal mAP

            # write into txt
            with open(results_file, "a") as f:
                # One line per epoch: COCO metrics, then mean loss, then learning rate
                result_info = [f"{i:.4f}" for i in coco_info + [mean_loss.item()]] + [f"{lr:.6f}"]
                txt = "epoch:{} {}".format(epoch, '  '.join(result_info))
                f.write(txt + "\n")

        if args.output_dir:
            # save_on_master is expected to write the checkpoint from the main process only
            # (NOTE(review): helper is defined in train_utils -- confirm)
            save_files = {'model': model_without_ddp.state_dict(),
                          'optimizer': optimizer.state_dict(),
                          'lr_scheduler': lr_scheduler.state_dict(),
                          'args': args,
                          'epoch': epoch}
            if args.amp:
                save_files["scaler"] = scaler.state_dict()
            save_on_master(save_files,
                           os.path.join(args.output_dir, f'model_{epoch}.pth'))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))

    if args.rank in [-1, 0]:
        # plot loss and lr curve
        if len(train_loss) != 0 and len(learning_rate) != 0:
            from plot_curve import plot_loss_and_lr
            plot_loss_and_lr(train_loss, learning_rate)

        # plot mAP curve
        if len(val_map) != 0:
            from plot_curve import plot_map
            plot_map(val_map)


if __name__ == "__main__":
    import argparse

    def _str2bool(value):
        """Parse a command-line boolean such as ``True``/``false``/``1``/``no``.

        ``type=bool`` (and an untyped option) treat every non-empty string as
        true, so ``--sync-bn False`` / ``--amp False`` used to switch the
        feature ON.  This converter interprets the text instead.
        """
        if isinstance(value, bool):
            return value
        lowered = value.strip().lower()
        if lowered in ("1", "true", "t", "yes", "y", "on"):
            return True
        if lowered in ("0", "false", "f", "no", "n", "off"):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser(
        description=__doc__)

    # Root directory of the training files (coco2017)
    parser.add_argument('--data-path', default='/data/coco2017', help='dataset')
    # Training device
    parser.add_argument('--device', default='cuda', help='device')
    # Number of object classes (background excluded)
    parser.add_argument('--num-classes', default=90, type=int, help='num_classes')
    # Batch size on each GPU
    parser.add_argument('-b', '--batch-size', default=4, type=int,
                        help='images per gpu, the total batch size is $NGPU x batch_size')
    # Epoch index to start from
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # Total number of training epochs
    parser.add_argument('--epochs', default=26, type=int, metavar='N',
                        help='number of total epochs to run')
    # Number of data loading / preprocessing workers
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    # Learning rate; scale with the GPU count and batch size: 0.02 / 8 * num_GPU
    parser.add_argument('--lr', default=0.01, type=float,
                        help='initial learning rate, 0.02 is the default value for training '
                             'on 8 gpus and 2 images_per_gpu')
    # SGD momentum
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # SGD weight decay
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # Step size for torch.optim.lr_scheduler.StepLR
    parser.add_argument('--lr-step-size', default=8, type=int, help='decrease lr every step-size epochs')
    # Milestones for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int,
                        help='decrease lr every step-size epochs')
    # Decay factor for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')
    # How often (in iterations) training progress is printed
    parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
    # Directory where checkpoints are saved
    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')
    # Continue training from a previous checkpoint
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)

    # Number of processes to launch (processes, not threads)
    parser.add_argument('--world-size', default=4, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
    # Parsed explicitly: bool("False") is True, so type=bool cannot be used here.
    parser.add_argument("--sync-bn", dest="sync_bn", help="Use sync batch norm", type=_str2bool, default=False)
    # Mixed-precision training (requires a GPU that supports it)
    parser.add_argument("--amp", default=False, type=_str2bool,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()

    # If an output directory was given, create it when it does not exist yet
    if args.output_dir:
        mkdir(args.output_dir)

    main(args)


================================================
FILE: pytorch_object_detection/train_coco_dataset/train_utils/__init__.py
================================================
from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups
from .distributed_utils import init_distributed_mode, save_on_master, mkdir
from .coco_eval import EvalCOCOMetric


================================================
FILE: pytorch_object_detection/train_coco_dataset/train_utils/coco_eval.py
================================================
import json
import copy

import numpy as np
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import pycocotools.mask as mask_util
from .distributed_utils import all_gather, is_main_process


def merge(img_ids, eval_results):
    """Combine the image ids and per-image results gathered from every process.

    Args:
        img_ids: image ids handled by the calling process.
        eval_results: per-image results, aligned index-for-index with ``img_ids``.

    Returns:
        ``(ids, results)`` where ``ids`` is the sorted list of unique image ids
        and ``results`` holds, for each id, the result at its first occurrence.
    """
    gathered_ids = all_gather(img_ids)
    gathered_results = all_gather(eval_results)

    # Flatten the per-process lists, keeping ids and results index-aligned.
    flat_ids = np.array([img_id for part in gathered_ids for img_id in part])
    flat_results = [res for part in gathered_results for res in part]

    # keep only unique (and in sorted order) images
    # With multi-GPU evaluation an image may be assigned to more than one process
    # so that every process sees the same number of images; drop those duplicates.
    unique_ids, first_seen = np.unique(flat_ids, return_index=True)
    kept_results = [flat_results[pos] for pos in first_seen]

    return list(unique_ids), kept_results


class EvalCOCOMetric:
    """Accumulate predictions in COCO result format and score them with ``COCOeval``.

    Typical use: call :meth:`update` for every evaluated batch, then
    :meth:`synchronize_results` once, then :meth:`evaluate`.

    Attributes are plain Python containers:
        img_ids: ids of the images recorded by this process.
        results: one list of result dicts per recorded image (aligned with ``img_ids``).
        aggregation_results: merged ids/results from all processes, set by
            :meth:`synchronize_results`.
    """

    def __init__(self,
                 coco: COCO = None,
                 iou_type: str = None,
                 results_file_name: str = "predict_results.json",
                 classes_mapping: dict = None):
        """
        Args:
            coco: ground-truth COCO object; a deep copy is stored.
            iou_type: one of ``"bbox"``, ``"segm"`` or ``"keypoints"``
                (note that :meth:`update` only handles ``"bbox"`` and ``"segm"``).
            results_file_name: JSON file the merged predictions are written to.
            classes_mapping: optional ``{str(predicted_label): category_id}``
                lookup applied to every predicted label.
        """
        self.coco = copy.deepcopy(coco)
        self.img_ids = []  # ids of the images handled by this process
        self.results = []
        self.aggregation_results = None
        self.classes_mapping = classes_mapping
        self.coco_evaluator = None
        assert iou_type in ["bbox", "segm", "keypoints"]
        self.iou_type = iou_type
        self.results_file_name = results_file_name

    def prepare_for_coco_detection(self, targets, outputs):
        """Convert detection outputs to the COCOeval result format and store them.

        Args:
            targets: per-image dicts providing ``"image_id"``.
            outputs: per-image dicts providing ``"boxes"`` (``[x_min, y_min,
                x_max, y_max]`` rows), ``"labels"`` and ``"scores"``.  The
                caller's tensors are left unmodified.
        """
        # Iterate over the predictions of each image
        for target, output in zip(targets, outputs):
            if len(output) == 0:
                continue

            img_id = int(target["image_id"])
            if img_id in self.img_ids:
                # Skip images that were already recorded
                continue
            self.img_ids.append(img_id)

            per_image_boxes = output["boxes"]
            # COCOeval expects [x_min, y_min, w, h] while the predictions are
            # [x_min, y_min, x_max, y_max].  Compute width/height into a new
            # tensor instead of subtracting in place, which would silently
            # rewrite the caller's output["boxes"].
            top_left = per_image_boxes[:, :2]
            sizes = per_image_boxes[:, 2:] - top_left
            per_image_xywh = [xy + wh for xy, wh in zip(top_left.tolist(), sizes.tolist())]
            per_image_classes = output["labels"].tolist()
            per_image_scores = output["scores"].tolist()

            res_list = []
            # Iterate over every detected object
            for object_score, object_class, object_box in zip(
                    per_image_scores, per_image_classes, per_image_xywh):
                object_score = float(object_score)
                class_idx = int(object_class)
                if self.classes_mapping is not None:
                    class_idx = int(self.classes_mapping[str(class_idx)])
                # Round coordinates to two decimal places to reduce the
                # size of the resulting JSON file.
                object_box = [round(b, 2) for b in object_box]

                res = {"image_id": img_id,
                       "category_id": class_idx,
                       "bbox": object_box,
                       "score": round(object_score, 3)}
                res_list.append(res)
            self.results.append(res_list)

    def prepare_for_coco_segmentation(self, targets, outputs):
        """Convert instance-segmentation outputs to the COCOeval result format and store them.

        Args:
            targets: per-image dicts providing ``"image_id"``.
            outputs: per-image dicts providing ``"masks"``, ``"labels"`` and
                ``"scores"``; masks are binarised at 0.5 and RLE-encoded.
        """
        # Iterate over the predictions of each image
        for target, output in zip(targets, outputs):
            if len(output) == 0:
                continue

            img_id = int(target["image_id"])
            if img_id in self.img_ids:
                # Skip images that were already recorded
                continue

            self.img_ids.append(img_id)
            per_image_masks = output["masks"]
            per_image_classes = output["labels"].tolist()
            per_image_scores = output["scores"].tolist()

            masks = per_image_masks > 0.5

            res_list = []
            # Iterate over every predicted instance
            for mask, label, score in zip(masks, per_image_classes, per_image_scores):
                rle = mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0]
                # JSON cannot serialise bytes, so store the RLE counts as text
                rle["counts"] = rle["counts"].decode("utf-8")

                class_idx = int(label)
                if self.classes_mapping is not None:
                    class_idx = int(self.classes_mapping[str(class_idx)])

                res = {"image_id": img_id,
                       "category_id": class_idx,
                       "segmentation": rle,
                       "score": round(score, 3)}
                res_list.append(res)
            self.results.append(res_list)

    def update(self, targets, outputs):
        """Record one batch of predictions according to ``self.iou_type``.

        Raises:
            KeyError: if ``iou_type`` is neither ``"bbox"`` nor ``"segm"``.
        """
        if self.iou_type == "bbox":
            self.prepare_for_coco_detection(targets, outputs)
        elif self.iou_type == "segm":
            self.prepare_for_coco_segmentation(targets, outputs)
        else:
            raise KeyError(f"not support iou_type: {self.iou_type}")

    def synchronize_results(self):
        """Merge the results of all processes and dump them to ``results_file_name``.

        Every process stores the merged data in ``aggregation_results``; only
        the main process writes the JSON file.
        """
        # Gather the data recorded by every process
        eval_ids, eval_results = merge(self.img_ids, self.results)
        self.aggregation_results = {"img_ids": eval_ids, "results": eval_results}

        # Writing from the main process is sufficient
        if is_main_process():
            # Flatten the per-image lists into one list of result dicts
            results = [res for per_image in eval_results for res in per_image]
            # write predict results into json file
            json_str = json.dumps(results, indent=4)
            with open(self.results_file_name, 'w') as json_file:
                json_file.write(json_str)

    def evaluate(self):
        """Run COCOeval on the saved results file.

        Returns:
            The ``COCOeval.stats`` values as a list on the main process,
            ``None`` on every other process.
        """
        # Evaluating on the main process is sufficient
        if is_main_process():
            # accumulate predictions from all images
            coco_true = self.coco
            coco_pre = coco_true.loadRes(self.results_file_name)

            self.coco_evaluator = COCOeval(cocoGt=coco_true, cocoDt=coco_pre, iouType=self.iou_type)

            self.coco_evaluator.evaluate()
            self.coco_evaluator.accumulate()
            print(f"IoU metric: {self.iou_type}")
            self.coco_evaluator.summarize()

            coco_info = self.coco_evaluator.stats.tolist()  # numpy to list
            return coco_info
        else:
            return None


================================================
FILE: pytorch_object_detection/train_coco_dataset/train_utils/distributed_utils.py
================================================
from collections import defaultdict, deque
import datetime
import pickle
import time
import errno
import os

import torch
import torch.distributed as dist


class SmoothedValue(object):
    """Running statistic over a stream of numbers.

    Keeps the most recent ``window_size`` values for windowed statistics
    (median / avg / max / value) together with a running count and total for
    the average over everything seen so far.
    """
    def __init__(self, window_size=20, fmt=None):
        # Default rendering: latest value followed by the global average.
        self.fmt = "{value:.4f} ({global_avg:.4f})" if fmt is None else fmt
        # Bounded deque: old entries fall off once window_size is exceeded.
        self.deque = deque(maxlen=window_size)
        self.count = 0
        self.total = 0.0

    def update(self, value, n=1):
        """Record ``value``, weighting it ``n`` times in the global total."""
        self.deque.append(value)
        self.total += value * n
        self.count += n

    def synchronize_between_processes(self):
        """
        Sum ``count`` and ``total`` across processes.

        Warning: does not synchronize the deque!
        """
        if is_dist_avail_and_initialized():
            stats = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda")
            dist.barrier()
            dist.all_reduce(stats)
            synced_count, synced_total = stats.tolist()
            self.count = int(synced_count)
            self.total = synced_total

    @property
    def median(self):
        """Median of the values currently in the window (read-only)."""
        return torch.tensor(list(self.deque)).median().item()

    @property
    def avg(self):
        """Mean of the values currently in the window, computed in float32."""
        window = torch.tensor(list(self.deque), dtype=torch.float32)
        return window.mean().item()

    @property
    def global_avg(self):
        """Mean over every value ever recorded."""
        return self.total / self.count

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    @property
    def value(self):
        """Most recently recorded value."""
        return self.deque[-1]

    def __str__(self):
        fields = {
            "median": self.median,
            "avg": self.avg,
            "global_avg": self.global_avg,
            "max": self.max,
            "value": self.value,
        }
        return self.fmt.format(**fields)


def all_gather(data):
    """
    Collect ``data`` from every process.
    Run all_gather on arbitrary picklable data (not necessarily tensors)
    Args:
        data: any picklable object
    Returns:
        list[data]: list of data gathered from each rank
    """
    num_procs = get_world_size()  # number of processes

    if num_procs != 1:
        # One slot per rank, filled in by all_gather_object.
        gathered = [None for _ in range(num_procs)]
        dist.all_gather_object(gathered, data)
        return gathered

    # Single process: nothing to communicate.
    return [data]


def reduce_dict(input_dict, average=True):
    """
    Reduce the values in the dictionary from all processes so that all processes
    have the averaged results. Returns a dict with the same fields as
    input_dict, after reduction.
    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum
    """
    num_procs = get_world_size()
    if num_procs < 2:
        # Single-process case: nothing to reduce.
        return input_dict

    # Multi-process case.
    with torch.no_grad():
        # sort the keys so that they are consistent across processes
        ordered_keys = sorted(input_dict.keys())
        stacked = torch.stack([input_dict[key] for key in ordered_keys], dim=0)
        dist.all_reduce(stacked)
        if average:
            stacked /= num_procs

        return dict(zip(ordered_keys, stacked))


class MetricLogger(object):
    """Hold a set of named meters and print progress lines while iterating.

    Meters are created on demand as ``SmoothedValue()`` the first time a name
    is passed to :meth:`update`, or registered explicitly with
    :meth:`add_meter`. A meter can be read back as an attribute of the logger.
    """

    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Feed one scalar per keyword into the meter of the same name.

        Tensor values are converted with ``.item()``; every value must end up
        as a ``float`` or ``int``.
        """
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        # __getattr__ only runs when normal lookup fails. Read `meters` through
        # __dict__ so that an instance without `meters` (e.g. one created
        # without __init__ during copy/unpickle) raises AttributeError instead
        # of recursing back into __getattr__ forever.
        meters = self.__dict__.get("meters")
        if meters is not None and attr in meters:
            return meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        """Join ``"name: meter"`` for every meter with ``self.delimiter``."""
        return self.delimiter.join(
            "{}: {}".format(name, str(meter))
            for name, meter in self.meters.items()
        )

    def synchronize_between_processes(self):
        """Call ``synchronize_between_processes()`` on every meter."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register (or replace) the meter stored under ``name``."""
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield the items of ``iterable`` and print a status line periodically.

        A line is printed every ``print_freq`` items and for the last item; a
        total-time summary is printed once the iterable is exhausted.

        Args:
            iterable: sized iterable (``len()`` is required).
            print_freq (int): number of items between two status lines.
            header (str | None): text prepended to every line.
        """
        i = 0
        if not header:
            header = ""
        num_iters = len(iterable)
        use_cuda = torch.cuda.is_available()
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        space_fmt = ":" + str(len(str(num_iters))) + "d"
        fields = [header,
                  '[{0' + space_fmt + '}/{1}]',
                  'eta: {eta}',
                  '{meters}',
                  'time: {time}',
                  'data: {data}']
        if use_cuda:
            # peak CUDA memory is only reported when CUDA is available
            fields.append('max mem: {memory:.0f}')
        log_msg = self.delimiter.join(fields)
        MB = 1024.0 * 1024.0
        for obj in iterable:
            data_time.update(time.time() - end)
            yield obj
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == num_iters - 1:
                eta_seconds = iter_time.global_avg * (num_iters - i)
                # truncate to whole seconds so the ETA prints as H:MM:SS,
                # consistent with the total time printed at the end
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
                fmt_kwargs = dict(eta=eta_string,
                                  meters=str(self),
                                  time=str(iter_time),
                                  data=str(data_time))
                if use_cuda:
                    fmt_kwargs["memory"] = torch.cuda.max_memory_allocated() / MB
                print(log_msg.format(i, num_iters, **fmt_kwargs))
            i += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        # an empty iterable must not turn the summary line into a ZeroDivisionError
        per_iter = total_time / num_iters if num_iters else 0.0
        print('{} Total time: {} ({:.4f} s / it)'.format(header,
                                                         total_time_str,
                                                         per_iter))


def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
    """Build a ``LambdaLR`` whose multiplier ramps linearly from ``warmup_factor`` to 1."""

    def lr_multiplier(step):
        """Return the learning-rate multiplier for the given step count."""
        if step < warmup_iters:
            # while warming up, the multiplier moves from warmup_factor -> 1
            progress = float(step) / warmup_iters
            return warmup_factor * (1 - progress) + progress
        # once the step count reaches warmup_iters the multiplier stays at 1
        return 1

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_multiplier)


def mkdir(path):
    """Create ``path`` with ``os.makedirs``; an EEXIST error is ignored, any other OSError propagates."""
    try:
        os.makedirs(path)
    except OSError as err:
        already_exists = err.errno == errno.EEXIST
        if not already_exists:
            raise


def setup_for_distributed(is_master):
    """
    This function disables when not in master process
    """
    import builtins as __builtin__
    builtin_print = __builtin__.print

    def print(*args, **kwargs):
        force = kwargs.pop('force', False)
        if is_master or force:
            builtin_print(*args, **kwargs)

    __builtin__.print = print


def is_dist_avail_and_initialized():
    """Return True only when ``dist`` is available and has been initialized."""
    return dist.is_available() and dist.is_initialized()


def get_world_size():
    """Return ``dist.get_world_size()``, or 1 when distributed mode is not available/initialized."""
    if is_dist_avail_and_initialized():
        return dist.get_world_size()
    return 1


def get_rank():
    """Return ``dist.get_rank()``, or 0 when distributed mode is not available/initialized."""
    if is_dist_avail_and_initialized():
        return dist.get_rank()
    return 0


def is_main_process():
    """Return True when this process has rank 0."""
    rank = get_rank()
    return rank == 0


def save_on_master(*args, **kwargs):
    """Forward all arguments to ``torch.save``, but only in the rank-0 process."""
    if not is_main_process():
        return
    torch.save(*args, **kwargs)


def init_distributed_mode(args):
    """Initialise the distributed process group from environment variables.

    Reads the rank information from the environment, stores it on ``args``
    and, when a distributed launch is detected, selects the CUDA device,
    initialises the NCCL process group, waits at a barrier and silences
    ``print`` on every rank except 0.

    Attributes written on ``args``: ``rank``, ``gpu``, ``distributed``,
    ``dist_backend`` and (first branch only) ``world_size``.
    ``args.dist_url`` is read and must already be set by the caller.
    """
    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
        # launcher-provided variables; LOCAL_RANK must be present as well
        args.rank = int(os.environ["RANK"])
        args.world_size = int(os.environ['WORLD_SIZE'])
        args.gpu = int(os.environ['LOCAL_RANK'])
    elif 'SLURM_PROCID' in os.environ:
        # NOTE(review): args.world_size is not assigned in this branch;
        # presumably it is supplied by the caller's argument parser - confirm
        args.rank = int(os.environ['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print('Not using distributed mode')
        args.distributed = False
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                         world_size=args.world_size, rank=args.rank)
    # wait until every process has reached this point
    torch.distributed.barrier()
    # from here on only rank 0 prints (unless print(..., force=True) is used)
    setup_for_distributed(args.rank == 0)


================================================
FILE: pytorch_object_detection/train_coco_dataset/train_utils/group_by_aspect_ratio.py
================================================
import bisect
from collections import defaultdict
import copy
from itertools import repeat, chain
import math
import numpy as np

import torch
import torch.utils.data
from torch.utils.data.sampler import BatchSampler, Sampler
from torch.utils.model_zoo import tqdm
import torchvision

from PIL import Image


def _repeat_to_at_least(iterable, n):
    repeat_times = math.ceil(n / len(iterable))
    repeated = chain.from_iterable(repeat(iterable, repeat_times))
    return list(repeated)


class GroupedBatchSampler(BatchSampler):
    """
    Wraps another sampler to yield a mini-batch of indices.
    It enforces that the batch only contain elements from the same group.
    It also tries to provide mini-batches which follows an ordering which is
    as close as possible to the ordering from the original sampler.
    Arguments:
        sampler (Sampler): Base sampler.
        group_ids (list[int]): If the sampler produces indices in range [0, N),
            `group_ids` must be a list of `N` ints which contains the group id of each sample.
            The group ids must be a continuous set of integers starting from
            0, i.e. they must be in the range [0, num_groups).
        batch_size (int): Size of mini-batch.
    """
    def __init__(self, sampler, group_ids, batch_size):
        """Store the wrapped sampler, the per-sample group ids and the batch size.

        Raises:
            ValueError: if ``sampler`` is not a ``Sampler`` instance.
        """
        # NOTE(review): BatchSampler.__init__ is not called here; the three
        # attributes below are set directly instead.
        if not isinstance(sampler, Sampler):
            raise ValueError(
                "sampler should be an instance of "
                "torch.utils.data.Sampler, but got sampler={}".format(sampler)
            )
        self.sampler = sampler
        self.group_ids = group_ids
        self.batch_size = batch_size

    def __iter__(self):
        """Yield lists of ``batch_size`` indices that all share one group id.

        Exactly ``len(self)`` batches are produced: full batches are yielded as
        soon as a group's buffer fills up, then leftover partial buffers are
        padded with repeated samples of the same group until the count is met.
        """
        # indices waiting to form the next batch of each group
        buffer_per_group = defaultdict(list)
        # every index seen so far for each group; used later as padding material
        samples_per_group = defaultdict(list)

        num_batches = 0
        for idx in self.sampler:
            group_id = self.group_ids[idx]
            buffer_per_group[group_id].append(idx)
            samples_per_group[group_id].append(idx)
            if len(buffer_per_group[group_id]) == self.batch_size:
                yield buffer_per_group[group_id]
                num_batches += 1
                # drop the emitted buffer; the defaultdict recreates an empty one on next access
                del buffer_per_group[group_id]
            assert len(buffer_per_group[group_id]) < self.batch_size

        # now we have run out of elements that satisfy
        # the group criteria, let's return the remaining
        # elements so that the size of the sampler is
        # deterministic
        expected_num_batches = len(self)
        num_remaining = expected_num_batches - num_batches
        if num_remaining > 0:
            # for the remaining batches, take first the buffers with largest number
            # of elements
            for group_id, _ in sorted(buffer_per_group.items(),
                                      key=lambda x: len(x[1]), reverse=True):
                # number of padding indices needed to complete this partial batch
                remaining = self.batch_size - len(buffer_per_group[group_id])
                samples_from_group_id = _repeat_to_at_least(samples_per_group[group_id], remaining)
                buffer_per_group[group_id].extend(samples_from_group_id[:remaining])
                assert len(buffer_per_group[group_id]) == self.batch_size
                yield buffer_per_group[group_id]
                num_remaining -= 1
                if num_remaining == 0:
                    break
        assert num_remaining == 0

    def __len__(self):
        """Return the number of batches: ``len(self.sampler) // self.batch_size``."""
        return len(self.sampler) // self.batch_size


def _compute_aspect_ratios_slow(dataset, indices=None):
    """Compute width/height ratios by loading every selected sample through a DataLoader.

    Args:
        dataset: dataset whose items unpack as ``(img, _)`` where ``img.shape[-2:]``
            is ``(height, width)``.
        indices: positions to visit; defaults to ``range(len(dataset))``.
    Returns:
        list[float]: one ``width / height`` ratio per visited sample, in visiting order.
    """
    print("Your dataset doesn't support the fast path for "
          "computing the aspect ratios, so will iterate over "
          "the full dataset and load every image instead. "
          "This might take some time...")
    if indices is None:
        indices = range(len(dataset))

    class SubsetSampler(Sampler):
        """Sampler that yields the given indices in their given order."""

        def __init__(self, indices):
            self.indices = indices

        def __iter__(self):
            return iter(self.indices)

        def __len__(self):
            return len(self.indices)

    sampler = SubsetSampler(indices)
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, sampler=sampler,
        num_workers=14,  # you might want to increase it for faster processing
        collate_fn=lambda x: x[0])  # batch_size is 1, so unwrap the single sample
    aspect_ratios = []
    # NOTE(review): the progress-bar total is len(dataset), not len(indices), so the bar
    # will not reach 100% when a subset of indices is passed - confirm whether intended
    with tqdm(total=len(dataset)) as pbar:
        for _i, (img, _) in enumerate(data_loader):
            pbar.update(1)
            height, width = img.shape[-2:]
            aspect_ratio = float(width) / float(height)
            aspect_ratios.append(aspect_ratio)
    return aspect_ratios


def _compute_aspect_ratios_custom_dataset(dataset, indices=None):
    if indices is None:
        indices = range(len(dataset))
    aspect_ratios = []
    for i in indices:
        height, width = dataset.get_height_and_width(i)
        aspect_ratio = float(width) / float(height)
        aspect_ratios.append(aspect_ratio)
    return aspect_ratios


def _compute_aspect_ratios_coco_dataset(dataset, indices=None):
    if indices is None:
        indices = range(len(dataset))
    aspect_ratios = []
    for i in indices:
        img_info = dataset.coco.imgs[dataset.ids[i]]
        aspect_ratio = float(img_info["width"]) / float(img_info["height"])
        aspect_ratios.append(aspect_ratio)
    return aspect_ratios


def _compute_aspect_ratios_voc_dataset(dataset, indices=None):
    """Return ``width / height`` for each index by opening ``dataset.images[i]`` with PIL.

    Args:
        dataset: object exposing ``images``, indexable by sample position.
        indices: positions to visit; defaults to ``range(len(dataset))``.
    Returns:
        list[float]: one aspect ratio per visited index, in visiting order.
    """
    if indices is None:
        indices = range(len(dataset))
    aspect_ratios = []
    for i in indices:
        # this doesn't load the data into memory, because PIL loads it lazily;
        # the context manager closes the underlying file as soon as the size is read
        with Image.open(dataset.images[i]) as img:
            width, height = img.size
        aspect_ratios.append(float(width) / float(height))
    return aspect_ratios


def _compute_aspect_ratios_subset_dataset(dataset, indices=None):
    """Map the indices through ``dataset.indices`` and delegate to ``compute_aspect_ratios`` on ``dataset.dataset``."""
    positions = range(len(dataset)) if indices is None else indices
    parent_indices = [dataset.indices[pos] for pos in positions]
    return compute_aspect_ratios(dataset.dataset, parent_indices)


def compute_aspect_ratios(dataset, indices=None):
    """Pick the aspect-ratio routine matching the dataset and run it.

    Checked in this order: a ``get_height_and_width`` attribute, torchvision
    ``CocoDetection``, torchvision ``VOCDetection``, ``torch.utils.data.Subset``;
    anything else goes through the slow path.
    """
    if hasattr(dataset, "get_height_and_width"):
        compute = _compute_aspect_ratios_custom_dataset
    elif isinstance(dataset, torchvision.datasets.CocoDetection):
        compute = _compute_aspect_ratios_coco_dataset
    elif isinstance(dataset, torchvision.datasets.VOCDetection):
        compute = _compute_aspect_ratios_voc_dataset
    elif isinstance(dataset, torch.utils.data.Subset):
        compute = _compute_aspect_ratios_subset_dataset
    else:
        # slow path
        compute = _compute_aspect_ratios_slow

    return compute(dataset, indices)


def _quantize(x, bins):
    bins = copy.deepcopy(bins)
    bins = sorted(bins)
    # bisect_right：寻找y元素按顺序应该排在bins中哪个元素的右边，返回的是索引
    quantized = list(map(lambda y: bisect.bisect_right(bins, y), x))
    return quantized


def create_aspect_ratio_groups(dataset, k=0):
    """Assign every sample of ``dataset`` to an aspect-ratio bin and return the bin ids.

    For ``k > 0`` the bin edges are ``2 ** linspace(-1, 1, 2 * k + 1)``; otherwise
    a single edge at 1.0 is used. The edges and the per-bin counts are printed.
    """
    # width/height ratio of every image in the dataset
    ratios = compute_aspect_ratios(dataset)
    # 2*k+1 edges between 0.5 and 2, evenly spaced in the exponent
    if k > 0:
        bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist()
    else:
        bins = [1.0]

    # index of the bin each ratio falls into
    groups = _quantize(ratios, bins)
    # count number of elements per group
    _, counts = np.unique(groups, return_counts=True)
    fbins = [0] + bins + [np.inf]
    print("Using {} as bins for aspect ratio quantization".format(fbins))
    print("Count of instances per bin: {}".format(counts))
    return groups


================================================
FILE: pytorch_object_detection/train_coco_dataset/train_utils/train_eval_utils.py
================================================
import math
import sys
import time

import torch

import train_utils.distributed_utils as utils
from .coco_eval import EvalCOCOMetric


def train_one_epoch(model, optimizer, data_loader, device, epoch,
                    print_freq=50, warmup=False, scaler=None):
    """Train ``model`` for one pass over ``data_loader``.

    Args:
        model: called as ``model(images, targets)`` and expected to return a dict of losses.
        optimizer: optimizer stepped once per batch.
        data_loader: yields ``(images, targets)`` pairs; ``targets`` is a sequence of dicts
            whose values support ``.to(device)``.
        device: device the images and targets are moved to.
        epoch (int): epoch index; a warmup scheduler is only used when it is 0.
        print_freq (int): logging interval passed to ``MetricLogger.log_every``.
        warmup (bool): enable the linear warmup scheduler during epoch 0.
        scaler: gradient scaler for mixed precision; ``None`` disables autocast.
    Returns:
        tuple: ``(mloss, now_lr)`` - the running mean of the reduced total loss
        (a 1-element tensor on ``device``) and the learning rate of the first
        parameter group after the last step.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if epoch == 0 and warmup is True:  # warmup is only applied during the first epoch (epoch=0)
        warmup_factor = 1.0 / 1000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    mloss = torch.zeros(1).to(device)  # mean losses
    # read the learning rate up front so the return value is defined even when
    # the data loader yields no batch at all
    now_lr = optimizer.param_groups[0]["lr"]
    for i, [images, targets] in enumerate(metric_logger.log_every(data_loader, print_freq, header)):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # mixed-precision context manager; disabled when no scaler is given
        with torch.cuda.amp.autocast(enabled=scaler is not None):
            loss_dict = model(images, targets)

            losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purpose
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()
        # track the training loss
        mloss = (mloss * i + loss_value) / (i + 1)  # update mean losses

        if not math.isfinite(loss_value):  # stop training when the loss is inf/NaN
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        if scaler is not None:
            scaler.scale(losses).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            losses.backward()
            optimizer.step()

        if lr_scheduler is not None:  # warmup scheduler, first epoch only
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        now_lr = optimizer.param_groups[0]["lr"]
        metric_logger.update(lr=now_lr)

    return mloss, now_lr


@torch.no_grad()
def evaluate(model, data_loader, device):
    """Run ``model`` over ``data_loader`` and compute COCO bbox metrics.

    Args:
        model: called as ``model(images)``; expected to return one dict per image.
        data_loader: yields ``(images, targets)``; ``data_loader.dataset.coco`` is
            passed to ``EvalCOCOMetric``.
        device: device the images are moved to.
    Returns:
        The value of ``det_metric.evaluate()`` on the main process, ``None`` on
        every other process.
    """
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = "Test: "

    det_metric = EvalCOCOMetric(data_loader.dataset.coco, iou_type="bbox", results_file_name="det_results.json")
    for image, targets in metric_logger.log_every(data_loader, 100, header):
        image = list(img.to(device) for img in image)

        # skip the GPU-specific call when running on CPU
        if device != torch.device("cpu"):
            torch.cuda.synchronize(device)

        # model_time covers the forward pass plus moving the outputs to the CPU
        model_time = time.time()
        outputs = model(image)

        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time

        det_metric.update(targets, outputs)
        metric_logger.update(model_time=model_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)

    # synchronize the collected results across all processes
    det_metric.synchronize_results()

    if utils.is_main_process():
        coco_info = det_metric.evaluate()
    else:
        coco_info = None

    return coco_info


================================================
FILE: pytorch_object_detection/train_coco_dataset/transforms.py
================================================
import random
from torchvision.transforms import functional as F


class Compose(object):
    """Chain several transform callables into one."""
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        """Apply every transform in order, feeding each one the previous (image, target) pair."""
        for transform in self.transforms:
            image, target = transform(image, target)
        return image, target


class ToTensor(object):
    """Convert the image with ``F.to_tensor``; the target is passed through unchanged."""
    def __call__(self, image, target):
        return F.to_tensor(image), target


class RandomHorizontalFlip(object):
    """Randomly flip the image horizontally together with its bounding boxes."""
    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, image, target):
        """Flip ``image`` and ``target["boxes"]`` when a random draw falls below ``self.prob``.

        The boxes tensor is updated in place and stored back into ``target``.
        """
        if random.random() >= self.prob:
            return image, target

        _, width = image.shape[-2:]
        flipped = image.flip(-1)  # flip along the last (width) axis
        boxes = target["boxes"]
        # boxes: xmin, ymin, xmax, ymax -> mirror the x coordinates and swap them
        boxes[:, [0, 2]] = width - boxes[:, [2, 0]]
        target["boxes"] = boxes
        return flipped, target


================================================
FILE: pytorch_object_detection/train_coco_dataset/validation.py
================================================
"""
该脚本用于调用训练好的模型权重去计算验证集/测试集的COCO指标
以及每个类别的mAP(IoU=0.5)
"""

import os
import json

import torch
import torchvision
from tqdm import tqdm
import numpy as np
from torchvision.models.feature_extraction import create_feature_extractor

import transforms
from network_files import FasterRCNN, AnchorsGenerator
from my_dataset import CocoDetection
from backbone import resnet50
from train_utils import EvalCOCOMetric


def summarize(self, catId=None):
    """
    Compute summary metrics for accumulated evaluation results.
    Note this function can *only* be applied on the default parameter setting.

    Args:
        self: evaluator object exposing ``params`` (``iouThrs``, ``areaRngLbl``,
            ``maxDets``) and ``eval`` (``'precision'`` and ``'recall'`` arrays).
        catId (int | None): when an int, restrict the metrics to that index on
            the category axis; otherwise use all categories.
    Returns:
        tuple: ``(stats, print_info)`` - a list of 12 metric values and the 12
        formatted lines joined with newlines. Nothing is printed here.
    Raises:
        Exception: if ``self.eval`` is empty, i.e. ``accumulate()`` has not been run.
    """
    # check first: every metric below indexes into self.eval
    if not self.eval:
        raise Exception('Please run accumulate() first')

    def _summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):
        """Return ``(mean_value, formatted_line)`` for one AP (ap=1) or AR (ap=0) setting."""
        p = self.params
        iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
        titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
        typeStr = '(AP)' if ap == 1 else '(AR)'
        iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
            if iouThr is None else '{:0.2f}'.format(iouThr)

        aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
        mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]

        if ap == 1:
            # dimension of precision: [TxRxKxAxM]
            s = self.eval['precision']
            # IoU
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]

            if isinstance(catId, int):
                s = s[:, :, catId, aind, mind]
            else:
                s = s[:, :, :, aind, mind]

        else:
            # dimension of recall: [TxKxAxM]
            s = self.eval['recall']
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]

            if isinstance(catId, int):
                s = s[:, catId, aind, mind]
            else:
                s = s[:, :, aind, mind]

        # entries equal to -1 are excluded from the mean; -1 is returned when nothing is left
        valid = s[s > -1]
        if len(valid) == 0:
            mean_s = -1
        else:
            mean_s = np.mean(valid)

        print_string = iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)
        return mean_s, print_string

    max_dets = self.params.maxDets
    # the 12 metric settings, in output order
    settings = [
        dict(ap=1),
        dict(ap=1, iouThr=.5, maxDets=max_dets[2]),
        dict(ap=1, iouThr=.75, maxDets=max_dets[2]),
        dict(ap=1, areaRng='small', maxDets=max_dets[2]),
        dict(ap=1, areaRng='medium', maxDets=max_dets[2]),
        dict(ap=1, areaRng='large', maxDets=max_dets[2]),
        dict(ap=0, maxDets=max_dets[0]),
        dict(ap=0, maxDets=max_dets[1]),
        dict(ap=0, maxDets=max_dets[2]),
        dict(ap=0, areaRng='small', maxDets=max_dets[2]),
        dict(ap=0, areaRng='medium', maxDets=max_dets[2]),
        dict(ap=0, areaRng='large', maxDets=max_dets[2]),
    ]
    results = [_summarize(**setting) for setting in settings]
    stats = [value for value, _ in results]
    print_info = "\n".join(line for _, line in results)

    return stats, print_info


def main(parser_data):
    """Evaluate trained Faster R-CNN weights on the COCO "val" split.

    Builds the dataset and model from ``parser_data`` (``device``, ``data_path``,
    ``batch_size``, ``num_classes``, ``weights_path``), runs inference over the
    validation loader, prints the per-category mAP(IoU=0.5) and writes the COCO
    summary plus the per-category values to ``record_mAP.txt``.
    """
    device = torch.device(parser_data.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    data_transform = {
        "val": transforms.Compose([transforms.ToTensor()])
    }

    # read class_indict
    label_json_path = './coco91_indices.json'
    assert os.path.exists(label_json_path), "json file {} dose not exist.".format(label_json_path)
    with open(label_json_path, 'r') as f:
        category_index = json.load(f)

    coco_root = parser_data.data_path

    # note: the collate_fn is custom because each sample holds an image and its targets,
    # which the default collate method cannot merge into a batch
    batch_size = parser_data.batch_size
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using %g dataloader workers' % nw)

    # load validation data set
    val_dataset = CocoDetection(coco_root, "val", data_transform["val"])
    val_dataset_loader = torch.utils.data.DataLoader(val_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     pin_memory=True,
                                                     num_workers=nw,
                                                     collate_fn=val_dataset.collate_fn)

    # create model
    res50 = resnet50()
    backbone = create_feature_extractor(res50, return_nodes={"layer3": "0"})
    backbone.out_channels = 1024

    anchor_generator = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),),
                                        aspect_ratios=((0.5, 1.0, 2.0),))

    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],  # feature maps to run RoI pooling on
                                                    output_size=[7, 7],  # size of the RoI-pooled feature map
                                                    sampling_ratio=2)  # sampling ratio

    # num_classes equal 80 + background classes
    model = FasterRCNN(backbone=backbone,
                       num_classes=parser_data.num_classes + 1,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)

    # load your own trained model weights
    weights_path = parser_data.weights_path
    assert os.path.exists(weights_path), "not found {} file.".format(weights_path)
    weights_dict = torch.load(weights_path, map_location='cpu')
    # a checkpoint may wrap the state dict under the "model" key
    weights_dict = weights_dict["model"] if "model" in weights_dict else weights_dict
    model.load_state_dict(weights_dict)
    # print(model)

    model.to(device)

    # evaluate on the val dataset
    cpu_device = torch.device("cpu")

    det_metric = EvalCOCOMetric(val_dataset.coco, "bbox", "det_results.json")
    model.eval()
    with torch.no_grad():
        for image, targets in tqdm(val_dataset_loader, desc="validation..."):
            # move the images to the target device
            image = list(img.to(device) for img in image)

            # inference
            outputs = model(image)
            outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
            det_metric.update(targets, outputs)

    det_metric.synchronize_results()
    det_metric.evaluate()

    # calculate COCO info for all classes
    coco_stats, print_coco = summarize(det_metric.coco_evaluator)

    # calculate voc info for every classes(IoU=0.5)
    voc_map_info_list = []
    # class names from the label file, skipping the "N/A" placeholders
    classes = [v for v in category_index.values() if v != "N/A"]
    for i in range(len(classes)):
        stats, _ = summarize(det_metric.coco_evaluator, catId=i)
        # stats[1] is the AP at IoU=0.5 entry of the summary
        voc_map_info_list.append(" {:15}: {}".format(classes[i], stats[1]))

    print_voc = "\n".join(voc_map_info_list)
    print(print_voc)

    # save the validation results to a txt file
    with open("record_mAP.txt", "w") as f:
        record_lines = ["COCO results:",
                        print_coco,
                        "",
                        "mAP(IoU=0.5) for each category:",
                        print_voc]
        f.write("\n".join(record_lines))


if __name__ == "__main__":
    import argparse

    # the module docstring doubles as the command-line description
    parser = argparse.ArgumentParser(
        description=__doc__)

    # device type to use
    parser.add_argument('--device', default='cuda', help='device')

    # number of object classes to detect
    parser.add_argument('--num-classes', type=int, default=90, help='number of classes')

    # dataset root directory (coco2017 root)
    parser.add_argument('--data-path', default='/data/coco2017', help='dataset root')

    # trained weights file
    parser.add_argument('--weights-path', default='./save_weights/model.pth', type=str, help='training weights')

    # batch size
    parser.add_argument('--batch_size', default=1, type=int, metavar='N',
                        help='batch size when validation.')

    args = parser.parse_args()

    main(args)


================================================
FILE: pytorch_object_detection/yolov3_spp/README.md
================================================
# YOLOv3 SPP
## 该项目源自[ultralytics/yolov3](https://github.com/ultralytics/yolov3)
## 1 环境配置：
* Python3.6或者3.7
* Pytorch1.7.1(注意：必须是1.6.0或以上，因为使用官方提供的混合精度训练1.6.0后才支持)
* pycocotools(Linux: `pip install pycocotools`;   
  Windows: `pip install pycocotools-windows`(不需要额外安装vs))
* 更多环境配置信息，请查看`requirements.txt`文件
* 最好使用GPU训练

## 2 文件结构：
```
  ├── cfg: 配置文件目录
  │    ├── hyp.yaml: 训练网络的相关超参数
  │    └── yolov3-spp.cfg: yolov3-spp网络结构配置 
  │ 
  ├── data: 存储训练时数据集相关信息缓存
  │    └── pascal_voc_classes.json: pascal voc数据集标签
  │ 
  ├── runs: 保存训练过程中生成的所有tensorboard相关文件
  ├── build_utils: 搭建训练网络时使用到的工具
  │     ├── datasets.py: 数据读取以及预处理方法
  │     ├── img_utils.py: 部分图像处理方法
  │     ├── layers.py: 实现的一些基础层结构
  │     ├── parse_config.py: 解析yolov3-spp.cfg文件
  │     ├── torch_utils.py: 使用pytorch实现的一些工具
  │     └── utils.py: 训练网络过程中使用到的一些方法
  │
  ├── train_utils: 训练验证网络时使用到的工具(包括多GPU训练以及使用cocotools)
  ├── weights: 所有相关预训练权重(下面会给出百度云的下载地址)
  ├── model.py: 模型搭建文件
  ├── train.py: 针对单GPU或者CPU的用户使用
  ├── train_multi_GPU.py: 针对使用多GPU的用户使用
  ├── trans_voc2yolo.py: 将voc数据集标注信息(.xml)转为yolo标注格式(.txt)
  ├── calculate_dataset.py: 1)统计训练集和验证集的数据并生成相应.txt文件
  │                         2)创建data.data文件
  │                         3)根据yolov3-spp.cfg结合数据集类别数创建my_yolov3.cfg文件
  └── predict_test.py: 简易的预测脚本，使用训练好的权重进行预测测试
```

## 3 训练数据的准备以及目录结构
* 这里建议标注数据时直接生成yolo格式的标签文件`.txt`，推荐使用免费开源的标注软件(支持yolo格式)，[https://github.com/tzutalin/labelImg](https://github.com/tzutalin/labelImg)
* 如果之前已经标注成pascal voc的`.xml`格式了也没关系，我写了个voc转yolo格式的转化脚本，4.1会讲怎么使用
* 测试图像时最好将图像缩放到32的倍数
* 标注好的数据集请按照以下目录结构进行摆放:
```
├── my_yolo_dataset 自定义数据集根目录
│         ├── train   训练集目录
│         │     ├── images  训练集图像目录
│         │     └── labels  训练集标签目录 
│         └── val    验证集目录
│               ├── images  验证集图像目录
│               └── labels  验证集标签目录            
```

## 4 利用标注好的数据集生成一系列相关准备文件，为了方便我写了个脚本，通过脚本可直接生成。也可参考原作者的[教程](https://github.com/ultralytics/yolov3/wiki/Train-Custom-Data)
```
├── data 利用数据集生成的一系列相关准备文件目录
│    ├── my_train_data.txt:  该文件里存储的是所有训练图片的路径地址
│    ├── my_val_data.txt:  该文件里存储的是所有验证图片的路径地址
│    ├── my_data_label.names:  该文件里存储的是所有类别的名称，一个类别对应一行(这里会根据`.json`文件自动生成)
│    └── my_data.data:  该文件里记录的是类别数、类别信息以及train和valid对应的txt文件
```

### 4.1 将VOC标注数据转为YOLO标注数据(如果你的数据已经是YOLO格式了，可跳过该步骤)
* 使用`trans_voc2yolo.py`脚本进行转换，并在`./data/`文件夹下生成`my_data_label.names`标签文件
* 执行脚本前，需要根据自己的路径修改以下参数
```python
# voc数据集根目录以及版本
voc_root = "./VOCdevkit"
voc_version = "VOC2012"

# 转换的训练集以及验证集对应txt文件，对应VOCdevkit/VOC2012/ImageSets/Main文件夹下的txt文件
train_txt = "train.txt"
val_txt = "val.txt"

# 转换后的文件保存目录
save_file_root = "/home/wz/my_project/my_yolo_dataset"

# label标签对应json文件
label_json_path = './data/pascal_voc_classes.json'
```
* 生成的`my_data_label.names`标签文件格式如下
```text
aeroplane
bicycle
bird
boat
bottle
bus
...
```

### 4.2 根据摆放好的数据集信息生成一系列相关准备文件
* 使用`calculate_dataset.py`脚本生成`my_train_data.txt`文件、`my_val_data.txt`文件以及`my_data.data`文件，并生成新的`my_yolov3.cfg`文件
* 执行脚本前，需要根据自己的路径修改以下参数
```python
# 训练集的labels目录路径
train_annotation_dir = "/home/wz/my_project/my_yolo_dataset/train/labels"
# 验证集的labels目录路径
val_annotation_dir = "/home/wz/my_project/my_yolo_dataset/val/labels"
# 上一步生成的my_data_label.names文件路径(如果没有该文件，可以自己手动编辑一个txt文档，然后重命名为.names格式即可)
classes_label = "./data/my_data_label.names"
# 原始yolov3-spp.cfg网络结构配置文件
cfg_path = "./cfg/yolov3-spp.cfg"
```

## 5 预训练权重下载地址（下载后放入weights文件夹中）：
* `yolov3-spp-ultralytics-416.pt`: 链接: https://pan.baidu.com/s/1cK3USHKxDx-d5dONij52lA  密码: r3vm
* `yolov3-spp-ultralytics-512.pt`: 链接: https://pan.baidu.com/s/1k5yeTZZNv8Xqf0uBXnUK-g  密码: e3k1
* `yolov3-spp-ultralytics-608.pt`: 链接: https://pan.baidu.com/s/1GI8BA0wxeWMC0cjrC01G7Q  密码: ma3t
* `yolov3spp-voc-512.pt` **(这是我在视频演示训练中得到的权重)**: 链接: https://pan.baidu.com/s/1aFAtaHlge0ieFtQ9nhmj3w  密码: 8ph3
 
 
## 6 数据集，本例程使用的是PASCAL VOC2012数据集
* `Pascal VOC2012` train/val数据集下载地址：http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
* 如果不了解数据集或者想使用自己的数据集进行训练，请参考我的bilibili：https://b23.tv/F1kSCK

## 7 使用方法
* 确保提前准备好数据集
* 确保提前下载好对应预训练模型权重
* 若要使用单GPU训练或者使用CPU训练，直接使用train.py训练脚本
* 若要使用多GPU训练，使用`python -m torch.distributed.launch --nproc_per_node=8 --use_env train_multi_GPU.py`指令,`nproc_per_node`参数为使用GPU数量
* 训练过程中保存的`results.txt`是每个epoch在验证集上的COCO指标，前12个值是COCO指标，后面两个值是训练平均损失以及学习率

## 如果对YOLOv3 SPP网络原理不是很理解可参考我的bilibili
[https://www.bilibili.com/video/BV1yi4y1g7ro?p=3](https://www.bilibili.com/video/BV1yi4y1g7ro?p=3)

## 进一步了解该项目，以及对YOLOv3 SPP代码的分析可参考我的bilibili
[https://www.bilibili.com/video/BV1t54y1C7ra](https://www.bilibili.com/video/BV1t54y1C7ra)

## YOLOv3 SPP框架图
![yolov3spp](yolov3spp.png) 


================================================
FILE: pytorch_object_detection/yolov3_spp/build_utils/__init__.py
================================================


================================================
FILE: pytorch_object_detection/yolov3_spp/build_utils/datasets.py
================================================
import math
import os
import random
import shutil
from pathlib import Path

import cv2
import numpy as np
import torch
from PIL import Image, ExifTags
from torch.utils.data import Dataset
from tqdm import tqdm

from build_utils.utils import xyxy2xywh, xywh2xyxy

# Page referenced in the "no images found" assertion message.
help_url = 'https://github.com/ultralytics/yolov3/wiki/Train-Custom-Data'
# Lower-case file extensions that are accepted as image files.
img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.dng']


# Find the numeric EXIF tag key whose name is "Orientation".
# The loop variable `orientation` is intentionally left bound at module level
# because exif_size() reads it.
for orientation, _tag_name in ExifTags.TAGS.items():
    if _tag_name == "Orientation":
        break


def exif_size(img):
    """
    Return the size of a PIL image, corrected for its EXIF orientation.

    If the EXIF orientation value is 6 or 8 the width and height are swapped;
    otherwise the size stored in the file is returned unchanged.

    :param img: PIL image (must expose ``size`` and, for EXIF support, ``_getexif()``)
    :return: (width, height) tuple
    """
    s = img.size  # (width, height)
    try:
        rotation = dict(img._getexif().items())[orientation]
    except Exception:
        # Best effort: the image may have no EXIF block (``_getexif()`` returns None),
        # no orientation entry, or no ``_getexif`` method at all. Keep the stored size.
        # ``Exception`` (not a bare except) so KeyboardInterrupt/SystemExit still propagate.
        return s

    if rotation in (6, 8):  # 6: rotated 270 degrees, 8: rotated 90 degrees
        s = (s[1], s[0])
    return s


class LoadImagesAndLabels(Dataset):  # for training/testing
    """
    Dataset of images and YOLO-format labels listed in a text file.

    Every line of the list file is an image path. The matching label file is obtained
    by replacing "images" with "labels" in that path and the extension with ".txt".
    Each label row is ``[class, x_center, y_center, w, h]`` with coordinates
    normalised to 0-1 (checked by the assertions in ``__init__``).
    """

    def __init__(self,
                 path,   # path to data/my_train_data.txt or data/my_val_data.txt
                 # Size of the pre-processed output image.
                 # For the training set this is the maximum size used during (multi-scale) training,
                 # for the validation set it is the final network input size.
                 img_size=416,
                 batch_size=16,
                 augment=False,  # True for the training set (enables augmentation), False for validation
                 hyp=None,  # hyper-parameter dict, includes the augmentation parameters
                 rect=False,  # whether to use rectangular training
                 cache_images=False,  # whether to cache all images in memory
                 single_cls=False, pad=0.0, rank=-1):
        """
        Read the image list, the image shapes and all labels.

        :param path: text file with one image path per line
        :param img_size: output image size (longest side)
        :param batch_size: used to assign every image to a batch index
        :param augment: enable augmentation (mosaic is used when ``augment`` and not ``rect``)
        :param hyp: hyper-parameter dict read by the augmentation code
        :param rect: sort images by aspect ratio and compute one shape per batch
        :param cache_images: load every image into memory up front
        :param single_cls: force every class id to 0
        :param pad: extra padding (in units of 32 px) added when computing batch shapes
        :param rank: process rank; progress bars are only shown for rank -1 or 0
        :raises FileNotFoundError: if the list file cannot be read
        """
        try:
            path = str(Path(path))
            # parent = str(Path(path).parent) + os.sep
            if os.path.isfile(path):  # file
                # read the image paths, one per line
                with open(path, "r") as f:
                    f = f.read().splitlines()
            else:
                raise Exception("%s does not exist" % path)

            # keep only the paths whose extension is in img_formats
            self.img_files = [x for x in f if os.path.splitext(x)[-1].lower() in img_formats]
            self.img_files.sort()  # fixed order, so the shape cache matches on every system
        except Exception as e:
            raise FileNotFoundError("Error loading data from {}. {}".format(path, e)) from e

        # fail if the list contains no usable image
        n = len(self.img_files)
        assert n > 0, "No images found in %s. See %s" % (path, help_url)

        # batch index of every image
        # (np.int was removed in NumPy 1.24, the builtin int is the documented replacement)
        bi = np.floor(np.arange(n) / batch_size).astype(int)
        # total number of batches
        nb = bi[-1] + 1  # number of batches

        self.n = n  # number of images
        self.batch = bi  # batch index of image
        self.img_size = img_size  # size of the pre-processed output image
        self.augment = augment  # whether augmentation is enabled
        self.hyp = hyp  # hyper-parameter dict used by the augmentation code
        self.rect = rect  # whether rectangular training is used
        # note: mosaic is switched off whenever rect is enabled
        self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)

        # Define labels
        # (./my_yolo_dataset/train/images/2009_004012.jpg) -> (./my_yolo_dataset/train/labels/2009_004012.txt)
        self.label_files = [x.replace("images", "labels").replace(os.path.splitext(x)[-1], ".txt")
                            for x in self.img_files]

        # Read image shapes (wh)
        # a cached .shapes file next to the list file stores "width height" per image
        sp = path.replace(".txt", ".shapes")  # shapefile path
        try:
            with open(sp, "r") as f:  # read existing shapefile
                s = [x.split() for x in f.read().splitlines()]
                # a different number of rows means a different dataset: rebuild the cache
                assert len(s) == n, "shapefile out of aync"
        except Exception as e:
            # print("read {} failed [{}], rebuild {}.".format(sp, e, sp))
            # read the size of every image (progress bar only in the first process)
            if rank in [-1, 0]:
                image_files = tqdm(self.img_files, desc="Reading image shapes")
            else:
                image_files = self.img_files
            s = [exif_size(Image.open(f)) for f in image_files]
            # store all shapes in the .shapes file
            np.savetxt(sp, s, fmt="%g")  # overwrite existing (if any)

        # original size (wh) of every image
        self.shapes = np.array(s, dtype=np.float64)

        # Rectangular Training https://github.com/ultralytics/yolov3/issues/232
        # When enabled, every batch uses a rectangle close to the aspect ratio of its
        # images (longest side = img_size) instead of img_size x img_size.
        if self.rect:
            # Sort by aspect ratio
            s = self.shapes  # wh
            # height / width of every image
            ar = s[:, 1] / s[:, 0]  # aspect ratio
            # sort ascending so that the images of one batch have similar aspect ratios
            irect = ar.argsort()
            # reorder images, labels and shapes accordingly
            self.img_files = [self.img_files[i] for i in irect]
            self.label_files = [self.label_files[i] for i in irect]
            self.shapes = s[irect]  # wh
            ar = ar[irect]

            # set training image shapes: one relative [h, w] shape per batch
            shapes = [[1, 1]] * nb  # nb: number of batches
            for i in range(nb):
                ari = ar[bi == i]  # bi: batch index
                # smallest and largest h/w ratio inside batch i
                mini, maxi = ari.min(), ari.max()

                # all images wider than tall (h/w < 1): width becomes img_size
                if maxi < 1:
                    shapes[i] = [maxi, 1]
                # all images taller than wide (h/w > 1): height becomes img_size
                elif mini > 1:
                    shapes[i] = [1, 1 / mini]
            # network input shape of every batch, rounded up to a multiple of 32
            self.batch_shapes = np.ceil(np.array(shapes) * img_size / 32. + pad).astype(int) * 32

        # cache labels
        self.imgs = [None] * n  # n: number of images
        # label: [class, x, y, w, h], xywh are relative values
        self.labels = [np.zeros((0, 5), dtype=np.float32)] * n
        extract_bounding_boxes, labels_loaded = False, False
        nm, nf, ne, nd = 0, 0, 0, 0  # number mission, found, empty, duplicate
        # Separate cache names for rect / non-rect, because rect re-orders the images
        # and labels; mixing the two caches would give a wrong mAP.
        if rect is True:
            np_labels_path = str(Path(self.label_files[0]).parent) + ".rect.npy"  # saved labels in *.npy file
        else:
            np_labels_path = str(Path(self.label_files[0]).parent) + ".norect.npy"

        if os.path.isfile(np_labels_path):
            # NOTE: allow_pickle=True unpickles the file content, only use caches you created yourself
            x = np.load(np_labels_path, allow_pickle=True)
            if len(x) == n:
                # same number of entries as images: treat it as the same dataset and use the cache
                self.labels = x
                labels_loaded = True

        # progress bar only in the first process
        if rank in [-1, 0]:
            pbar = tqdm(self.label_files)
        else:
            pbar = self.label_files

        # load every label file
        for i, file in enumerate(pbar):
            if labels_loaded is True:
                # take the labels from the cache
                l = self.labels[i]
            else:
                # read the labels from the text file
                try:
                    with open(file, "r") as f:
                        # one label per line, values separated by whitespace
                        l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
                except Exception as e:
                    print("An error occurred while loading the file {}: {}".format(file, e))
                    nm += 1  # file missing
                    continue

            # the file contains at least one label
            if l.shape[0]:
                # every row must have exactly five values [class, x, y, w, h]
                assert l.shape[1] == 5, "> 5 label columns: %s" % file
                assert (l >= 0).all(), "negative labels: %s" % file
                assert (l[:, 1:] <= 1).all(), "non-normalized or out of bounds coordinate labels: %s" % file

                # count files that contain duplicated rows
                if np.unique(l, axis=0).shape[0] < l.shape[0]:  # duplicate rows
                    nd += 1
                if single_cls:
                    l[:, 0] = 0  # force dataset into single-class mode

                self.labels[i] = l
                nf += 1  # file found

                # Extract object detection boxes for a second stage classifier
                if extract_bounding_boxes:
                    p = Path(self.img_files[i])
                    img = cv2.imread(str(p))
                    h, w = img.shape[:2]
                    for j, x in enumerate(l):
                        f = "%s%sclassifier%s%g_%g_%s" % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
                        if not os.path.exists(Path(f).parent):
                            os.makedirs(Path(f).parent)  # make new output folder

                        # relative -> absolute coordinates, b: x, y, w, h
                        b = x[1:] * [w, h, w, h]  # box
                        # use the larger of width/height for both sides
                        b[2:] = b[2:].max()  # rectangle to square
                        # enlarge the crop
                        b[2:] = b[2:] * 1.3 + 30  # pad
                        # x,y,w,h -> xmin,ymin,xmax,ymax
                        # (fixed: ndarray has ravel(), not revel(); np.int no longer exists)
                        b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)

                        # clip the box to the image (fixed: np.clip must be called, not subscripted)
                        b[[0, 2]] = np.clip(b[[0, 2]], 0, w)
                        b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
                        assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), "Failure extracting classifier boxes"
            else:
                ne += 1  # file empty

            # progress bar only in the first process
            if rank in [-1, 0]:
                # update the progress bar description
                pbar.desc = "Caching labels (%g found, %g missing, %g empty, %g duplicate, for %g images)" % (
                    nf, nm, ne, nd, n)
        # parenthesised so that the separator is appended to the directory, not to the whole message
        assert nf > 0, "No labels found in %s." % (os.path.dirname(self.label_files[0]) + os.sep)

        # save the labels as .npy when they were not loaded from a cache and there are more than 1000 images
        if not labels_loaded and n > 1000:
            print("Saving labels to %s for faster future loading" % np_labels_path)
            # Images have different numbers of boxes, so the list is ragged. Recent NumPy
            # refuses to convert a ragged list implicitly, so build the object array by hand.
            labels_to_save = np.empty(n, dtype=object)
            for j, lab in enumerate(self.labels):
                labels_to_save[j] = lab
            np.save(np_labels_path, labels_to_save)  # save for next time

        # Cache images into memory for faster training (Warning: large datasets may exceed system RAM)
        if cache_images:  # if training
            gb = 0  # Gigabytes of cached images
            if rank in [-1, 0]:
                pbar = tqdm(range(len(self.img_files)), desc="Caching images")
            else:
                pbar = range(len(self.img_files))

            self.img_hw0, self.img_hw = [None] * n, [None] * n
            for i in pbar:  # max 10k images
                self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i)  # img, hw_original, hw_resized
                gb += self.imgs[i].nbytes  # bytes used by the cached images so far
                if rank in [-1, 0]:
                    pbar.desc = "Caching images (%.1fGB)" % (gb / 1E9)

        # Detect corrupted images https://medium.com/joelthchao/programmatically-detect-corrupted-image-8c1b2006c3d3
        detect_corrupted_images = False
        if detect_corrupted_images:
            from skimage import io  # conda install -c conda-forge scikit-image
            for file in tqdm(self.img_files, desc="Detecting corrupted images"):
                try:
                    _ = io.imread(file)
                except Exception as e:
                    print("Corrupted image detected: {}, {}".format(file, e))

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.img_files)

    def __getitem__(self, index):
        """
        Load, augment and format one sample.

        :param index: image index
        :return: (image tensor in CHW order, labels tensor of shape (nL, 6), image path,
                  shapes, index). Column 0 of the labels is left at zero here and is
                  filled in by ``collate_fn``; columns 1-5 are [class, x, y, w, h] normalised
                  by the output image size. ``shapes`` is None in mosaic mode.
        """
        hyp = self.hyp
        if self.mosaic:
            # load mosaic
            img, labels = load_mosaic(self, index)
            shapes = None
        else:
            # load image
            img, (h0, w0), (h, w) = load_image(self, index)

            # letterbox
            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
            img, ratio, pad = letterbox(img, shape, auto=False, scale_up=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            # load labels
            labels = []
            x = self.labels[index]
            if x.size > 0:
                # Normalized xywh to pixel xyxy format
                labels = x.copy()  # label: class, x, y, w, h
                labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0]  # pad width
                labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1]  # pad height
                labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
                labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]

        if self.augment:
            # Augment imagespace (load_mosaic already applied random_affine)
            if not self.mosaic:
                img, labels = random_affine(img, labels,
                                            degrees=hyp["degrees"],
                                            translate=hyp["translate"],
                                            scale=hyp["scale"],
                                            shear=hyp["shear"])

            # Augment colorspace
            augment_hsv(img, h_gain=hyp["hsv_h"], s_gain=hyp["hsv_s"], v_gain=hyp["hsv_v"])

        nL = len(labels)  # number of labels
        if nL:
            # convert xyxy to xywh
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

            # Normalize coordinates 0-1
            labels[:, [2, 4]] /= img.shape[0]  # height
            labels[:, [1, 3]] /= img.shape[1]  # width

        if self.augment:
            # random left-right flip
            lr_flip = True
            if lr_flip and random.random() < 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]  # 1 - x_center

            # random up-down flip
            ud_flip = False
            if ud_flip and random.random() < 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]  # 1 - y_center

        labels_out = torch.zeros((nL, 6))  # nL: number of labels
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Reverse the channel order (BGR to RGB) and go from HWC to CHW
        img = img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img)

        return torch.from_numpy(img), labels_out, self.img_files[index], shapes, index

    def coco_index(self, index):
        """
        Return the untouched labels and the original image shape of one sample.

        Intended for collecting label statistics for cocotools; neither the image
        nor the labels are processed in any way.

        :param index: image index
        :return: (labels tensor [class, x, y, w, h], original shape as (h, w))
        """
        o_shapes = self.shapes[index][::-1]  # wh to hw

        # load labels
        x = self.labels[index]
        labels = x.copy()  # label: class, x, y, w, h
        return torch.from_numpy(labels), o_shapes

    @staticmethod
    def collate_fn(batch):
        """
        Merge a list of samples into one batch.

        Writes the position of the image inside the batch into column 0 of its labels,
        then stacks the images and concatenates all labels.

        :param batch: list of tuples as returned by ``__getitem__``
        :return: (stacked images, concatenated labels, paths, shapes, indices)
        """
        img, label, path, shapes, index = zip(*batch)  # transposed
        for i, l in enumerate(label):
            l[:, 0] = i  # add target image index for build_targets()
        return torch.stack(img, 0), torch.cat(label, 0), path, shapes, index


def load_image(self, index):
    """
    Load one image of the dataset.

    A cached image is returned together with its cached sizes. Otherwise the file is
    read with OpenCV and resized so that its longest side equals ``self.img_size``.

    :param self: dataset object (provides imgs, img_files, img_size, augment)
    :param index: image index
    :return: (image, (h, w) of the original image, (h, w) of the returned image)
    """
    if self.imgs[index] is not None:  # cached
        return self.imgs[index], self.img_hw0[index], self.img_hw[index]  # img, hw_original, hw_resized

    img_path = self.img_files[index]
    image = cv2.imread(img_path)  # BGR
    assert image is not None, "Image Not Found " + img_path
    orig_h, orig_w = image.shape[:2]

    # scale factor that maps the longest side onto img_size
    scale = self.img_size / max(orig_h, orig_w)
    if scale != 1:
        # INTER_AREA only when shrinking without augmentation, INTER_LINEAR otherwise
        if scale < 1 and not self.augment:
            mode = cv2.INTER_AREA
        else:
            mode = cv2.INTER_LINEAR
        target_wh = (int(orig_w * scale), int(orig_h * scale))
        image = cv2.resize(image, target_wh, interpolation=mode)

    return image, (orig_h, orig_w), image.shape[:2]  # img, hw_original, hw_resized


def load_mosaic(self, index):
    """
    Build a mosaic image out of four dataset images.

    The requested image plus three randomly chosen ones are pasted around a random
    centre point into a (2 * img_size) x (2 * img_size) canvas, their labels are
    converted to pixel xyxy coordinates on that canvas, and the result is passed
    through random_affine.

    :param self: dataset object (provides img_size, labels, hyp)
    :param index: index of the first image of the mosaic
    :return: (mosaic image, labels) as returned by random_affine
    """
    # loads images in a mosaic

    labels4 = []  # labels of the four tiles
    s = self.img_size
    # random centre point of the mosaic, between 0.5 * s and 1.5 * s on each axis
    xc, yc = [int(random.uniform(s * 0.5, s * 1.5)) for _ in range(2)]  # mosaic center x, y
    # pick three more images at random
    indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)]  # 3 additional image indices
    # paste the four images one after another
    for i, index in enumerate(indices):
        # load image
        img, _, (h, w) = load_image(self, index)

        # place img in img4
        if i == 0:  # top left
            # create the canvas, filled with the value 114
            img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            # target region on the canvas
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
            # source region of the tile: its bottom-right corner sits at (xc, yc), the overflow is dropped
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
        elif i == 1:  # top right
            # target region on the canvas
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
            # source region of the tile: its bottom-left corner sits at (xc, yc), the overflow is dropped
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom left
            # target region on the canvas
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
            # source region of the tile: its top-right corner sits at (xc, yc), the overflow is dropped
            # NOTE(review): x2b may exceed w here; the slice below is then cut at the image edge - confirm intended
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, max(xc, w), min(y2a - y1a, h)
        elif i == 3:  # bottom right
            # target region on the canvas
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
            # source region of the tile: its top-left corner sits at (xc, yc), the overflow is dropped
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

        # copy the source region into the target region
        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        # offset between tile coordinates and canvas coordinates (can be negative)
        padw = x1a - x1b
        padh = y1a - y1b

        # Labels of this tile
        # [class_index, x_center, y_center, w, h]
        x = self.labels[index]
        labels = x.copy()  # copy, so the stored labels are not modified
        if x.size > 0:  # Normalized xywh to pixel xyxy format
            # absolute coordinates on the canvas
            labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw   # xmin
            labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh   # ymin
            labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw   # xmax
            labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh   # ymax
        labels4.append(labels)

    # Concat/clip labels
    if len(labels4):
        labels4 = np.concatenate(labels4, 0)
        # clip the coordinates to the canvas
        np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:])  # use with random_affine

    # Augment
    # random rotation, scale, translation and shear; the negative border shrinks the output
    img4, labels4 = random_affine(img4, labels4,
                                  degrees=self.hyp['degrees'],
                                  translate=self.hyp['translate'],
                                  scale=self.hyp['scale'],
                                  shear=self.hyp['shear'],
                                  border=-s // 2)  # border to remove

    return img4, labels4


def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, border=0):
    """
    Apply a random rotation, scale, translation and shear to an image and its boxes.

    :param img: input image, array of shape (h, w, c)
    :param targets: array of rows [cls, xmin, ymin, xmax, ymax] in pixels (may be empty)
    :param degrees: the rotation angle is drawn from [-degrees, degrees]
    :param translate: the translation is drawn from [-translate, translate] times the image size
    :param scale: the scale factor is drawn from [1 - scale, 1 + scale]
    :param shear: the shear angles (degrees) are drawn from [-shear, shear]
    :param border: added to every side of the output size (negative values shrink it)
    :return: (transformed image, remaining targets with updated boxes)
    """
    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
    # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
    # see also: https://blog.csdn.net/qq_37541097/article/details/119420860
    # targets = [cls, xyxy]

    # size of the output image (input size plus the border on both sides)
    height = img.shape[0] + border * 2
    width = img.shape[1] + border * 2

    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)  # random rotation angle
    s = random.uniform(1 - scale, 1 + scale)  # random scale factor
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)

    # Translation
    # Fixed: the x offset has to scale with the image width (shape[1]) and the y offset
    # with the height (shape[0]); the two were swapped, which only went unnoticed
    # for square inputs.
    T = np.eye(3)
    T[0, 2] = random.uniform(-translate, translate) * img.shape[1] + border  # x translation (pixels)
    T[1, 2] = random.uniform(-translate, translate) * img.shape[0] + border  # y translation (pixels)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Combined rotation matrix
    M = S @ T @ R  # ORDER IS IMPORTANT HERE!!
    if (border != 0) or (M != np.eye(3)).any():  # image changed
        # warp the image, uncovered pixels are filled with 114
        img = cv2.warpAffine(img, M[:2], dsize=(width, height), flags=cv2.INTER_LINEAR, borderValue=(114, 114, 114))

    # Transform label coordinates
    n = len(targets)
    if n:
        # warp the four corners of every box
        xy = np.ones((n * 4, 3))
        xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
        # [4*n, 3] -> [n, 8]
        xy = (xy @ M.T)[:, :2].reshape(n, 8)

        # create new boxes: the warped corners no longer form an axis-aligned
        # rectangle, so take their bounding rectangle
        x = xy[:, [0, 2, 4, 6]]  # [n, 4]
        y = xy[:, [1, 3, 5, 7]]  # [n, 4]
        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T  # [n, 4]

        # clip the boxes to the output image
        xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
        xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
        w = xy[:, 2] - xy[:, 0]
        h = xy[:, 3] - xy[:, 1]

        # box area after the transform
        area = w * h
        # box area before the transform
        area0 = (targets[:, 3] - targets[:, 1]) * (targets[:, 4] - targets[:, 2])
        # ratio between the longer and the shorter side
        ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))  # aspect ratio
        # keep boxes that are larger than 4 px on both sides, retain more than 20% of
        # their (scaled) area and have a side ratio below 10
        i = (w > 4) & (h > 4) & (area / (area0 * s + 1e-16) > 0.2) & (ar < 10)

        targets = targets[i]
        targets[:, 1:5] = xy[i]

    return img, targets


def augment_hsv(img, h_gain=0.5, s_gain=0.5, v_gain=0.5):
    """
    Randomly change hue, saturation and value of a BGR image in place.

    Each channel gets a random gain of 1 +/- its ``*_gain`` argument, applied through
    a 256-entry lookup table. The result is written back into ``img``; nothing is returned.
    See also: https://blog.csdn.net/qq_37541097/article/details/119478023

    :param img: BGR image, modified in place
    :param h_gain: maximum relative change of the hue channel
    :param s_gain: maximum relative change of the saturation channel
    :param v_gain: maximum relative change of the value channel
    """
    gains = np.random.uniform(-1, 1, 3) * [h_gain, s_gain, v_gain] + 1  # random gains
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    hue, sat, val = cv2.split(hsv)
    out_dtype = img.dtype  # uint8

    # one lookup table per channel; hue wraps around at 180, the others are clipped
    levels = np.arange(0, 256, dtype=np.int16)
    lut_hue = ((levels * gains[0]) % 180).astype(out_dtype)
    lut_sat = np.clip(levels * gains[1], 0, 255).astype(out_dtype)
    lut_val = np.clip(levels * gains[2], 0, 255).astype(out_dtype)

    remapped = (cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))
    img_hsv = cv2.merge(remapped).astype(out_dtype)
    cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)  # no return needed


def letterbox(img: np.ndarray,
              new_shape=(416, 416),
              color=(114, 114, 114),
              auto=True,
              scale_fill=False,
              scale_up=True):
    """
    Resize an image to fit into ``new_shape`` and pad the remainder.

    :param img: input image, array of shape (h, w, c)
    :param new_shape: target shape as (h, w), or a single int for a square
    :param color: value used for the padding
    :param auto: pad only up to the next multiple of 32 instead of the full target shape
    :param scale_fill: stretch to the target shape without padding (only checked when ``auto`` is False)
    :param scale_up: allow enlarging; if False the image is only ever shrunk
    :return: (image, (w_ratio, h_ratio), (dw, dh)) where dw/dh is the padding added per side
    """

    shape = img.shape[:2]  # [h, w]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scale_up:  # only scale down, do not scale up (for better test mAP)
        r = min(r, 1.0)

    # compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle, the aspect ratio of the image is kept
        # the modulo keeps the padded image a multiple of 32
        dw, dh = np.mod(dw, 32), np.mod(dh, 32)  # wh padding
    elif scale_fill:  # stretch to the target shape
        dw, dh = 0, 0
        # Fixed: new_shape is (h, w) but new_unpad and ratio are (w, h); the indices
        # were used as if new_shape were (w, h).
        new_unpad = (new_shape[1], new_shape[0])  # [h, w] -> [w, h]
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # wh ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    # shape:[h, w]  new_unpad:[w, h]
    if shape[::-1] != new_unpad:
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    # the -0.1 / +0.1 sends the extra pixel of an odd padding to the bottom / right side
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))

    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (dw, dh)


def create_folder(path="./new_folder"):
    """
    Create an empty folder at ``path``.

    An existing folder at that location is removed together with its content first.

    :param path: folder to (re)create
    """
    already_there = os.path.exists(path)
    if already_there:
        shutil.rmtree(path)  # drop the old folder and everything in it
    os.makedirs(path)  # make new output folder


================================================
FILE: pytorch_object_detection/yolov3_spp/build_utils/img_utils.py
================================================
import numpy as np
import cv2


def letterbox(img: np.ndarray,
              new_shape=(416, 416),
              color=(114, 114, 114),
              auto=True,
              scale_fill=False,
              scale_up=True):
    """
    Resize an image to fit into ``new_shape`` and pad the remainder.

    :param img: input image as a numpy array of shape (h, w, c)
    :param new_shape: network input shape as (h, w), or a single int for a square
    :param color: value used for the padding
    :param auto: pad only by the remainder modulo 64 instead of up to the full target shape
    :param scale_fill: stretch to the target shape without padding (only checked when ``auto`` is False)
    :param scale_up: allow enlarging; if False the image is only ever shrunk
    :return: (image, (w_ratio, h_ratio), (dw, dh)) where dw/dh is the padding added per side
    """
    src_hw = img.shape[:2]  # [h, w]
    src_h, src_w = src_hw[0], src_hw[1]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # scale ratio (new / old), limited to 1.0 when enlarging is not allowed
    gain = min(new_shape[0] / src_h, new_shape[1] / src_w)
    if not scale_up:
        gain = min(gain, 1.0)

    # size after resizing (w, h) and the padding still missing
    ratio = (gain, gain)  # width, height ratios
    new_unpad = (int(round(src_w * gain)), int(round(src_h * gain)))
    pad_w = new_shape[1] - new_unpad[0]
    pad_h = new_shape[0] - new_unpad[1]
    if auto:
        # minimum rectangle: keep only the remainder modulo 64 as padding
        pad_w, pad_h = np.mod(pad_w, 64), np.mod(pad_h, 64)
    elif scale_fill:
        # stretch: no padding, the image takes the target shape exactly
        pad_w, pad_h = 0, 0
        new_unpad = new_shape[::-1]  # [h, w] -> [w, h]
        ratio = new_shape[1] / src_w, new_shape[0] / src_h  # wh ratios

    # half of the padding goes on each side
    pad_w = pad_w / 2
    pad_h = pad_h / 2

    # src_hw: [h, w]  new_unpad: [w, h]
    if src_hw[::-1] != new_unpad:
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)

    # the -0.1 / +0.1 sends the extra pixel of an odd padding to the bottom / right side
    top = int(round(pad_h - 0.1))
    bottom = int(round(pad_h + 0.1))
    left = int(round(pad_w - 0.1))
    right = int(round(pad_w + 0.1))

    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (pad_w, pad_h)


================================================
FILE: pytorch_object_detection/yolov3_spp/build_utils/layers.py
================================================
import torch.nn.functional as F
from .utils import *


def make_divisible(v, divisor):
    """
    Round ``v`` up to the next multiple of ``divisor``.

    Reference: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py

    :param v: value to round up (e.g. a channel count)
    :param divisor: the result is a multiple of this number
    :return: smallest multiple of ``divisor`` that is >= ``v``
    """
    multiples = math.ceil(v / divisor)
    return multiples * divisor


class Flatten(nn.Module):
    """Flatten every dimension after the first one, e.g. after nn.AdaptiveAvgPool2d(1)."""

    def forward(self, x):
        batch = x.size(0)
        return x.view(batch, -1)


class Concat(nn.Module):
    """Concatenate a list of tensors along a fixed dimension."""

    def __init__(self, dimension=1):
        super().__init__()
        self.d = dimension  # dimension along which the tensors are joined

    def forward(self, x):
        return torch.cat(x, dim=self.d)


class FeatureConcat(nn.Module):
    """
    Concatenate several stored feature maps along the channel dimension.

    With a single layer index the stored feature map is returned as is.
    """

    def __init__(self, layers):
        super().__init__()
        self.layers = layers  # layer indices
        self.multiple = len(layers) > 1  # multiple layers flag

    def forward(self, x, outputs):
        # x is not used; the result is built from the stored outputs only
        if not self.multiple:
            return outputs[self.layers[0]]
        picked = [outputs[idx] for idx in self.layers]
        return torch.cat(picked, 1)


class WeightedFeatureFusion(nn.Module):  # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
    """
    Fuse the input with stored feature maps by element-wise addition.

    With ``weight=True`` every term is multiplied by a learnable sigmoid weight first.
    """

    def __init__(self, layers, weight=False):
        super().__init__()
        self.layers = layers  # layer indices
        self.weight = weight  # apply weights boolean
        self.n = len(layers) + 1  # number of fused feature maps (input included)
        if weight:
            self.w = nn.Parameter(torch.zeros(self.n), requires_grad=True)  # layer weights

    def forward(self, x, outputs):
        # Weights
        if self.weight:
            w = torch.sigmoid(self.w) * (2 / self.n)  # sigmoid weights (0-1)
            x = x * w[0]

        # Fusion
        in_channels = x.shape[1]
        for pos in range(1, self.n):
            feat = outputs[self.layers[pos - 1]]
            if self.weight:
                feat = feat * w[pos]
            feat_channels = feat.shape[1]

            # pick the way of adding depending on the two channel counts
            if in_channels == feat_channels:
                # same channel count: plain addition
                x = x + feat
            elif in_channels > feat_channels:
                # input has more channels: add onto its leading channels, in place
                x[:, :feat_channels] = x[:, :feat_channels] + feat
            else:
                # feature has more channels: use only its leading channels
                x = x + feat[:, :in_channels]

        return x


class MixConv2d(nn.Module):  # MixConv: Mixed Depthwise Convolutional Kernels https://arxiv.org/abs/1907.09595
    """
    Run several convolutions with different kernel sizes in parallel and concatenate
    their outputs along the channel dimension.

    :param in_ch: number of input channels
    :param out_ch: total number of output channels to distribute over the branches
    :param k: kernel size of every branch
    :param stride: stride of every branch
    :param dilation: dilation of every branch
    :param bias: whether the convolutions use a bias
    :param method: 'equal_ch' spreads the output channels evenly over the branches;
                   any other value solves for (roughly) the same number of
                   parameters per branch
    """

    def __init__(self, in_ch, out_ch, k=(3, 5, 7), stride=1, dilation=1, bias=True, method='equal_params'):
        super(MixConv2d, self).__init__()

        groups = len(k)
        if method == 'equal_ch':  # equal channels per group
            i = torch.linspace(0, groups - 1E-6, out_ch).floor()  # out_ch indices
            ch = [(i == g).sum() for g in range(groups)]
        else:  # 'equal_params': equal parameter count per group
            b = [out_ch] + [0] * groups
            a = np.eye(groups + 1, groups, k=-1)
            a -= np.roll(a, 1, axis=1)
            a *= np.array(k) ** 2
            a[0] = 1
            ch = np.linalg.lstsq(a, b, rcond=None)[0].round().astype(int)  # solve for equal weight indices, ax = b

        # ch holds 0-dim tensors ('equal_ch') or numpy integers ('equal_params');
        # convert to plain int so that Conv2d stores ordinary Python ints as out_channels.
        self.m = nn.ModuleList([nn.Conv2d(in_channels=in_ch,
                                          out_channels=int(ch[g]),
                                          kernel_size=k[g],
                                          stride=stride,
                                          padding=k[g] // 2,  # 'same' pad
                                          dilation=dilation,
                                          bias=bias) for g in range(groups)])

    def forward(self, x):
        return torch.cat([m(x) for m in self.m], 1)


# Activation functions below -------------------------------------------------------------------------------------------
class SwishImplementation(torch.autograd.Function):
    """Swish, x * sigmoid(x), as an autograd function with a hand-written backward pass."""

    @staticmethod
    def forward(ctx, x):
        # only the input is stored; the sigmoid is recomputed in backward
        ctx.save_for_backward(x)
        gate = torch.sigmoid(x)
        return x * gate

    @staticmethod
    def backward(ctx, grad_output):
        (x,) = ctx.saved_tensors
        gate = torch.sigmoid(x)
        # derivative of x * sigmoid(x)
        local_grad = gate * (1 + x * (1 - gate))
        return grad_output * local_grad


class MishImplementation(torch.autograd.Function):
    """Mish, x * tanh(softplus(x)), as an autograd function with a hand-written backward pass."""

    @staticmethod
    def forward(ctx, x):
        # only the input is stored; the intermediate values are recomputed in backward
        ctx.save_for_backward(x)
        squashed = torch.tanh(F.softplus(x))  # tanh(ln(1 + exp(x)))
        return x.mul(squashed)

    @staticmethod
    def backward(ctx, grad_output):
        (x,) = ctx.saved_tensors
        sig = torch.sigmoid(x)
        squashed = F.softplus(x).tanh()
        # derivative of x * tanh(softplus(x))
        local_grad = squashed + x * sig * (1 - squashed * squashed)
        return grad_output * local_grad


class MemoryEfficientSwish(nn.Module):
    """Module wrapper that applies SwishImplementation to its input."""

    def forward(self, x):
        swish = SwishImplementation.apply
        return swish(x)


class MemoryEfficientMish(nn.Module):
    """Module wrapper that applies MishImplementation to its input."""

    def forward(self, x):
        mish = MishImplementation.apply
        return mish(x)


class Swish(nn.Module):
    """Swish activation: x * sigmoid(x), differentiated by plain autograd."""

    def forward(self, x):
        gate = torch.sigmoid(x)
        return x * gate


class HardSwish(nn.Module):  # https://arxiv.org/pdf/1905.02244.pdf
    """Hard-swish activation: x * clamp(x + 3, 0, 6) / 6."""

    def forward(self, x):
        # The in-place hardtanh acts on the temporary (x + 3), so x itself is untouched.
        gate = F.hardtanh(x + 3, 0., 6., True)
        return x * gate / 6.


class Mish(nn.Module):  # https://github.com/digantamisra98/Mish
    """Mish activation: x * tanh(softplus(x))."""

    def forward(self, x):
        squashed = F.softplus(x).tanh()
        return x * squashed


================================================
FILE: pytorch_object_detection/yolov3_spp/build_utils/parse_config.py
================================================
import os
import numpy as np


def _parse_cfg_scalar(val: str):
    """Convert a cfg value to int or float when it is numeric, else return it unchanged."""
    try:
        return int(val)
    except ValueError:
        pass
    try:
        num = float(val)
    except ValueError:
        return val
    # keep words such as "nan"/"inf" as plain strings rather than special floats
    return num if np.isfinite(num) else val


def parse_model_cfg(path: str):
    """
    Parse a darknet-style model ``.cfg`` file into a list of block dictionaries.

    Every ``[section]`` header starts a new dict whose ``"type"`` entry is the
    section name; the ``key=value`` lines that follow are stored in that dict.

    :param path: path to the cfg file, must end with ".cfg"
    :return: list of dicts, one per section, in file order (index 0 is the first
             section, which is skipped by the supported-field check)
    :raises FileNotFoundError: if the path does not end with ".cfg" or does not exist
    :raises ValueError: if a non-header line has no "=", or a block after the
                        first one contains an unsupported field
    """
    # make sure the file exists
    if not path.endswith(".cfg") or not os.path.exists(path):
        raise FileNotFoundError("the cfg file not exist...")

    # read the file
    with open(path, "r") as f:
        lines = f.read().split("\n")

    # strip surrounding whitespace first, so that whitespace-only lines and
    # indented comments are also removed by the filter below
    lines = [x.strip() for x in lines]
    # drop blank lines and comment lines
    lines = [x for x in lines if x and not x.startswith("#")]

    mdefs = []  # module definitions
    for line in lines:
        if line.startswith("["):  # this marks the start of a new block
            mdefs.append({})
            mdefs[-1]["type"] = line[1:-1].strip()  # record the module type
            # convolutional blocks default to no BN (regular conv layers override this
            # with 1 later on; the final prediction conv layers keep 0)
            if mdefs[-1]["type"] == "convolutional":
                mdefs[-1]["batch_normalize"] = 0
        else:
            # split on the first "=" only, so values may themselves contain "="
            key, sep, val = line.partition("=")
            if not sep:
                raise ValueError("Invalid cfg line (expected key=value): {}".format(line))
            key = key.strip()
            val = val.strip()

            if key == "anchors":
                # anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
                val = val.replace(" ", "")  # remove spaces
                mdefs[-1][key] = np.array([float(x) for x in val.split(",")]).reshape((-1, 2))  # np anchors
            elif (key in ["from", "layers", "mask"]) or (key == "size" and "," in val):
                mdefs[-1][key] = [int(x) for x in val.split(",")]
            else:
                # int or float for numeric values, the raw string otherwise
                mdefs[-1][key] = _parse_cfg_scalar(val)

    # check all fields are supported
    supported = ['type', 'batch_normalize', 'filters', 'size', 'stride', 'pad', 'activation', 'layers', 'groups',
                 'from', 'mask', 'anchors', 'classes', 'num', 'jitter', 'ignore_thresh', 'truth_thresh', 'random',
                 'stride_x', 'stride_y', 'weights_type', 'weights_normalization', 'scale_x_y', 'beta_nms', 'nms_kind',
                 'iou_loss', 'iou_normalizer', 'cls_normalizer', 'iou_thresh', 'probability']

    # check every block's configuration
    for x in mdefs[1:]:  # index 0 is the net configuration
        # check every key of the block
        for k in x:
            if k not in supported:
                raise ValueError("Unsupported fields:{} in cfg".format(k))

    return mdefs


def parse_data_cfg(path):
    """
    Parse a ``key=value`` data configuration file into a dict of strings.

    Blank lines and lines starting with ``#`` are ignored. Keys and values are
    whitespace-stripped; values are kept as strings.

    :param path: path to the data file; if it does not exist but
                 ``data/<path>`` does, the latter is used
    :return: dict mapping option names to their string values
    :raises ValueError: if a non-comment line contains no "="
    """
    if not os.path.exists(path) and os.path.exists('data' + os.sep + path):  # add data/ prefix if omitted
        path = 'data' + os.sep + path

    options = dict()
    with open(path, 'r') as f:
        for line in f:
            line = line.strip()
            if line == '' or line.startswith('#'):
                continue
            # split on the first "=" only, so values (e.g. paths) may contain "="
            key, sep, val = line.partition('=')
            if not sep:
                raise ValueError("Invalid data cfg line (expected key=value): {}".format(line))
            options[key.strip()] = val.strip()

    return options


================================================
FILE: pytorch_object_detection/yolov3_spp/build_utils/torch_utils.py
================================================
import math
import time
from copy import deepcopy

import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn


def init_seeds(seed=0):
    """Seed torch's RNG; for seed 0 also turn on cudnn benchmark mode and turn off cudnn determinism."""
    torch.manual_seed(seed)

    # https://pytorch.org/docs/stable/notes/randomness.html
    if seed != 0:
        return
    cudnn.benchmark = True
    cudnn.deterministic = False


def time_synchronized():
    """Return time.time(), after synchronizing CUDA first when CUDA is available."""
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()


def initialize_weights(model):
    """
    Adjust module settings in place for every sub-module of ``model``.

    BatchNorm2d layers get eps=1e-4 and momentum=0.03; LeakyReLU / ReLU / ReLU6
    layers are switched to in-place mode. Conv2d layers are left untouched.
    Matching is on the exact type, so subclasses are not affected.
    """
    inplace_activations = (nn.LeakyReLU, nn.ReLU, nn.ReLU6)
    for module in model.modules():
        kind = type(module)
        if kind is nn.BatchNorm2d:
            module.eps = 1e-4
            module.momentum = 0.03
        elif any(kind is act for act in inplace_activations):
            module.inplace = True


def model_info(model, verbose=False):
    """
    Print a summary of a PyTorch model.

    :param model: the model to describe
    :param verbose: when True, also print one line per named parameter (index,
                    name, requires_grad, element count, shape, mean and std)

    The summary line reports the number of parameter tensors (labelled
    "layers"), the total element count, and the element count of parameters
    that require gradients. A GFLOPS figure is appended when the optional
    ``thop`` package is available and can profile the model on a 1x3x480x640
    input.
    """
    n_p = sum(x.numel() for x in model.parameters())  # number parameters
    n_g = sum(x.numel() for x in model.parameters() if x.requires_grad)  # number gradients
    if verbose:
        print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
        for i, (name, p) in enumerate(model.named_parameters()):
            name = name.replace('module_list.', '')
            print('%5g %40s %9s %12g %20s %10.3g %10.3g' %
                  (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))

    try:  # FLOPS
        from thop import profile
        macs, _ = profile(model, inputs=(torch.zeros(1, 3, 480, 640),), verbose=False)
        fs = ', %.1f GFLOPS' % (macs / 1E9 * 2)
    except Exception:
        # Best effort only: thop may be missing or unable to profile this model.
        # KeyboardInterrupt / SystemExit are deliberately not swallowed.
        fs = ''

    print('Model Summary: %g layers, %g parameters, %g gradients%s' % (len(list(model.parameters())), n_p, n_g, fs))


class ModelEMA:
    """ Exponential moving average of a model, after https://github.com/rwightman/pytorch-image-models

    A deep copy of the model is kept in ``self.ema``; every floating-point entry
    of its state_dict (parameters and buffers) is blended towards the live model
    on each ``update`` call, similar in spirit to
    https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage

    Smoothed weights are needed by some training schemes (e.g. RMSprop with a
    short decay period and slow LR decay) to reproduce published results, so
    choose ``decay`` with your number of updates per epoch in mind.

    Pass ``device='cpu'`` to keep the averaged copy off the GPU; that saves some
    memory but the EMA weights then have to be validated separately.

    The copy is taken at construction time, so where this object is created
    relative to GPU placement and DataParallel / DDP wrapping matters.
    """

    def __init__(self, model, decay=0.9999, device=''):
        # averaged copy of the model, kept in eval mode
        self.ema = deepcopy(model)
        self.ema.eval()
        self.updates = 0  # number of EMA updates performed so far
        # effective decay ramps up from 0 towards `decay` with the update count (helps early epochs)
        self.decay = lambda step: decay * (1 - math.exp(-step / 2000))
        self.device = device  # when set, the averaged copy lives on this device
        if device:
            self.ema.to(device=device)
        # the averaged weights are never trained directly
        for param in self.ema.parameters():
            param.requires_grad_(False)

    def update(self, model):
        """Blend the current weights of ``model`` into the averaged copy."""
        self.updates += 1
        d = self.decay(self.updates)
        wrapped = type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
        with torch.no_grad():
            source = model.module if wrapped else model
            target = self.ema.module if wrapped else self.ema
            msd, esd = source.state_dict(), target.state_dict()

            for key, averaged in esd.items():
                if not averaged.dtype.is_floating_point:
                    continue  # integer buffers are left as they are
                averaged *= d
                averaged += (1. - d) * msd[key].detach()

    def update_attr(self, model):
        """Copy every attribute of ``model`` whose name does not start with '_' onto the averaged copy."""
        for name in model.__dict__:
            if name.startswith('_'):
                continue
            setattr(self.ema, name, getattr(model, name))


================================================
FILE: pytorch_object_detection/yolov3_spp/build_utils/utils.py
================================================
import glob
import math
import os
import random
import time

import cv2
import matplotlib
import numpy as np
import torch
import torch.nn as nn
import torchvision
from tqdm import tqdm

from build_utils import torch_utils  # , google_utils

# Global print/plot settings applied when this module is imported:
# wide lines and 5 significant digits for torch and numpy output, font size 11 for matplotlib
torch.set_printoptions(linewidth=320, precision=5, profile='long')
np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format})  # format short g, %precision=5
matplotlib.rc('font', **{'size': 11})

# Prevent OpenCV from multithreading (to use PyTorch DataLoader)
cv2.setNumThreads(0)


def init_seeds(seed=0):
    """Seed numpy's and Python's random generators, then delegate to torch_utils.init_seeds."""
    np.random.seed(seed)
    random.seed(seed)
    torch_utils.init_seeds(seed=seed)


def check_file(file):
    """
    Return ``file`` if it exists, otherwise the first match found by a
    recursive search below the current working directory.

    An AssertionError is raised when nothing is found.
    """
    if not os.path.isfile(file):
        matches = glob.glob('./**/' + file, recursive=True)  # search below cwd
        assert len(matches), 'File Not Found: %s' % file  # at least one hit is required
        return matches[0]  # first hit wins when there are several
    return file


def xyxy2xywh(x):
    """
    Convert nx4 boxes from [x1, y1, x2, y2] (top-left, bottom-right corners)
    to [x center, y center, width, height].

    Accepts a torch tensor or a numpy array and returns a new object of the
    same kind and dtype; the input is not modified.
    """
    out = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
    top_left, bottom_right = x[:, :2], x[:, 2:4]
    out[:, :2] = (top_left + bottom_right) / 2  # box centre
    out[:, 2:4] = bottom_right - top_left  # width, height
    return out


def xywh2xyxy(x):
    """
    Convert nx4 boxes from [x center, y center, width, height] to
    [x1, y1, x2, y2] (top-left, bottom-right corners).

    Accepts a torch tensor or a numpy array and returns a new object of the
    same kind and dtype; the input is not modified.
    """
    out = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
    centre = x[:, :2]
    half_size = x[:, 2:4] / 2
    out[:, :2] = centre - half_size  # top-left corner
    out[:, 2:4] = centre + half_size  # bottom-right corner
    return out


def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    """
    Map predicted box coordinates back to the original image scale.

    ``coords`` is modified in place and also returned.

    :param img1_shape: (height, width) of the resized / padded image
    :param coords: predicted boxes, columns 0-3 are (x1, y1, x2, y2)
    :param img0_shape: (height, width) of the original image
    :param ratio_pad: optional ((ratio, ...), (pad_w, pad_h)) recorded during
                      resizing; when None both values are derived from the shapes
    :return: the rescaled and clipped ``coords``
    """
    # Rescale coords (xyxy) from img1_shape to img0_shape
    if ratio_pad is None:  # calculate from img0_shape
        # An aspect-preserving resize that fits img0 inside img1 scales by the
        # smaller of the two per-axis ratios. max(img1) / max(img0) is only
        # correct when the longer side of img1 carries no padding.
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain  = old / new
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    coords[:, [0, 2]] -= pad[0]  # x padding
    coords[:, [1, 3]] -= pad[1]  # y padding
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)
    return coords


def clip_coords(boxes, img_shape):
    """Clamp xyxy boxes in place to the image bounds; ``img_shape`` is (height, width)."""
    height, width = img_shape[0], img_shape[1]
    # columns are x1, y1, x2, y2: x values are limited by width, y values by height
    for col, upper in enumerate((width, height, width, height)):
        boxes[:, col].clamp_(0, upper)


def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False):
    """
    Return the IoU (or GIoU / DIoU / CIoU) of box1 against box2.

    box1 is indexed as box1[0..3]; box2 is nx4 and is transposed internally.
    With ``x1y1x2y2=False`` both are interpreted as (x center, y center, w, h).
    When several of the flags are set, GIoU takes precedence over DIoU, which
    takes precedence over CIoU.
    """
    box2 = box2.t()

    # Corner coordinates of both boxes
    if x1y1x2y2:
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
    else:  # xywh -> xyxy
        b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
        b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
        b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
        b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2

    # Intersection area (negative overlaps are clamped to zero)
    overlap_w = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0)
    overlap_h = (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
    inter = overlap_w * overlap_h

    # Union area; the epsilon guards against division by zero
    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1
    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1
    union = (w1 * h1 + 1e-16) + w2 * h2 - inter

    iou = inter / union
    if not (GIoU or DIoU or CIoU):
        return iou

    # Width / height of the smallest box enclosing both boxes
    cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)
    ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)

    if GIoU:  # Generalized IoU https://arxiv.org/pdf/1902.09630.pdf
        c_area = cw * ch + 1e-16  # enclosing area
        return iou - (c_area - union) / c_area

    # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
    c2 = cw ** 2 + ch ** 2 + 1e-16  # squared diagonal of the enclosing box
    # squared distance between the two box centres
    rho2 = ((b2_x1 + b2_x2) - (b1_x1 + b1_x2)) ** 2 / 4 + ((b2_y1 + b2_y2) - (b1_y1 + b1_y2)) ** 2 / 4
    if DIoU:
        return iou - rho2 / c2

    # CIoU https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
    v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
    with torch.no_grad():
        alpha = v / (1 - iou + v)
    return iou - (rho2 / c2 + v * alpha)


def box_iou(box1, box2):
    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
    """
    Pairwise intersection-over-union (Jaccard index) of two box sets.
    Boxes in both sets use the (x1, y1, x2, y2) format.
    Arguments:
        box1 (Tensor[N, 4])
        box2 (Tensor[M, 4])
    Returns:
        iou (Tensor[N, M]): entry (i, j) is the IoU of box1[i] and box2[j]
    """
    # per-box areas, shapes [N] and [M]
    area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
    area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])

    # corners of every pairwise intersection rectangle, shape [N, M, 2]
    right_bottom = torch.min(box1[:, None, 2:], box2[:, 2:])
    left_top = torch.max(box1[:, None, :2], box2[:, :2])
    inter = (right_bottom - left_top).clamp(0).prod(2)  # [N, M]

    # iou = inter / (area1 + area2 - inter)
    return inter / (area1[:, None] + area2 - inter)


def wh_iou(wh1, wh2):
    """
    Return the nxm IoU matrix of width-height pairs, computed as if all
    boxes shared the same corner. wh1 is nx2, wh2 is mx2.
    """
    a = wh1[:, None]  # [N,1,2]
    b = wh2[None]  # [1,M,2]
    overlap = torch.min(a, b).prod(2)  # [N,M]
    area_a = a.prod(2)
    area_b = b.prod(2)
    return overlap / (area_a + area_b - overlap)  # iou = inter / (area1 + area2 - inter)


class FocalLoss(nn.Module):
    """
    Focal-loss wrapper around an existing element-wise loss, e.g.
    ``criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)``.

    The wrapped loss has its ``reduction`` switched to 'none' so the focal
    weighting can be applied per element; the reduction it was configured
    with is then applied by this module instead.
    """

    def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
        super().__init__()
        self.loss_fcn = loss_fcn  # must be nn.BCEWithLogitsLoss()
        self.gamma = gamma
        self.alpha = alpha
        # remember the requested reduction, then force element-wise output
        self.reduction = loss_fcn.reduction
        self.loss_fcn.reduction = 'none'

    def forward(self, pred, true):
        loss = self.loss_fcn(pred, true)

        # Follows the TF implementation
        # https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py
        prob = torch.sigmoid(pred)  # probabilities from logits
        p_t = true * prob + (1 - true) * (1 - prob)
        alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
        modulating_factor = (1.0 - p_t) ** self.gamma
        loss *= alpha_factor * modulating_factor

        if self.reduction == 'mean':
            return loss.mean()
        if self.reduction == 'sum':
            return loss.sum()
        return loss  # 'none'


def smooth_BCE(eps=0.1):  # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441
    """Return the (positive, negative) label-smoothed BCE target values for smoothing amount ``eps``."""
    half = 0.5 * eps
    positive = 1.0 - half
    return positive, half


def compute_loss(p, targets, model):  # predictions, targets, model
    """
    Compute the box, objectness and class losses for a batch.

    :param p: sequence of per-layer prediction tensors; each is indexed below as
              pi[image_idx, anchor_idx, grid_y, grid_x, ...] with the last axis
              holding xy (0:2), wh (2:4), objectness (4) and class logits (5:)
    :param targets: ground-truth targets, passed straight to build_targets()
    :param model: provides ``hyp`` (hyperparameter dict), ``gr`` (giou ratio) and ``nc``
    :return: dict with keys "box_loss", "obj_loss" and "class_loss", each a
             1-element tensor already multiplied by its gain from ``model.hyp``
    """
    device = p[0].device
    lcls = torch.zeros(1, device=device)  # Tensor(0)
    lbox = torch.zeros(1, device=device)  # Tensor(0)
    lobj = torch.zeros(1, device=device)  # Tensor(0)
    tcls, tbox, indices, anchors = build_targets(p, targets, model)  # targets
    h = model.hyp  # hyperparameters
    red = 'mean'  # Loss reduction (sum or mean)

    # Define criteria
    BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device), reduction=red)
    BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device), reduction=red)

    # class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
    # (eps=0.0 means smoothing is effectively off: cp=1.0, cn=0.0)
    cp, cn = smooth_BCE(eps=0.0)

    # focal loss (only when gamma > 0)
    g = h['fl_gamma']  # focal loss gamma
    if g > 0:
        BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)

    # per output
    for i, pi in enumerate(p):  # layer index, layer predictions
        b, a, gj, gi = indices[i]  # image_idx, anchor_idx, grid_y, grid_x
        tobj = torch.zeros_like(pi[..., 0], device=device)  # target obj

        nb = b.shape[0]  # number of positive samples
        if nb:
            # predictions at the positions matched to positive samples
            ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets

            # GIoU
            pxy = ps[:, :2].sigmoid()
            pwh = ps[:, 2:4].exp().clamp(max=1E3) * anchors[i]
            pbox = torch.cat((pxy, pwh), 1)  # predicted box
            giou = bbox_iou(pbox.t(), tbox[i], x1y1x2y2=False, GIoU=True)  # giou(prediction, target)
            lbox += (1.0 - giou).mean()  # giou loss

            # Obj: the objectness target blends 1.0 with the (detached, non-negative) giou via model.gr
            tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * giou.detach().clamp(0).type(tobj.dtype)  # giou ratio

            # Class
            if model.nc > 1:  # cls loss (only if multiple classes)
                t = torch.full_like(ps[:, 5:], cn, device=device)  # targets
                t[range(nb), tcls[i]] = cp
                lcls += BCEcls(ps[:, 5:], t)  # BCE

            # Append targets to text file
            # with open('targets.txt', 'a') as file:
            #     [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]

        lobj += BCEobj(pi[..., 4], tobj)  # obj loss

    # multiply each loss by its corresponding weight
    lbox *= h['giou']
    lobj *= h['obj']
    lcls *= h['cls']

    # loss = lbox + lobj + lcls
    return {"box_loss": lbox,
            "obj_loss": lobj,
            "class_loss": lcls}


def build_targets(p, targets, model):
    """
    Build per-layer training targets for compute_loss().

    :param p: sequence of per-layer prediction tensors
    :param targets: tensor with one row per target: (image_idx, class, x, y, w, h)
    :param model: provides ``yolo_layers``, ``module_list`` (or ``module.module_list``
                  when wrapped in DataParallel / DDP), ``hyp['iou_t']`` and ``nc``
    :return: (tcls, tbox, indices, anch), four lists with one entry per yolo layer:
             class indices, target boxes (xy offset inside the cell + wh),
             (image_idx, anchor_idx, grid_y, grid_x) index tuples, and matched anchors
    """
    # Build targets for compute_loss(), input targets(image_idx,class,x,y,w,h)
    nt = targets.shape[0]
    tcls, tbox, indices, anch = [], [], [], []
    gain = torch.ones(6, device=targets.device).long()  # normalized to gridspace gain

    multi_gpu = type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
    for i, j in enumerate(model.yolo_layers):  # j: [89, 101, 113]
        # get the anchors of this yolo predictor
        # note (from the original author): anchor_vec is the anchors scaled to the corresponding feature map
        anchors = model.module.module_list[j].anchor_vec if multi_gpu else model.module_list[j].anchor_vec
        # p[i].shape: [batch_size, 3, grid_h, grid_w, num_params]
        gain[2:] = torch.tensor(p[i].shape)[[3, 2, 3, 2]]  # xyxy gain
        na = anchors.shape[0]  # number of anchors
        # [3] -> [3, 1] -> [3, nt]
        at = torch.arange(na).view(na, 1).repeat(1, nt)  # anchor tensor, same as .repeat_interleave(nt)

        # Match targets to anchors
        a, t, offsets = [], targets * gain, 0
        if nt:  # only when there is at least one target
            # match positive samples by computing wh_iou between the anchor templates and all targets
            # j: [3, nt] , iou_t = 0.20
            # NOTE: this rebinds the loop variable j to a boolean mask
            j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n) = wh_iou(anchors(3,2), gwh(n,2))
            # t.repeat(na, 1, 1): [nt, 6] -> [3, nt, 6]
            # keep the anchor indices and target rows of the positive matches
            a, t = at[j], t.repeat(na, 1, 1)[j]  # filter

        # Define
        # .long() is to(torch.int64); it drops the fractional part
        b, c = t[:, :2].long().T  # image_idx, class
        gxy = t[:, 2:4]  # grid xy
        gwh = t[:, 4:6]  # grid wh
        gij = (gxy - offsets).long()  # top-left corner of the grid cell each matched target falls in
        gi, gj = gij.T  # grid xy indices

        # Append
        # gain[3]: grid_h, gain[2]: grid_w
        # image_idx, anchor_idx, grid indices(y, x)
        indices.append((b, a, gj.clamp_(0, gain[3]-1), gi.clamp_(0, gain[2]-1)))
        tbox.append(torch.cat((gxy - gij, gwh), 1))  # gt box x,y offset from its grid cell corner, plus w,h
        anch.append(anchors[a])  # anchors
        tcls.append(c)  # class
        if c.shape[0]:  # if any targets
            # class labels must be smaller than the configured number of classes
            assert c.max() < model.nc, 'Model accepts %g classes labeled from 0-%g, however you labelled a class %g. ' \
                                       'See https://github.com/ultralytics/yolov3/wiki/Train-Custom-Data' % (
                                           model.nc, model.nc - 1, c.max())

    return tcls, tbox, indices, anch


def non_max_suppression(prediction, conf_thres=0.1, iou_thres=0.6,
                        multi_label=True, classes=None, agnostic=False, max_num=100):
    """
    Performs Non-Maximum Suppression on inference results.

    :param prediction: tensor indexed as [image, box, 5 + num_classes]; per box the
                       columns are (center x, center y, width, height, obj_conf, class confs...)
    :param conf_thres: minimum objectness and minimum obj_conf * cls_conf to keep a detection
    :param iou_thres: IoU threshold passed to torchvision.ops.nms
    :param multi_label: allow several class labels per box (ignored when there is one class)
    :param classes: optional list of class indices; detections of other classes are dropped
    :param agnostic: when True, NMS is run across all classes together
    :param max_num: maximum number of detections kept per image
    :return: list with one entry per image: an nx6 tensor (x1, y1, x2, y2, conf, cls),
             or None when the image has no detections (or the time limit was hit first)
    """

    # Settings
    merge = False  # merge for best mAP
    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
    time_limit = 10.0  # seconds to quit after

    t = time.time()
    nc = prediction[0].shape[1] - 5  # number of classes
    multi_label &= nc > 1  # multiple labels per box
    output = [None] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference (loop over images)
        # Apply constraints
        x = x[x[:, 4] > conf_thres]  # confidence: drop background boxes by objectness
        x = x[((x[:, 2:4] > min_wh) & (x[:, 2:4] < max_wh)).all(1)]  # width-height: drop tiny / huge boxes

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[..., 5:] *= x[..., 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:  # one detection per (box, class) pair above the threshold
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).t()
            x = torch.cat((box[i], x[i, j + 5].unsqueeze(1), j.float().unsqueeze(1)), 1)
        else:  # best class only: keep just the highest-scoring class of every box
            conf, j = x[:, 5:].max(1)
            x = torch.cat((box, conf.unsqueeze(1), j.float().unsqueeze(1)), 1)[conf > conf_thres]

        # Filter by class
        if classes:
            # Compare against the class column of the already-filtered detections.
            # Using `j` here would be wrong in the best-class branch, where `j`
            # still has one entry per box from before the confidence filter.
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # If none remain process next image
        n = x.shape[0]  # number of boxes
        if not n:
            continue

        # Batched NMS: shifting boxes by class * max_wh keeps different classes from suppressing each other
        c = x[:, 5] * 0 if agnostic else x[:, 5]  # classes
        boxes, scores = x[:, :4].clone() + c.view(-1, 1) * max_wh, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)
        i = i[:max_num]  # keep at most max_num detections
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            try:  # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
                iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
                weights = iou * scores[None]  # box weights
                x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
                # i = i[iou.sum(1) > 1]  # require redundancy
            except Exception:  # possible CUDA error https://github.com/ultralytics/yolov3/issues/1139
                # best effort: report and fall back to the unmerged boxes
                print(x, i, x.shape, i.shape)

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            break  # time limit exceeded

    return output


def get_yolo_layers(model):
    """Return the indices of all entries in ``model.module_defs`` whose 'type' is 'yolo'."""
    yolo_indices = []
    for idx, mdef in enumerate(model.module_defs):
        if mdef['type'] == 'yolo':
            yolo_indices.append(idx)
    return yolo_indices  # [82, 94, 106] for yolov3


def kmean_anchors(path='./data/coco64.txt', n=9, img_size=(640, 640), thr=0.20, gen=1000):
    """
    Create k-means anchors for use in *.cfg files, then refine them with random mutations.

    Usage: from build_utils.utils import *; _ = kmean_anchors()

    :param path: dataset description passed to LoadImagesAndLabels
    :param n: number of anchors
    :param img_size: (min, max) image size used for multi-scale training (can be same values)
    :param thr: IoU threshold hyperparameter used for training (0.0 - 1.0)
    :param gen: generations to evolve anchors using genetic algorithm
    :return: array of n anchors (width, height), sorted by area from small to large
    """
    from build_utils.datasets import LoadImagesAndLabels

    def print_results(k):
        # Sort the anchors by area, print recall / IoU statistics and the anchors
        # in cfg format, and return the sorted anchors.
        k = k[np.argsort(k.prod(1))]  # sort small to large
        iou = wh_iou(wh, torch.Tensor(k))
        max_iou = iou.max(1)[0]
        bpr, aat = (max_iou > thr).float().mean(), (iou > thr).float().mean() * n  # best possible recall, anch > thr
        print('%.2f iou_thr: %.3f best possible recall, %.2f anchors > thr' % (thr, bpr, aat))
        print('n=%g, img_size=%s, IoU_all=%.3f/%.3f-mean/best, IoU>thr=%.3f-mean: ' %
              (n, img_size, iou.mean(), max_iou.mean(), iou[iou > thr].mean()), end='')
        for i, x in enumerate(k):
            print('%i,%i' % (round(x[0]), round(x[1])), end=',  ' if i < len(k) - 1 else '\n')  # use in *.cfg
        return k

    def fitness(k):  # mutation fitness
        # Mean over all labels of the best anchor IoU, counting only IoUs above thr.
        iou = wh_iou(wh, torch.Tensor(k))  # iou
        max_iou = iou.max(1)[0]
        return (max_iou * (max_iou > thr).float()).mean()  # product

    # Get label wh
    wh = []
    dataset = LoadImagesAndLabels(path, augment=True, rect=True)
    nr = 1 if img_size[0] == img_size[1] else 10  # number augmentation repetitions
    for s, l in zip(dataset.shapes, dataset.labels):
        wh.append(l[:, 3:5] * (s / s.max()))  # image normalized to letterbox normalized wh
    wh = np.concatenate(wh, 0).repeat(nr, axis=0)  # augment 10x
    wh *= np.random.uniform(img_size[0], img_size[1], size=(wh.shape[0], 1))  # normalized to pixels (multi-scale)
    wh = wh[(wh > 2.0).all(1)]  # remove below threshold boxes (< 2 pixels wh)

    # Kmeans calculation
    from scipy.cluster.vq import kmeans
    print('Running kmeans for %g anchors on %g points...' % (n, len(wh)))
    s = wh.std(0)  # sigmas for whitening
    k, dist = kmeans(wh / s, n, iter=30)  # points, mean distance
    k *= s  # undo the whitening
    wh = torch.Tensor(wh)
    k = print_results(k)

    # # Plot
    # k, d = [None] * 20, [None] * 20
    # for i in tqdm(range(1, 21)):
    #     k[i-1], d[i-1] = kmeans(wh / s, i)  # points, mean distance
    # fig, ax = plt.subplots(1, 2, figsize=(14, 7))
    # ax = ax.ravel()
    # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.')
    # fig, ax = plt.subplots(1, 2, figsize=(14, 7))  # plot wh
    # ax[0].hist(wh[wh[:, 0]<100, 0],400)
    # ax[1].hist(wh[wh[:, 1]<100, 1],400)
    # fig.tight_layout()
    # fig.savefig('wh.png', dpi=200)

    # Evolve: randomly mutate the anchors and keep a mutation only when it improves the fitness
    npr = np.random
    f, sh, mp, s = fitness(k), k.shape, 0.9, 0.1  # fitness, anchor array shape, mutation prob, sigma
    for _ in tqdm(range(gen), desc='Evolving anchors'):
        v = np.ones(sh)
        while (v == 1).all():  # mutate until a change occurs (prevent duplicates)
            v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)
        kg = (k.copy() * v).clip(min=2.0)
        fg = fitness(kg)
        if fg > f:
            f, k = fg, kg.copy()
            print_results(k)
    k = print_results(k)

    return k


================================================
FILE: pytorch_object_detection/yolov3_spp/calculate_dataset.py
================================================
"""
This script does three things:
1. Scans the training and validation sets and generates the corresponding .txt files.
2. Creates the data.data file, which records the number of classes, the paths of the
   train / val dataset list files (.txt) and the path of the label.names file.
3. Creates my_yolov3.cfg from yolov3-spp.cfg, changing the predictor filters and the
   yolo classes parameters (both depend on the number of classes).
"""
import os

# Input locations, relative to the directory the script is run from
train_annotation_dir = "./my_yolo_dataset/train/labels"
val_annotation_dir = "./my_yolo_dataset/val/labels"
classes_label = "./data/my_data_label.names"
cfg_path = "./cfg/yolov3-spp.cfg"

# Fail early (at import time) when any required input is missing
assert os.path.exists(train_annotation_dir), "train_annotation_dir not exist!"
assert os.path.exists(val_annotation_dir), "val_annotation_dir not exist!"
assert os.path.exists(classes_label), "classes_label not exist!"
assert os.path.exists(cfg_path), "cfg_path not exist!"


def calculate_data_txt(txt_path, dataset_dir):
    """
    Write an image list file with one image path per line.

    Every file in ``dataset_dir`` except "classes.txt" is mapped to a ".jpg"
    path of the same base name inside the matching images directory
    (``dataset_dir`` with "labels" replaced by "images").

    :param txt_path: path of the list file to create
    :param dataset_dir: directory holding the label files
    :raises AssertionError: if a mapped image file does not exist
    """
    images_dir = dataset_dir.replace("labels", "images")
    with open(txt_path, "w") as w:
        for file_name in os.listdir(dataset_dir):
            if file_name == "classes.txt":
                continue

            # splitext drops only the final extension, so label names that
            # themselves contain dots (e.g. "img.v2.txt") map to "img.v2.jpg"
            stem = os.path.splitext(file_name)[0]
            img_path = os.path.join(images_dir, stem) + ".jpg"
            line = img_path + "\n"
            assert os.path.exists(img_path), "file:{} not exist!".format(img_path)
            w.write(line)


def create_data_data(create_data_path, label_path, train_path, val_path, classes_info):
    """
    Create the my_data.data file that records classes, train, valid and names info.

    :param create_data_path: path of the .data file to write
    :param label_path: path of the label names file, written as the "names" entry
    :param train_path: path of the training image list (.txt)
    :param val_path: path of the validation image list (.txt)
    :param classes_info: sequence of class names; only its length is used
    """
    with open(create_data_path, "w") as w:
        w.write("classes={}".format(len(classes_info)) + "\n")  # number of classes
        w.write("train={}".format(train_path) + "\n")           # path of the training list file
        w.write("valid={}".format(val_path) + "\n")             # path of the validation list file
        # Record the names file that was actually passed in, rather than a
        # hard-coded path that silently ignores the label_path argument.
        w.write("names={}".format(label_path) + "\n")           # path of the label names file


def change_and_create_cfg_file(classes_info, save_cfg_path="./cfg/my_yolov3.cfg"):
    """
    Create my_yolov3.cfg with the predictor filters and yolo classes params changed.

    The source cfg is read from the module-level ``cfg_path``. The line numbers
    below are 1-based and hard-coded, so this only works for yolov3-spp.cfg.

    :param classes_info: sequence of class names; only its length is used
    :param save_cfg_path: where the modified cfg file is written
    :raises AssertionError: if one of the expected lines does not contain
                            "filters" / "classes"
    """
    filters_lines = [636, 722, 809]
    classes_lines = [643, 729, 816]
    # read inside a with-block so the file handle is closed deterministically
    with open(cfg_path, "r") as r:
        cfg_lines = r.readlines()

    num_classes = len(classes_info)
    # each predictor outputs (x, y, w, h, obj + classes) for 3 anchors
    output_num = (5 + num_classes) * 3

    for i in filters_lines:
        assert "filters" in cfg_lines[i-1], "filters param is not in line:{}".format(i-1)
        cfg_lines[i-1] = "filters={}\n".format(output_num)

    for i in classes_lines:
        assert "classes" in cfg_lines[i-1], "classes param is not in line:{}".format(i-1)
        cfg_lines[i-1] = "classes={}\n".format(num_classes)

    with open(save_cfg_path, "w") as w:
        w.writelines(cfg_lines)


def main():
    """Generate the image lists, the ``.data`` file and the customised cfg file."""
    # collect the training and validation images and write the matching txt files
    train_txt_path = "data/my_train_data.txt"
    val_txt_path = "data/my_val_data.txt"
    calculate_data_txt(train_txt_path, train_annotation_dir)
    calculate_data_txt(val_txt_path, val_annotation_dir)

    # read the class names, skipping blank lines; the context manager closes the file
    with open(classes_label, "r") as r:
        classes_info = [line.strip() for line in r if len(line.strip()) > 0]
    # create the data.data file recording the class count, the train/val list
    # files (.txt) and the label.names file path
    create_data_data("./data/my_data.data", classes_label, train_txt_path, val_txt_path, classes_info)

    # derive my_yolov3.cfg from yolov3-spp.cfg, changing the predictor filters and
    # the yolo classes parameters (both depend on the number of classes)
    change_and_create_cfg_file(classes_info)


if __name__ == '__main__':
    main()


================================================
FILE: pytorch_object_detection/yolov3_spp/cfg/hyp.yaml
================================================
# Hyperparameters for training

giou: 3.54  # giou loss gain
cls: 37.4  # cls loss gain
cls_pw: 1.0  # cls BCELoss positive_weight
obj: 64.3  # obj loss gain (*=img_size/320 if img_size != 320)
obj_pw: 1.0  # obj BCELoss positive_weight
iou_t: 0.20  # iou training threshold
lr0: 0.001  # initial learning rate (SGD=5E-3 Adam=5E-4)
lrf: 0.01  # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937  # SGD momentum
weight_decay: 0.0005  # optimizer weight decay
fl_gamma: 0.0  # focal loss gamma (efficientDet default is gamma=1.5)
hsv_h: 0.0138  # image HSV-Hue augmentation (fraction)
hsv_s: 0.678  # image HSV-Saturation augmentation (fraction)
hsv_v: 0.36  # image HSV-Value augmentation (fraction)
degrees: 0.  # image rotation (+/- deg)
translate: 0.  # image translation (+/- fraction)
scale: 0.  # image scale (+/- gain)
shear: 0.  # image shear (+/- deg)

================================================
FILE: pytorch_object_detection/yolov3_spp/cfg/yolov3-spp.cfg
================================================
[net]
# Testing
# batch=1
# subdivisions=1
# Training
batch=64         
subdivisions=16  
width=608        
height=608       
channels=3       
momentum=0.9     
decay=0.0005     
angle=0          
saturation = 1.5  
exposure = 1.5 
hue=.1    

learning_rate=0.001  
burn_in=1000   
max_batches = 500200 
policy=steps  
steps=400000,450000 
scales=.1,.1  

[convolutional]
batch_normalize=1 
filters=32    
size=3      
stride=1       
pad=1        
activation=leaky  

# Downsample

[convolutional]    
batch_normalize=1
filters=64
size=3
stride=2          
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[shortcut]    
from=-3      
activation=linear  

# Downsample

[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear


[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear


[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear


[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear


[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear


[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear


[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

######################

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

### SPP ###
[maxpool]
stride=1
size=5

[route]
layers=-2

[maxpool]
stride=1
size=9

[route]
layers=-4

[maxpool]
stride=1
size=13

[route]
layers=-1,-3,-5,-6

### End SPP ###

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky


[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear


[yolo]
mask = 6,7,8  
anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
classes=80 
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1


[route]
layers = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[upsample]
stride=2

[route]
layers = -1, 61


[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear


[yolo]
mask = 3,4,5
anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1


[route]
layers = -4

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[upsample]
stride=2

[route]
layers = -1, 36


[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear


[yolo]
mask = 0,1,2
anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1


================================================
FILE: pytorch_object_detection/yolov3_spp/draw_box_utils.py
================================================
from PIL.Image import Image, fromarray
import PIL.ImageDraw as ImageDraw
import PIL.ImageFont as ImageFont
from PIL import ImageColor
import numpy as np

STANDARD_COLORS = [
    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',
    'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',
    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',
    'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
    'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',
    'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
    'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
    'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
    'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
    'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
    'WhiteSmoke', 'Yellow', 'YellowGreen'
]


def _measure_text(font, text: str):
    """Return ``(width, height)`` of ``text`` for ``font``.

    ``getbbox`` is used when the font object provides it, because ``getsize``
    is no longer available in recent Pillow releases; ``getsize`` remains the
    fallback for older versions.
    """
    if hasattr(font, "getbbox"):
        left, _, right, bottom = font.getbbox(text)
        # NOTE(review): height is taken from the origin (bottom) rather than
        # bottom - top so the label box keeps covering the glyph offset, which
        # is believed to match the value getsize() used to report — confirm
        return right - left, bottom
    return font.getsize(text)


def draw_text(draw,
              box: list,
              cls: int,
              score: float,
              category_index: dict,
              color: str,
              font: str = 'arial.ttf',
              font_size: int = 24):
    """
    Draw the "<class name>: <score>%" label of one detection next to its box.

    Args:
        draw: drawing context offering ``rectangle`` and ``text``
        box: [left, top, right, bottom] of the detection
        cls: class id; looked up in ``category_index`` as ``str(cls)``
        score: detection score, shown as an integer percentage
        category_index: mapping from class id (as string) to class name
        color: fill colour of the label background
        font: font file passed to ``ImageFont.truetype``; the default font is
            used when it cannot be loaded
        font_size: font size passed to ``ImageFont.truetype``
    """
    try:
        font = ImageFont.truetype(font, font_size)
    except IOError:
        font = ImageFont.load_default()

    left, top, right, bottom = box
    # If the total height of the display strings added to the top of the bounding
    # box exceeds the top of the image, stack the strings below the bounding box
    # instead of above.
    display_str = f"{category_index[str(cls)]}: {int(100 * score)}%"
    # the label is laid out one character at a time; measure each character once
    char_sizes = [_measure_text(font, ds) for ds in display_str]
    # Each display_str has a top and bottom margin of 0.05x.
    display_str_height = (1 + 2 * 0.05) * max(height for _, height in char_sizes)

    if top > display_str_height:
        text_top = top - display_str_height
        text_bottom = top
    else:
        text_top = bottom
        text_bottom = bottom + display_str_height

    for ds, (text_width, _) in zip(display_str, char_sizes):
        margin = np.ceil(0.05 * text_width)
        draw.rectangle([(left, text_top),
                        (left + text_width + 2 * margin, text_bottom)], fill=color)
        draw.text((left + margin, text_top),
                  ds,
                  fill='black',
                  font=font)
        # advance the pen by the width of the character just drawn
        left += text_width


def draw_masks(image, masks, colors, thresh: float = 0.7, alpha: float = 0.5):
    """Blend a solid colour over each mask region and return the result as an image.

    Args:
        image: source image, converted with ``np.array``
        masks: per-object mask values; entries above ``thresh`` count as foreground
        colors: one colour per mask
        thresh: binarisation threshold applied to ``masks``
        alpha: weight of the coloured overlay in the final blend
    """
    base = np.array(image)
    binary_masks = np.where(masks > thresh, True, False)

    # colors = np.array(colors)
    overlay = np.copy(base)
    # TODO: There might be a way to vectorize this
    # masks are painted in order, so a later mask overwrites an earlier one
    for region, rgb in zip(binary_masks, colors):
        overlay[region] = rgb

    blended = base * (1 - alpha) + overlay * alpha
    return fromarray(blended.astype(np.uint8))


def draw_objs(image: Image,
              boxes: np.ndarray = None,
              classes: np.ndarray = None,
              scores: np.ndarray = None,
              masks: np.ndarray = None,
              category_index: dict = None,
              box_thresh: float = 0.1,
              mask_thresh: float = 0.5,
              line_thickness: int = 8,
              font: str = 'arial.ttf',
              font_size: int = 24,
              draw_boxes_on_image: bool = True,
              draw_masks_on_image: bool = False):
    """
    Draw bounding boxes, class labels and, optionally, masks onto an image.

    Args:
        image: image to annotate; boxes and labels are drawn on it in place
        boxes: bounding boxes, one [left, top, right, bottom] row per object
        classes: class id of every object
        scores: score of every object
        masks: mask of every object (optional)
        category_index: mapping from class id to class name
        box_thresh: objects whose score is not greater than this are dropped
        mask_thresh: threshold forwarded to ``draw_masks``
        line_thickness: width of the bounding-box outline
        font: font file used for the labels
        font_size: font size used for the labels
        draw_boxes_on_image: whether boxes and labels are drawn
        draw_masks_on_image: whether masks are drawn (needs ``masks``)

    Returns:
        The annotated image. The input image is returned untouched when no
        object passes ``box_thresh``; when masks are drawn, the image produced
        by ``draw_masks`` is returned.
    """

    # drop the low-score objects
    idxs = np.greater(scores, box_thresh)
    boxes = boxes[idxs]
    classes = classes[idxs]
    scores = scores[idxs]
    if masks is not None:
        masks = masks[idxs]
    if len(boxes) == 0:
        return image

    # class ids may be stored as floats; a list index has to be an int
    colors = [ImageColor.getrgb(STANDARD_COLORS[int(cls) % len(STANDARD_COLORS)]) for cls in classes]

    if draw_boxes_on_image:
        # Draw all boxes onto image.
        draw = ImageDraw.Draw(image)
        for box, cls, score, color in zip(boxes, classes, scores, colors):
            left, top, right, bottom = box
            # draw the bounding box as a closed polyline
            draw.line([(left, top), (left, bottom), (right, bottom),
                       (right, top), (left, top)], width=line_thickness, fill=color)
            # draw the class name and score
            draw_text(draw, box.tolist(), int(cls), float(score), category_index, color, font, font_size)

    if draw_masks_on_image and (masks is not None):
        # Draw all mask onto image.
        image = draw_masks(image, masks, colors, mask_thresh)

    return image


================================================
FILE: pytorch_object_detection/yolov3_spp/export_onnx.py
================================================
import os
import torch
import cv2
import torch.onnx
import onnx
import onnxruntime
import numpy as np
import models
from build_utils import img_utils

device = torch.device("cpu")
models.ONNX_EXPORT = True


def to_numpy(tensor):
    """Return ``tensor`` as a NumPy array on the CPU, detaching it first if it tracks gradients."""
    if not tensor.requires_grad:
        return tensor.cpu().numpy()
    return tensor.detach().cpu().numpy()


def main():
    """Export the Darknet model to ONNX and check the export against PyTorch.

    The model is built from the cfg file, loaded with the trained weights and
    run once on ``test.jpg``. The same input drives ``torch.onnx.export``; the
    saved file is then validated with ``onnx.checker`` and executed with ONNX
    Runtime, and its first output is compared with the PyTorch output.

    Raises:
        AssertionError: if the cfg file, the weights file or the test image is
            missing, or if the two outputs differ beyond the tolerance.
    """
    img_size = 512  # must be a multiple of 32, e.g. [416, 512, 608]
    cfg = "cfg/yolov3-spp.cfg"
    weights = "weights/yolov3-spp-ultralytics-{}.pt".format(img_size)
    assert os.path.exists(cfg), "cfg file does not exist..."
    assert os.path.exists(weights), "weights file does not exist..."

    input_size = (img_size, img_size)  # [h, w]

    # create model
    model = models.Darknet(cfg, input_size)
    # load model weights
    model.load_state_dict(torch.load(weights, map_location=device)["model"])
    model.to(device)
    model.eval()
    # input to the model
    # [batch, channel, height, width]
    # x = torch.rand(1, 3, *input_size, requires_grad=True)
    img_path = "test.jpg"
    img_o = cv2.imread(img_path)  # BGR
    assert img_o is not None, "Image Not Found " + img_path

    # preprocessing img
    img = img_utils.letterbox(img_o, new_shape=input_size, auto=False, color=(0, 0, 0))[0]
    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
    img = np.ascontiguousarray(img).astype(np.float32)

    img /= 255.0  # scale (0, 255) to (0, 1)
    img = np.expand_dims(img, axis=0)  # add batch dimension
    x = torch.tensor(img)
    # reference output used for the comparison with ONNX Runtime below
    torch_out = model(x)

    save_path = "yolov3spp.onnx"
    # export the model
    torch.onnx.export(model,                       # model being run
                      x,                           # model input (or a tuple for multiple inputs)
                      save_path,                   # where to save the model (can be a file or file-like object)
                      export_params=True,          # store the trained parameter weights inside the model file
                      opset_version=12,            # the ONNX version to export the model to
                      do_constant_folding=True,    # whether to execute constant folding for optimization
                      input_names=["images"],       # the model's input names
                      # output_names=["classes", "boxes"],     # the model's output names
                      output_names=["prediction"],
                      dynamic_axes={"images": {0: "batch_size"},  # variable length axes
                                    "prediction": {0: "batch_size"}})
                                    # "classes": {0: "batch_size"},
                                    # "confidence": {0: "batch_size"},
                                    # "boxes": {0: "batch_size"}})

    # check onnx model
    onnx_model = onnx.load(save_path)
    onnx.checker.check_model(onnx_model)
    # print(onnx.helper.printable_graph(onnx_model.graph))

    ort_session = onnxruntime.InferenceSession(save_path)

    # compute ONNX Runtime output prediction
    ort_inputs = {"images": to_numpy(x)}
    ort_outs = ort_session.run(None, ort_inputs)

    # compare ONNX Runtime and Pytorch results
    # assert_allclose: Raises an AssertionError if two objects are not equal up to desired tolerance.
    np.testing.assert_allclose(to_numpy(torch_out), ort_outs[0], rtol=1e-03, atol=1e-05)
    # np.testing.assert_allclose(to_numpy(torch_out[1]), ort_outs[1], rtol=1e-03, atol=1e-05)
    # np.testing.assert_allclose(to_numpy(torch_out[2]), ort_outs[2], rtol=1e-03, atol=1e-05)
    print("Exported model has been tested with ONNXRuntime, and the result looks good!")


if __name__ == '__main__':
    main()


================================================
FILE: pytorch_object_detection/yolov3_spp/load_onnx_test.py
================================================
import time
import cv2
import onnx
import onnxruntime
import numpy as np
from matplotlib import pyplot as plt
from draw_box_utils import draw_box


def to_numpy(tensor):
    """Convert ``tensor`` to a CPU NumPy array; gradient-tracking tensors are detached first."""
    source = tensor.detach() if tensor.requires_grad else tensor
    return source.cpu().numpy()


def scale_img(img: np.ndarray,
              new_shape=(416, 416),
              color=(114, 114, 114),
              auto=True,
              scale_fill=False,
              scale_up=True):
    """
    Resize an image to the requested shape, padding evenly on all sides when needed.
    :param img: input image as a numpy array, indexed [h, w, ...]
    :param new_shape: target shape (h, w); a single int means a square target
    :param color: colour used for the padding border
    :param auto: keep only the remainder of the padding modulo 64, so the
                 result is a minimal rectangle that can be smaller than new_shape
    :param scale_fill: stretch straight to new_shape without keeping the aspect
                       ratio (only considered when auto is False)
    :param scale_up: when False the image is only ever shrunk, never enlarged
    :return: (padded image, (width ratio, height ratio), (dw, dh)) where dw/dh
             is the padding added on each side
    """

    shape = img.shape[:2]  # [h, w]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scale_up:  # only scale down, do not scale up (for better test mAP)
        r = min(r, 1.0)

    # compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle: keep the aspect ratio, pad only up to a multiple of 64
        dw, dh = np.mod(dw, 64), np.mod(dh, 64)  # wh padding
    elif scale_fill:  # stretch to the requested size
        dw, dh = 0, 0
        # new_shape is (h, w) while new_unpad and ratio are ordered (w, h)
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    # shape:[h, w]  new_unpad:[w, h]
    if shape[::-1] != new_unpad:
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    # the -0.1 / +0.1 offsets split an odd padding total as n and n + 1
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))  # top / bottom padding
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))  # left / right padding

    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (dw, dh)


def clip_coords(boxes: np.ndarray, img_shape: tuple):
    """Clip xyxy boxes in place to the image bounds.

    Args:
        boxes: array whose first four columns are x1, y1, x2, y2; modified in place.
        img_shape: image shape starting with (height, width).
    """
    height, width = img_shape[0], img_shape[1]
    # ndarray.clip returns a new array, so the result has to be written back
    boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, width)   # x1, x2
    boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, height)  # y1, y2


def turn_back_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    """
    Map predicted xyxy boxes from the resized image back to the original image.
    :param img1_shape: shape of the resized image
    :param coords: predicted boxes; modified in place
    :param img0_shape: shape of the original image
    :param ratio_pad: (ratio, pad) recorded while resizing; derived from the
                      two shapes when omitted
    :return: the same ``coords`` array after rescaling
    """
    if ratio_pad is not None:
        # reuse the values recorded while the image was resized
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]
    else:
        # derive the scale factor and the padding from the two shapes
        gain = max(img1_shape) / max(img0_shape)
        pad_w = (img1_shape[1] - img0_shape[1] * gain) / 2
        pad_h = (img1_shape[0] - img0_shape[0] * gain) / 2
        pad = pad_w, pad_h  # wh padding

    # remove the padding, then undo the scaling
    coords[:, [0, 2]] -= pad[0]
    coords[:, [1, 3]] -= pad[1]
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)
    return coords


def xywh2xyxy(x: np.ndarray):
    """Convert nx4 boxes from centre format [x, y, w, h] to corner format [x1, y1, x2, y2]."""
    corners = np.zeros_like(x)
    centre_x, centre_y = x[:, 0], x[:, 1]
    half_w = x[:, 2] / 2
    half_h = x[:, 3] / 2
    corners[:, 0] = centre_x - half_w  # top left x
    corners[:, 1] = centre_y - half_h  # top left y
    corners[:, 2] = centre_x + half_w  # bottom right x
    corners[:, 3] = centre_y + half_h  # bottom right y
    return corners


def bboxes_iou(boxes1: np.ndarray, boxes2: np.ndarray) -> np.ndarray:
    """Return the IoU of xyxy boxes (last axis), broadcasting ``boxes1`` against ``boxes2``.

    The result is floored at float32 machine epsilon, so it is never exactly zero.
    """
    def _area(boxes):
        return (boxes[..., 2] - boxes[..., 0]) * (boxes[..., 3] - boxes[..., 1])

    area1 = _area(boxes1)
    area2 = _area(boxes2)

    # corners of the intersection rectangle
    top_left = np.maximum(boxes1[..., :2], boxes2[..., :2])
    bottom_right = np.minimum(boxes1[..., 2:], boxes2[..., 2:])

    # a negative extent means the boxes do not overlap
    overlap_wh = np.maximum(bottom_right - top_left, 0.0)
    inter_area = overlap_wh[..., 0] * overlap_wh[..., 1]
    union_area = area1 + area2 - inter_area

    return np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps)


def nms(bboxes: np.ndarray, iou_threshold=0.5, soft_threshold=0.3, sigma=0.5, method="nms", ) -> np.ndarray:
    """
    Run NMS (or soft-NMS) over one set of boxes.
    :param bboxes: rows of [x1, y1, x2, y2, score]
    :param iou_threshold: IoU threshold used by plain NMS
    :param soft_threshold: score threshold used by soft-NMS
    :param sigma: gaussian sigma used by soft-NMS
    :param method: "nms" or "soft-nms"
    :return: indices (into the input rows) of the boxes that are kept
    """
    assert method in ["nms", "soft-nms"]
    # append the original row index: [x1, y1, x2, y2, score] -> [..., index]
    row_ids = np.arange(bboxes.shape[0]).reshape(-1, 1)
    candidates = np.concatenate([bboxes, row_ids], axis=1)

    kept = []
    while len(candidates) > 0:
        # pick the highest-scoring candidate and take it out of the pool
        top = np.argmax(candidates[:, 4])
        winner = candidates[top]
        kept.append(winner[5])
        candidates = np.concatenate([candidates[:top], candidates[top + 1:]])
        overlaps = bboxes_iou(winner[np.newaxis, :4], candidates[:, :4])

        if method == "soft-nms":
            # decay the scores by overlap, then drop candidates that fell too low
            decay = np.exp(-(np.square(overlaps) / sigma))
            candidates[:, 4] = candidates[:, 4] * decay
            survivors = np.greater(candidates[:, 4], soft_threshold)  # >
        else:
            # plain nms: drop every candidate overlapping the winner too much
            survivors = np.less(overlaps, iou_threshold)  # <

        candidates = candidates[survivors]

    return np.array(kept, dtype=np.int32)


def post_process(pred: np.ndarray, multi_label=False, conf_thres=0.3):
    """
    Filter raw predictions and run NMS on the remaining boxes.
    :param pred: [num_obj, [x, y, w, h, objectness, cls1, cls2...]] with the
                 box given as centre x/y plus width/height
    :param multi_label: per-class handling; not implemented in this numpy port
    :param conf_thres: threshold applied to the objectness and to the best class score
    :return: [n, 6] array of [x1, y1, x2, y2, score, class]
    :raises NotImplementedError: if multi_label is True
    """
    if multi_label:
        # The multi-label branch was never ported; falling through would feed
        # unconverted xywh rows to NMS and return meaningless results.
        # i, j = (x[:, 5:] > conf_thres).nonzero().t()
        # x = torch.cat((box[i], x[i, j + 5].unsqueeze(1), j.float().unsqueeze(1)), 1)
        raise NotImplementedError("multi_label post-processing is not implemented")

    min_wh, max_wh = 2, 4096
    pred = pred[pred[:, 4] > conf_thres]  # drop low-objectness predictions
    pred = pred[((pred[:, 2:4] > min_wh) & (pred[:, 2:4] < max_wh)).all(1)]  # drop boxes outside the size range

    if pred.shape[0] == 0:
        return np.empty((0, 6))  # [x, y, x, y, score, class]

    box = xywh2xyxy(pred[:, :4])
    # Detections matrix nx6 (xyxy, conf, cls); best class only
    class_scores = pred[:, 5:]
    class_index = np.argmax(class_scores, axis=1)
    conf = class_scores[(np.arange(pred.shape[0]), class_index)]
    # conf, j = predictions[:, 5:].max(1)
    pred = np.concatenate((box,
                           np.expand_dims(conf, axis=1),
                           np.expand_dims(class_index, axis=1)), 1)[conf > conf_thres]

    n = pred.shape[0]  # number of boxes
    if n == 0:
        return np.empty((0, 6))  # [x, y, x, y, score, class]

    cls = pred[:, 5]  # classes
    # shift every box by class id * max_wh; this is meant to keep boxes of
    # different classes from overlapping, so one NMS pass acts per class
    boxes, scores = pred[:, :4] + cls.reshape(-1, 1) * max_wh, pred[:, 4:5]
    t1 = time.time()
    indexes = nms(np.concatenate([boxes, scores], axis=1))
    print("NMS time is {}".format(time.time() - t1))
    pred = pred[indexes]

    return pred


def main():
    """Run the exported ONNX model on ``test.jpg`` and display the detections.

    Raises:
        AssertionError: if the test image cannot be read.
    """
    img_size = 512
    save_path = "yolov3spp.onnx"
    img_path = "test.jpg"
    input_size = (img_size, img_size)  # h, w

    # check onnx model
    onnx_model = onnx.load(save_path)
    onnx.checker.check_model(onnx_model)
    # print(onnx.helper.printable_graph(onnx_model.graph))
    ort_session = onnxruntime.InferenceSession(save_path)

    img_o = cv2.imread(img_path)  # BGR
    assert img_o is not None, "Image Not Found " + img_path

    # preprocessing img
    img, ratio, pad = scale_img(img_o, new_shape=input_size, auto=False, color=(0, 0, 0))
    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
    img = np.ascontiguousarray(img).astype(np.float32)

    img /= 255.0  # scale (0, 255) to (0, 1)
    img = np.expand_dims(img, axis=0)  # add batch dimension

    # compute ONNX Runtime output prediction
    ort_inputs = {"images": img}

    t1 = time.time()
    # prediction: [num_obj, 85]
    pred = ort_session.run(None, ort_inputs)[0]
    t2 = time.time()
    print(t2 - t1)
    # print(predictions.shape[0])
    # process detections
    # the predicted values are relative coordinates (between 0 and 1);
    # multiply by the input size to get absolute coordinates
    pred[:, [0, 2]] *= input_size[1]
    pred[:, [1, 3]] *= input_size[0]
    pred = post_process(pred)

    # scale the predicted boxes back to the original image
    p_boxes = turn_back_coords(img1_shape=img.shape[2:],
                               coords=pred[:, :4],
                               img0_shape=img_o.shape,
                               ratio_pad=[ratio, pad]).round()
    # print(p_boxes.shape)

    bboxes = p_boxes
    scores = pred[:, 4]
    # np.int was only an alias of the builtin int and no longer exists in
    # current NumPy releases; the builtin yields the same dtype
    classes = pred[:, 5].astype(int) + 1

    category_index = {i + 1: str(i + 1) for i in range(90)}
    # NOTE(review): draw_box comes from the draw_box_utils import at the top of
    # this file — confirm that module still exports it
    img_o = draw_box(img_o[:, :, ::-1], bboxes, classes, scores, category_index)
    plt.imshow(img_o)
    plt.show()


if __name__ == '__main__':
    main()


================================================
FILE: pytorch_object_detection/yolov3_spp/models.py
================================================
from build_utils.layers import *
from build_utils.parse_config import *

ONNX_EXPORT = False


def create_modules(modules_defs: list, img_size):
    """
    Build the network, one module per layer description, from a parsed .cfg.

    The first entry of ``modules_defs`` (the ``[net]`` section) is removed from
    the caller's list in place, so afterwards the list lines up one-to-one with
    the returned module list.

    :param modules_defs: list of per-layer dicts obtained by parsing the .cfg file
    :param img_size: int or 2-element sequence; read by the ONNX-export upsample
                     branch and forwarded to every YOLOLayer
    :return: (module_list, routs_binary); routs_binary[i] is True when the output
             of layer i is needed by a later layer
    """
    if isinstance(img_size, int):
        img_size = [img_size] * 2

    # Drop the [net] section: it only carries training hyper-parameters (unused here).
    modules_defs.pop(0)

    channels = [3]  # channel count history, seeded with the input image channels
    module_list = nn.ModuleList()
    # Indices of layers whose outputs are consumed later (fusion or concatenation).
    routed = []
    yolo_index = -1

    for idx, layer_def in enumerate(modules_defs):
        layer_type = layer_def["type"]
        block = nn.Sequential()

        if layer_type == "convolutional":
            use_bn = layer_def["batch_normalize"]  # 1 or 0
            out_ch = layer_def["filters"]
            kernel = layer_def["size"]
            if "stride" in layer_def:
                stride = layer_def["stride"]
            else:
                stride = (layer_def['stride_y'], layer_def["stride_x"])

            if not isinstance(kernel, int):
                raise TypeError("conv2d filter size must be int type.")

            conv = nn.Conv2d(in_channels=channels[-1],
                             out_channels=out_ch,
                             kernel_size=kernel,
                             stride=stride,
                             padding=kernel // 2 if layer_def["pad"] else 0,
                             bias=not use_bn)
            block.add_module("Conv2d", conv)

            if use_bn:
                block.add_module("BatchNorm2d", nn.BatchNorm2d(out_ch))
            else:
                # A conv without BN is a YOLO predictor; its output feeds a yolo layer.
                routed.append(idx)

            if layer_def["activation"] == "leaky":
                block.add_module("activation", nn.LeakyReLU(0.1, inplace=True))

        elif layer_type == "BatchNorm2d":
            # Recognised but contributes nothing: the empty Sequential is kept.
            pass

        elif layer_type == "maxpool":
            kernel = layer_def["size"]
            stride = layer_def["stride"]
            block = nn.MaxPool2d(kernel_size=kernel, stride=stride, padding=(kernel - 1) // 2)

        elif layer_type == "upsample":
            if ONNX_EXPORT:
                # Use an explicit output size instead of scale_factor when exporting.
                gain = (yolo_index + 1) * 2 / 32
                block = nn.Upsample(size=tuple(int(side * gain) for side in img_size))
            else:
                block = nn.Upsample(scale_factor=layer_def["stride"])

        elif layer_type == "route":  # e.g. [-2], [-1, -3, -5, -6], [-1, 61]
            layers = layer_def["layers"]
            # Positive references are absolute layer indices, hence the +1 offset
            # into the channel history (which starts with the input channels).
            out_ch = sum(channels[ref + 1 if ref > 0 else ref] for ref in layers)
            routed.extend(idx + ref if ref < 0 else ref for ref in layers)
            block = FeatureConcat(layers=layers)

        elif layer_type == "shortcut":
            layers = layer_def["from"]
            out_ch = channels[-1]
            routed.append(idx + layers[0])
            block = WeightedFeatureFusion(layers=layers, weight="weights_type" in layer_def)

        elif layer_type == "yolo":
            yolo_index += 1  # which yolo layer this is: 0, 1, 2
            stride = [32, 16, 8]  # downsampling ratio of each prediction feature map

            block = YOLOLayer(anchors=layer_def["anchors"][layer_def["mask"]],  # anchor list
                              nc=layer_def["classes"],  # number of classes
                              img_size=img_size,
                              stride=stride[yolo_index])

            # Bias initialisation of the preceding Conv2d
            # (https://arxiv.org/pdf/1708.02002.pdf section 3.3). Best effort only.
            try:
                prev = -1
                # Index 0 of the preceding Sequential is its Conv2d; the flat bias
                # is viewed as (number of anchors, outputs per anchor).
                bias = module_list[prev][0].bias.view(block.na, -1)
                bias.data[:, 4] += -4.5  # obj
                bias.data[:, 5:] += math.log(0.6 / (block.nc - 0.99))  # cls (sigmoid(p) = 1/nc)
                module_list[prev][0].bias = torch.nn.Parameter(bias.view(-1), requires_grad=True)
            except Exception as e:
                print('WARNING: smart bias initialization failure.', e)

        else:
            print("Warning: Unrecognized Layer Type: " + layer_type)

        # Register the module and the channel count it produces. Layer types that
        # do not set a channel count reuse the value left by the previous layer.
        module_list.append(block)
        channels.append(out_ch)

    routs_binary = [False] * len(modules_defs)
    for routed_idx in routed:
        routs_binary[routed_idx] = True
    return module_list, routs_binary


class YOLOLayer(nn.Module):
    """
    Post-process the raw output of a YOLO predictor.

    In training mode the reshaped predictions are returned untouched; in
    inference mode they are additionally decoded into boxes scaled by the
    layer stride; in ONNX-export mode a flattened, normalised tensor is produced.
    """
    def __init__(self, anchors, nc, img_size, stride):
        super().__init__()
        self.anchors = torch.Tensor(anchors)
        self.stride = stride  # layer stride (one of 32, 16, 8 as passed by create_modules)
        self.na = len(anchors)  # number of anchors
        self.nc = nc  # number of classes
        self.no = nc + 5  # outputs per anchor: x, y, w, h, obj, cls1, ...
        # Grid bookkeeping, filled in by create_grids().
        self.nx = 0
        self.ny = 0
        self.ng = (0, 0)
        # Anchors expressed in grid units.
        self.anchor_vec = self.anchors / self.stride
        # Laid out as (batch, na, grid_h, grid_w, wh); the size-1 axes broadcast.
        self.anchor_wh = self.anchor_vec.view(1, self.na, 1, 1, 2)
        self.grid = None

        if ONNX_EXPORT:
            self.training = False
            # number of x, y grid points
            self.create_grids((img_size[1] // stride, img_size[0] // stride))

    def create_grids(self, ng=(13, 13), device="cpu"):
        """
        Record a new grid size and rebuild the per-cell xy offsets.

        :param ng: (nx, ny) feature-map size
        :param device: device on which the grid and anchors should live
        :return: None
        """
        self.nx, self.ny = ng
        self.ng = torch.tensor(ng, dtype=torch.float)

        # The offsets are only built outside training mode, where boxes are decoded.
        if not self.training:
            rows = torch.arange(self.ny, device=device)
            cols = torch.arange(self.nx, device=device)
            yv, xv = torch.meshgrid([rows, cols])
            # (batch, na, grid_h, grid_w, xy)
            offsets = torch.stack((xv, yv), 2)
            self.grid = offsets.view((1, 1, self.ny, self.nx, 2)).float()

        if self.anchor_vec.device != device:
            self.anchor_vec = self.anchor_vec.to(device)
            self.anchor_wh = self.anchor_wh.to(device)

    def forward(self, p):
        if not ONNX_EXPORT:
            # p: (batch_size, na * no, grid_h, grid_w)
            bs, _, ny, nx = p.shape
            needs_grid = self.grid is None or (self.nx, self.ny) != (nx, ny)
            if needs_grid:
                self.create_grids((nx, ny), p.device)
        else:
            bs = 1  # batch size

        # (bs, na * no, ny, nx) -> (bs, na, no, ny, nx) -> (bs, na, ny, nx, no)
        p = p.view(bs, self.na, self.no, self.ny, self.nx)
        p = p.permute(0, 1, 3, 4, 2).contiguous()  # prediction

        if self.training:
            return p

        if ONNX_EXPORT:
            # Avoid broadcasting for ANE operations
            m = self.na * self.nx * self.ny
            ng = 1. / self.ng.repeat(m, 1)
            grid = self.grid.repeat(1, self.na, 1, 1, 1).view(m, 2)
            anchor_wh = self.anchor_wh.repeat(1, 1, self.nx, self.ny, 1).view(m, 2) * ng

            p = p.view(m, self.no)
            p[:, :2] = (torch.sigmoid(p[:, 0:2]) + grid) * ng  # x, y
            p[:, 2:4] = torch.exp(p[:, 2:4]) * anchor_wh  # width, height
            p[:, 4:] = torch.sigmoid(p[:, 4:])
            p[:, 5:] = p[:, 5:self.no] * p[:, 4:5]
            return p

        # Inference: decode a copy so the raw predictions can be returned as well.
        decoded = p.clone()
        decoded[..., :2] = torch.sigmoid(decoded[..., :2]) + self.grid  # xy on the feature map
        decoded[..., 2:4] = torch.exp(decoded[..., 2:4]) * self.anchor_wh  # wh on the feature map
        decoded[..., :4] *= self.stride  # scale by the layer stride
        decoded[..., 4:].sigmoid_()
        # (bs, na, ny, nx, no) flattened to (bs, na * ny * nx, no)
        return decoded.view(bs, -1, self.no), p


class Darknet(nn.Module):
    """
    YOLOv3 SPP object detection model assembled from a darknet .cfg file.
    """
    def __init__(self, cfg, img_size=(416, 416), verbose=False):
        super().__init__()
        # img_size only has an effect when the model is exported to ONNX.
        if isinstance(img_size, int):
            self.input_size = [img_size] * 2
        else:
            self.input_size = img_size
        # Parse the .cfg file, then build the network layer by layer from it.
        self.module_defs = parse_model_cfg(cfg)
        self.module_list, self.routs = create_modules(self.module_defs, img_size)
        # Indices of all YOLOLayer modules.
        self.yolo_layers = get_yolo_layers(self)

        # Print the model description (detailed when verbose is True), except on export.
        if not ONNX_EXPORT:
            self.info(verbose)

    def forward(self, x, verbose=False):
        return self.forward_once(x, verbose=verbose)

    def forward_once(self, x, verbose=False):
        """
        Run every module in order and gather the YOLO layer outputs.

        :param x: input tensor
        :param verbose: print the output shape of every layer when True
        :return: list of raw YOLO outputs in training mode; the concatenated
                 tensor in ONNX-export mode; otherwise (inference output, raw outputs)
        """
        yolo_out = []  # output of every YOLOLayer
        out = []  # output of every module (kept only where it is routed later)
        if verbose:
            print('0', x.shape)
            note = ""

        for i, module in enumerate(self.module_list):
            name = module.__class__.__name__
            if name == "YOLOLayer":
                yolo_out.append(module(x))
            elif name in ("WeightedFeatureFusion", "FeatureConcat"):  # sum, concat
                if verbose:
                    src_ids = [i - 1] + module.layers
                    src_shapes = [list(x.shape)]
                    src_shapes += [list(out[j].shape) for j in module.layers]
                    pairs = zip(src_ids, src_shapes)
                    note = ' >> ' + ' + '.join('layer %g %s' % pair for pair in pairs)
                x = module(x, out)
            else:
                # Everything else ('convolutional', 'upsample', 'maxpool', ...) runs directly.
                x = module(x)

            # Keep the output only if a later layer needs it.
            out.append(x if self.routs[i] else [])
            if verbose:
                print('%g/%g %s -' % (i, len(self.module_list), name), list(x.shape), note)
                note = ''

        if self.training:  # train
            return yolo_out

        if ONNX_EXPORT:  # export
            # The per-layer outputs are only concatenated; no objectness or
            # box-size filtering is applied here.
            return torch.cat(yolo_out, dim=0)

        # inference or test
        x, p = zip(*yolo_out)  # inference output, training output
        x = torch.cat(x, 1)  # cat yolo outputs
        return x, p

    def info(self, verbose=False):
        """
        Print a description of the model.

        :param verbose: forwarded to torch_utils.model_info
        :return: None
        """
        torch_utils.model_info(self, verbose)


def get_yolo_layers(self):
    """
    Collect the positions of the "YOLOLayer" modules inside ``self.module_list``.

    :param self: model object exposing a ``module_list`` attribute
    :return: list of indices, e.g. [89, 101, 113]
    """
    indices = []
    for position, layer in enumerate(self.module_list):
        # Matching is done on the class name, not on the class object.
        if layer.__class__.__name__ == 'YOLOLayer':
            indices.append(position)
    return indices


================================================
FILE: pytorch_object_detection/yolov3_spp/predict_test.py
================================================
import os
import json
import time

import torch
import cv2
import numpy as np
from matplotlib import pyplot as plt
from PIL import Image

from build_utils import img_utils, torch_utils, utils
from models import Darknet
from draw_box_utils import draw_objs


def main():
    """
    Run YOLOv3-SPP on a single image, show the detections and save the plot.

    All paths and the input size are hard-coded below; edit them to point at
    your own .cfg, weights, class json and image. The annotated image is
    written to ``test_result.jpg``.

    :return: None
    """
    img_size = 512  # must be a multiple of 32, e.g. 416, 512, 608
    cfg = "cfg/my_yolov3.cfg"  # change to the generated .cfg file
    weights_path = "weights/yolov3spp-voc-512.pt"  # change to your own trained weights
    json_path = "./data/pascal_voc_classes.json"  # class-name json file
    img_path = "test.jpg"
    assert os.path.exists(cfg), "cfg file {} dose not exist.".format(cfg)
    # The original check referenced an undefined name `weights` and raised NameError.
    assert os.path.exists(weights_path), "weights file {} dose not exist.".format(weights_path)
    assert os.path.exists(json_path), "json file {} dose not exist.".format(json_path)
    assert os.path.exists(img_path), "image file {} dose not exist.".format(img_path)

    with open(json_path, 'r') as f:
        class_dict = json.load(f)

    # Invert the json mapping so that the (stringified) value looks up the key.
    category_index = {str(v): str(k) for k, v in class_dict.items()}

    input_size = (img_size, img_size)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = Darknet(cfg, img_size)
    weights_dict = torch.load(weights_path, map_location='cpu')
    # Accept both a full checkpoint ({"model": state_dict, ...}) and a bare state_dict.
    weights_dict = weights_dict["model"] if "model" in weights_dict else weights_dict
    model.load_state_dict(weights_dict)
    model.to(device)

    model.eval()
    with torch.no_grad():
        # Warm-up pass on an all-zero image so the timing below excludes first-call setup.
        img = torch.zeros((1, 3, img_size, img_size), device=device)
        model(img)

        img_o = cv2.imread(img_path)  # BGR
        assert img_o is not None, "Image Not Found " + img_path

        img = img_utils.letterbox(img_o, new_shape=input_size, auto=True, color=(0, 0, 0))[0]
        # BGR -> RGB and HWC -> CHW
        img = img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img)

        img = torch.from_numpy(img).to(device).float()
        img /= 255.0  # scale (0, 255) to (0, 1)
        img = img.unsqueeze(0)  # add batch dimension

        t1 = torch_utils.time_synchronized()
        pred = model(img)[0]  # only get inference result
        t2 = torch_utils.time_synchronized()
        print(t2 - t1)

        # Keep the detections of the first (only) image in the batch.
        pred = utils.non_max_suppression(pred, conf_thres=0.1, iou_thres=0.6, multi_label=True)[0]
        t3 = time.time()
        print(t3 - t2)

        if pred is None:
            print("No target detected.")
            # Plain return instead of the site-provided exit(): nothing is left to do.
            return

        # Map the boxes from the network input size back to the original image.
        pred[:, :4] = utils.scale_coords(img.shape[2:], pred[:, :4], img_o.shape).round()
        print(pred.shape)

        bboxes = pred[:, :4].detach().cpu().numpy()
        scores = pred[:, 4].detach().cpu().numpy()
        # np.int was only an alias of the builtin int and was removed in NumPy 1.24.
        # The +1 presumably matches 1-based ids in the class json -- verify against it.
        classes = pred[:, 5].detach().cpu().numpy().astype(int) + 1

        pil_img = Image.fromarray(img_o[:, :, ::-1])
        plot_img = draw_objs(pil_img,
                             bboxes,
                             classes,
                             scores,
                             category_index=category_index,
                             box_thresh=0.2,
                             line_thickness=3,
                             font='arial.ttf',
                             font_size=20)
        plt.imshow(plot_img)
        plt.show()
        # Save the annotated prediction image.
        plot_img.save("test_result.jpg")


# Run the single-image prediction demo only when this file is executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: pytorch_object_detection/yolov3_spp/requirements.txt
================================================
numpy
opencv_python==4.3.0.36
lxml
torch==1.7.1
torchvision==0.8.2
scipy
pycocotools
matplotlib
tqdm
tensorboard==2.1.0
PyYAML


================================================
FILE: pytorch_object_detection/yolov3_spp/results20210515-152935.txt
================================================
epoch:0 0.2934  0.6118  0.2275  0.0649  0.2581  0.3549  0.2737  0.4685  0.4842  0.1264  0.4246  0.5404  10.2811  0.001
epoch:1 0.4889  0.7742  0.5507  0.1366  0.3838  0.577  0.4132  0.6066  0.6205  0.2383  0.5264  0.6874  6.6461  0.000997
epoch:2 0.4981  0.7902  0.5599  0.1587  0.3863  0.5807  0.4162  0.6044  0.618  0.2694  0.5156  0.6828  6.0806  0.000989
epoch:3 0.4907  0.7876  0.546  0.1648  0.3848  0.5721  0.4133  0.6051  0.62  0.2874  0.5287  0.6791  5.8333  0.000976
epoch:4 0.5099  0.7901  0.5811  0.1726  0.4117  0.59  0.4224  0.6193  0.6344  0.2929  0.5474  0.6911  5.6126  0.000957
epoch:5 0.5218  0.8066  0.5912  0.178  0.4189  0.5916  0.4292  0.6251  0.6396  0.2879  0.5427  0.6997  5.6512  0.000934
epoch:6 0.5187  0.8009  0.5893  0.1794  0.4257  0.5925  0.422  0.6202  0.6359  0.3063  0.5635  0.6893  5.4484  0.000905
epoch:7 0.5336  0.8059  0.6076  0.1823  0.429  0.6138  0.4379  0.6346  0.6495  0.3054  0.5545  0.711  5.3175  0.000873
epoch:8 0.5498  0.8066  0.6218  0.1735  0.4369  0.6336  0.4456  0.6476  0.6626  0.3079  0.5653  0.7285  5.307  0.000836
epoch:9 0.5445  0.8057  0.6274  0.1825  0.445  0.6269  0.438  0.6411  0.6557  0.3076  0.5747  0.7181  5.148  0.000796
epoch:10 0.532  0.798  0.6059  0.1833  0.4272  0.6159  0.4395  0.6376  0.6512  0.3086  0.5537  0.7181  5.0508  0.000752
epoch:11 0.5574  0.8063  0.6272  0.1873  0.44  0.6416  0.4524  0.6543  0.6682  0.3139  0.5658  0.7358  5.1974  0.000706
epoch:12 0.5675  0.8088  0.6422  0.1985  0.4522  0.6542  0.4584  0.6609  0.6734  0.3248  0.5752  0.7415  4.9259  0.000658
epoch:13 0.5553  0.8114  0.6323  0.1854  0.439  0.6358  0.4466  0.6481  0.662  0.3285  0.5664  0.7247  4.7405  0.000608
epoch:14 0.5663  0.8106  0.6375  0.1873  0.4472  0.6588  0.4565  0.6612  0.6745  0.307  0.5721  0.7462  4.9034  0.000557
epoch:15 0.5627  0.8094  0.6354  0.1939  0.4462  0.6529  0.4526  0.6569  0.6703  0.3335  0.5734  0.7374  4.9803  0.000505
epoch:16 0.5677  0.8085  0.6402  0.1973  0.4517  0.6551  0.4573  0.6629  0.6762  0.3204  0.5772  0.7464  4.6182  0.000453
epoch:17 0.569  0.8107  0.6387  0.1954  0.4483  0.6604  0.46  0.6666  0.6802  0.3323  0.5761  0.7497  4.7454  0.000402
epoch:18 0.5783  0.8097  0.646  0.201  0.4564  0.669  0.4661  0.6719  0.6847  0.3265  0.5749  0.7567  4.4123  0.000352
epoch:19 0.5808  0.8111  0.642  0.188  0.4568  0.6721  0.4654  0.6734  0.6866  0.3172  0.5771  0.7591  4.5915  0.000304
epoch:20 0.5774  0.8078  0.6411  0.1916  0.4519  0.6753  0.4669  0.6745  0.6881  0.3247  0.5772  0.7601  4.6747  0.000258
epoch:21 0.5879  0.8127  0.6522  0.197  0.4559  0.683  0.4716  0.6801  0.6934  0.3189  0.5778  0.7702  4.485  0.000214
epoch:22 0.5858  0.8074  0.6515  0.2126  0.4622  0.6786  0.4695  0.677  0.6904  0.3427  0.5814  0.7609  4.5962  0.000174
epoch:23 0.5893  0.8127  0.6501  0.2056  0.4596  0.6851  0.4729  0.682  0.6954  0.3345  0.583  0.7703  4.4483  0.000137
epoch:24 0.5902  0.8123  0.654  0.197  0.4601  0.6867  0.4738  0.683  0.6969  0.3279  0.5835  0.773  4.4526  0.000105
epoch:25 0.5948  0.816  0.6585  0.2031  0.4691  0.6888  0.4766  0.6864  0.7002  0.3379  0.5899  0.7736  4.4878  0.000076
epoch:26 0.5921  0.8136  0.6555  0.2036  0.4706  0.6847  0.4746  0.6841  0.6975  0.3446  0.5915  0.7681  4.5259  0.000053
epoch:27 0.5896  0.8089  0.6511  0.204  0.4666  0.6839  0.4734  0.682  0.6949  0.3422  0.5852  0.7669  4.3678  0.000034
epoch:28 0.5956  0.8149  0.6579  0.2089  0.4683  0.6893  0.4768  0.6868  0.7008  0.3448  0.5908  0.7741  4.5182  0.000021
epoch:29 0.5907  0.8097  0.6508  0.2078  0.4701  0.6831  0.4726  0.682  0.695  0.3476  0.5906  0.7645  4.2529  0.000013


================================================
FILE: pytorch_object_detection/yolov3_spp/train.py
================================================
import datetime
import argparse

import yaml
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.tensorboard import SummaryWriter

from models import *
from build_utils.datasets import *
from build_utils.utils import *
from train_utils import train_eval_utils as train_util
from train_utils import get_coco_api_from_dataset


def train(hyp):
    """
    Train YOLOv3-SPP on a single device, evaluating and checkpointing per epoch.

    Configuration is read from the module-level ``opt`` namespace and scalars are
    logged through the module-level ``tb_writer``; both are created in the
    ``__main__`` block of this file.

    :param hyp: dict of hyper-parameters; ``hyp["cls"]`` and ``hyp["obj"]`` are
                rescaled in place and the dict is attached to the model
    :return: None
    """
    device = torch.device(opt.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    wdir = "weights" + os.sep  # weights dir
    best = wdir + "best.pt"
    # Checkpoints are written into this directory; make sure it exists.
    os.makedirs(wdir, exist_ok=True)
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    cfg = opt.cfg
    data = opt.data
    epochs = opt.epochs
    batch_size = opt.batch_size
    accumulate = max(round(64 / batch_size), 1)  # accumulate n times before optimizer update (bs 64)
    weights = opt.weights  # initial training weights
    imgsz_train = opt.img_size
    imgsz_test = opt.img_size  # test image sizes
    multi_scale = opt.multi_scale

    # Image sizes: must be a multiple of the grid size.
    gs = 32  # (pixels) grid size
    assert math.fmod(imgsz_test, gs) == 0, "--img-size %g must be a %g-multiple" % (imgsz_test, gs)
    grid_min, grid_max = imgsz_test // gs, imgsz_test // gs
    if multi_scale:
        imgsz_min = opt.img_size // 1.5
        imgsz_max = opt.img_size // 0.667

        # Round the min / max input sizes down to a multiple of the grid size.
        # `//` with a float operand yields floats, so the bounds are cast to int:
        # integer-only samplers such as random.randrange reject floats on
        # Python 3.12+ (how train_one_epoch samples the scale is not visible here).
        grid_min, grid_max = int(imgsz_min // gs), int(imgsz_max // gs)
        imgsz_min, imgsz_max = grid_min * gs, grid_max * gs
        imgsz_train = imgsz_max  # initialize with max size
        print("Using multi_scale training, image range[{}, {}]".format(imgsz_min, imgsz_max))

    # configure run
    # init_seeds()  # seed the RNGs to make results reproducible
    data_dict = parse_data_cfg(data)
    train_path = data_dict["train"]
    test_path = data_dict["valid"]
    nc = 1 if opt.single_cls else int(data_dict["classes"])  # number of classes
    hyp["cls"] *= nc / 80  # update coco-tuned hyp['cls'] to current dataset
    hyp["obj"] *= imgsz_test / 320

    # Remove previous results
    for f in glob.glob(results_file):
        os.remove(f)

    # Initialize model
    model = Darknet(cfg).to(device)

    # Optionally freeze everything except the predictors.
    if opt.freeze_layers:
        # The predictor is the module right before each YOLOLayer
        # (the YOLOLayer itself is not the predictor).
        output_layer_indices = [idx - 1 for idx, module in enumerate(model.module_list) if
                                isinstance(module, YOLOLayer)]
        # Freeze every layer except the predictors and the YOLOLayers.
        freeze_layer_indeces = [x for x in range(len(model.module_list)) if
                                (x not in output_layer_indices) and
                                (x - 1 not in output_layer_indices)]
        # Freeze non-output layers
        for idx in freeze_layer_indeces:
            for parameter in model.module_list[idx].parameters():
                parameter.requires_grad_(False)
    else:
        # Default: freeze the darknet53 backbone and train only what follows it.
        # Remove this loop to train all weights.
        darknet_end_layer = 74  # only yolov3spp cfg
        for idx in range(darknet_end_layer + 1):  # [0, 74]
            for parameter in model.module_list[idx].parameters():
                parameter.requires_grad_(False)

    # optimizer
    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=hyp["lr0"], momentum=hyp["momentum"],
                          weight_decay=hyp["weight_decay"], nesterov=True)

    scaler = torch.cuda.amp.GradScaler() if opt.amp else None

    start_epoch = 0
    best_map = 0.0
    if weights.endswith(".pt") or weights.endswith(".pth"):
        ckpt = torch.load(weights, map_location=device)

        # load model
        try:
            # Build the state dict once instead of once per checkpoint entry, and
            # keep only the entries whose element count matches the current model.
            model_state = model.state_dict()
            ckpt["model"] = {k: v for k, v in ckpt["model"].items() if model_state[k].numel() == v.numel()}
            model.load_state_dict(ckpt["model"], strict=False)
        except KeyError as e:
            s = "%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s. " \
                "See https://github.com/ultralytics/yolov3/issues/657" % (opt.weights, opt.cfg, opt.weights)
            raise KeyError(s) from e

        # load optimizer (.get: checkpoints saved without an optimizer entry are accepted)
        if ckpt.get("optimizer") is not None:
            optimizer.load_state_dict(ckpt["optimizer"])
            if "best_map" in ckpt.keys():
                best_map = ckpt["best_map"]

        # load results
        if ckpt.get("training_results") is not None:
            with open(results_file, "w") as file:
                file.write(ckpt["training_results"])  # write results.txt

        # epochs
        start_epoch = ckpt["epoch"] + 1
        if epochs < start_epoch:
            print('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
                  (opt.weights, ckpt['epoch'], epochs))
            epochs += ckpt['epoch']  # finetune additional epochs

        if opt.amp and "scaler" in ckpt:
            scaler.load_state_dict(ckpt["scaler"])

        del ckpt

    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp["lrf"]) + hyp["lrf"]  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    scheduler.last_epoch = start_epoch  # resume the schedule from this epoch

    # dataset
    # The training set is created at the largest size of the multi-scale range.
    train_dataset = LoadImagesAndLabels(train_path, imgsz_train, batch_size,
                                        augment=True,
                                        hyp=hyp,  # augmentation hyperparameters
                                        rect=opt.rect,  # rectangular training
                                        cache_images=opt.cache_images,
                                        single_cls=opt.single_cls)

    # The validation set uses img_size; rect=True lets each batch use a tighter
    # (not necessarily square) shape to reduce computation.
    val_dataset = LoadImagesAndLabels(test_path, imgsz_test, batch_size,
                                      hyp=hyp,
                                      rect=True,
                                      cache_images=opt.cache_images,
                                      single_cls=opt.single_cls)

    # dataloader
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   num_workers=nw,
                                                   # Shuffle=True unless rectangular training is used
                                                   shuffle=not opt.rect,
                                                   pin_memory=True,
                                                   collate_fn=train_dataset.collate_fn)

    val_datasetloader = torch.utils.data.DataLoader(val_dataset,
                                                    batch_size=batch_size,
                                                    num_workers=nw,
                                                    pin_memory=True,
                                                    collate_fn=val_dataset.collate_fn)

    # Model parameters
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)

    def _save_checkpoint(path, epoch):
        """Write model/optimizer state plus the results log so far to ``path``."""
        with open(results_file, 'r') as f:
            save_files = {
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'training_results': f.read(),
                'epoch': epoch,
                'best_map': best_map}
        if opt.amp:
            save_files["scaler"] = scaler.state_dict()
        torch.save(save_files, path)

    # start training
    # The validation annotations are converted to a COCO api object once, up front.
    coco = get_coco_api_from_dataset(val_dataset)

    print("starting traning for %g epochs..." % epochs)
    print('Using %g dataloader workers' % nw)
    for epoch in range(start_epoch, epochs):
        mloss, lr = train_util.train_one_epoch(model, optimizer, train_dataloader,
                                               device, epoch,
                                               accumulate=accumulate,  # batches per optimizer update
                                               img_size=imgsz_train,  # input image size
                                               multi_scale=multi_scale,
                                               grid_min=grid_min,  # smallest grid size
                                               grid_max=grid_max,  # largest grid size
                                               gs=gs,  # grid step: 32
                                               print_freq=50,  # print every N steps
                                               warmup=True,
                                               scaler=scaler)
        # update scheduler
        scheduler.step()

        if opt.notest is False or epoch == epochs - 1:
            # evaluate on the test dataset
            result_info = train_util.evaluate(model, val_datasetloader,
                                              coco=coco, device=device)

            coco_mAP = result_info[0]
            voc_mAP = result_info[1]
            coco_mAR = result_info[8]

            # write into tensorboard
            if tb_writer:
                tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss', 'train/loss', "learning_rate",
                        "mAP@[IoU=0.50:0.95]", "mAP@[IoU=0.5]", "mAR@[IoU=0.50:0.95]"]

                for x, tag in zip(mloss.tolist() + [lr, coco_mAP, voc_mAP, coco_mAR], tags):
                    tb_writer.add_scalar(tag, x, epoch)

            # write into txt
            with open(results_file, "a") as f:
                # the evaluation metrics followed by the total training loss and the lr
                result_info = [str(round(i, 4)) for i in result_info + [mloss.tolist()[-1]]] + [str(round(lr, 6))]
                txt = "epoch:{} {}".format(epoch, '  '.join(result_info))
                f.write(txt + "\n")

            # update best mAP(IoU=0.50:0.95)
            if coco_mAP > best_map:
                best_map = coco_mAP

            if opt.savebest is False:
                # save weights every epoch
                _save_checkpoint("./weights/yolov3spp-{}.pt".format(epoch), epoch)
            elif best_map == coco_mAP:
                # only save best weights
                _save_checkpoint(best, epoch)


if __name__ == '__main__':
    def _str2bool(value):
        """
        Parse a command-line boolean.

        ``type=bool`` turns every non-empty string, including "False", into True;
        this parser maps the usual spellings to the value the user meant.
        """
        if isinstance(value, bool):
            return value
        text = str(value).strip().lower()
        if text in ("true", "t", "yes", "y", "1"):
            return True
        if text in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError("boolean value expected, got {!r}".format(value))

    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs', type=int, default=30)
    parser.add_argument('--batch-size', type=int, default=4)
    parser.add_argument('--cfg', type=str, default='cfg/my_yolov3.cfg', help="*.cfg path")
    parser.add_argument('--data', type=str, default='data/my_data.data', help='*.data path')
    parser.add_argument('--hyp', type=str, default='cfg/hyp.yaml', help='hyperparameters path')
    # Boolean options accept "--flag True/False"; a bare "--flag" means True.
    parser.add_argument('--multi-scale', type=_str2bool, nargs='?', const=True, default=True,
                        help='adjust (67%% - 150%%) img_size every 10 batches')
    parser.add_argument('--img-size', type=int, default=512, help='test size')
    parser.add_argument('--rect', action='store_true', help='rectangular training')
    parser.add_argument('--savebest', type=_str2bool, nargs='?', const=True, default=False,
                        help='only save best checkpoint')
    parser.add_argument('--notest', action='store_true', help='only test final epoch')
    parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
    parser.add_argument('--weights', type=str, default='weights/yolov3-spp-ultralytics-512.pt',
                        help='initial weights path')
    parser.add_argument('--name', default='', help='renames results.txt to results_name.txt if supplied')
    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')
    parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
    parser.add_argument('--freeze-layers', type=_str2bool, nargs='?', const=True, default=False,
                        help='Freeze non-output layers')
    # Mixed-precision training (requires a GPU that supports it).
    parser.add_argument("--amp", type=_str2bool, nargs='?', const=True, default=False,
                        help="Use torch.cuda.amp for mixed precision training")
    opt = parser.parse_args()

    # Check that the configuration files exist.
    opt.cfg = check_file(opt.cfg)
    opt.data = check_file(opt.data)
    opt.hyp = check_file(opt.hyp)
    print(opt)

    # NOTE(review): FullLoader is meant for trusted files only; if the hyp file
    # could come from an untrusted source, switch to yaml.safe_load.
    with open(opt.hyp) as f:
        hyp = yaml.load(f, Loader=yaml.FullLoader)

    print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
    # `opt` and `tb_writer` are module-level names that train() reads directly.
    tb_writer = SummaryWriter(comment=opt.name)
    train(hyp)


================================================
FILE: pytorch_object_detection/yolov3_spp/train_multi_GPU.py
================================================
import argparse
import datetime
import pickle

import yaml
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.tensorboard import SummaryWriter


from models import *
from build_utils.datasets import *
from build_utils.utils import *
from train_utils import train_eval_utils as train_util
from train_utils import get_coco_api_from_dataset, init_distributed_mode, torch_distributed_zero_first


def main(opt, hyp):
    """Train the Darknet model described by ``opt.cfg`` with DistributedDataParallel.

    The function initialises the distributed process, builds the model, optionally
    loads a ``.pt`` checkpoint, freezes part of the network, builds the datasets and
    data loaders and then runs the train / evaluate / save loop. Logging, result
    files and checkpoints are only written by the process whose ``opt.rank`` is
    ``-1`` or ``0``.

    Args:
        opt: parsed command-line namespace. Besides the command-line options it is
            expected to carry ``rank`` and ``gpu`` after ``init_distributed_mode``
            has run (presumably set there -- confirm against distributed_utils).
        hyp (dict): hyper-parameters. ``lr0``, ``cls`` and ``obj`` are rescaled
            in place.

    Raises:
        EnvironmentError: if ``opt.device`` is not a CUDA device.
        KeyError: if the checkpoint in ``opt.weights`` is not compatible with the
            model built from ``opt.cfg``.
    """
    # initialise the distributed state of this process
    init_distributed_mode(opt)

    if opt.rank in [-1, 0]:
        print(opt)
        print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
        tb_writer = SummaryWriter(comment=opt.name)

    device = torch.device(opt.device)
    if "cuda" not in device.type:
        raise EnvironmentError("not find GPU device for training.")

    # DDP averages the gradients over all devices, so the learning rate is scaled up
    hyp["lr0"] *= max(1., opt.world_size * opt.batch_size / 64)

    wdir = "weights" + os.sep  # weights dir
    best = wdir + "best.pt"
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    cfg = opt.cfg
    data = opt.data
    epochs = opt.epochs
    batch_size = opt.batch_size
    # accumulate n times before optimizer update (bs 64)
    accumulate = max(round(64 / (opt.world_size * opt.batch_size)), 1)
    weights = opt.weights  # initial training weights
    imgsz_train = opt.img_size
    imgsz_test = opt.img_size  # test image sizes
    multi_scale = opt.multi_scale

    # Image sizes
    # the image size has to be a multiple of 32
    gs = 32  # (pixels) grid size
    assert math.fmod(imgsz_test, gs) == 0, "--img-size %g must be a %g-multiple" % (imgsz_test, gs)
    grid_min, grid_max = imgsz_test // gs, imgsz_test // gs
    if multi_scale:
        imgsz_min = opt.img_size // 1.5
        imgsz_max = opt.img_size // 0.667

        # round the minimum / maximum input sizes down to a multiple of 32
        grid_min, grid_max = imgsz_min // gs, imgsz_max // gs
        imgsz_min, imgsz_max = int(grid_min * gs), int(grid_max * gs)
        imgsz_train = imgsz_max  # initialize with max size
        if opt.rank in [-1, 0]:  # only the first process prints
            print("Using multi_scale training, image range[{}, {}]".format(imgsz_min, imgsz_max))

    # configure run
    random.seed(0)  # fix the random seed
    data_dict = parse_data_cfg(data)
    train_path = data_dict["train"]
    test_path = data_dict["valid"]
    nc = 1 if opt.single_cls else int(data_dict["classes"])  # number of classes
    hyp["cls"] *= nc / 80  # update coco-tuned hyp['cls'] to current dataset
    hyp["obj"] *= imgsz_test / 320

    if opt.rank in [-1, 0]:
        # Remove previous results
        for f in glob.glob(results_file) + glob.glob("tmp.pk"):
            os.remove(f)

    # Initialize model
    model = Darknet(cfg).to(device)

    # The AMP gradient scaler has to exist before the checkpoint is read so that a
    # saved scaler state can be restored into it. It used to be created only after
    # the optimizer, which made the restore below fail with UnboundLocalError.
    scaler = torch.cuda.amp.GradScaler() if opt.amp else None

    start_epoch = 0
    best_map = 0.0
    # load the pretrained weights when a .pt checkpoint is given
    if weights.endswith(".pt"):
        ckpt = torch.load(weights, map_location=device)

        # load model
        try:
            # keep only the tensors whose element count matches the current model
            ckpt["model"] = {k: v for k, v in ckpt["model"].items()
                             if model.state_dict()[k].numel() == v.numel()}
            model.load_state_dict(ckpt["model"], strict=False)
        except KeyError as e:
            s = "%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s. " \
                "See https://github.com/ultralytics/yolov3/issues/657" % (opt.weights, opt.cfg, opt.weights)
            raise KeyError(s) from e

        if opt.rank in [-1, 0]:
            # load results
            if ckpt.get("training_results") is not None:
                with open(results_file, "w") as file:
                    file.write(ckpt["training_results"])  # write results.txt

        # epochs
        start_epoch = ckpt["epoch"] + 1
        if epochs < start_epoch:
            print('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
                  (opt.weights, ckpt['epoch'], epochs))
            epochs += ckpt['epoch']  # finetune additional epochs

        if opt.amp and "scaler" in ckpt:
            scaler.load_state_dict(ckpt["scaler"])

        del ckpt

    # whether to freeze the weights and train the predictors only
    if opt.freeze_layers:
        # index - 1 is the predictor; the YOLOLayer itself is not the predictor
        output_layer_indices = [idx - 1 for idx, module in enumerate(model.module_list) if
                                isinstance(module, YOLOLayer)]
        # freeze every layer except the predictors and the YOLOLayers
        freeze_layer_indeces = [x for x in range(len(model.module_list)) if
                                (x not in output_layer_indices) and
                                (x - 1 not in output_layer_indices)]
        # Freeze non-output layers
        # 3x2=6 parameters are trained in total
        for idx in freeze_layer_indeces:
            for parameter in model.module_list[idx].parameters():
                parameter.requires_grad_(False)
    else:
        # when freeze_layers is False only the part after darknet53 is trained by default
        # delete the code below to train all weights
        darknet_end_layer = 74  # only yolov3spp cfg
        # Freeze darknet53 layers
        # 21x3+3x2=69 parameters are trained in total
        for idx in range(darknet_end_layer + 1):  # [0, 74]
            for parameter in model.module_list[idx].parameters():
                parameter.requires_grad_(False)

    # SyncBatchNorm
    # has no effect when only the final predictors (which contain no BN layer) are trained
    if opt.freeze_layers is False:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)

    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[opt.gpu])
    model.yolo_layers = model.module.yolo_layers  # move yolo layer indices to top level

    # optimizer
    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=hyp["lr0"], momentum=hyp["momentum"],
                          weight_decay=hyp["weight_decay"], nesterov=True)

    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp["lrf"]) + hyp["lrf"]  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    scheduler.last_epoch = start_epoch  # epoch to start from

    # dataset
    # the training image size is the largest size of the multi-scale range
    # Make sure only the first process in DDP process the dataset first, and the following others can use the cache.
    with torch_distributed_zero_first(opt.rank):
        train_dataset = LoadImagesAndLabels(train_path, imgsz_train, batch_size,
                                            augment=True,
                                            hyp=hyp,  # augmentation hyperparameters
                                            rect=opt.rect,  # rectangular training
                                            cache_images=opt.cache_images,
                                            single_cls=opt.single_cls,
                                            rank=opt.rank)
        # the validation image size is img_size (512)
        val_dataset = LoadImagesAndLabels(test_path, imgsz_test, batch_size,
                                          hyp=hyp,
                                          cache_images=opt.cache_images,
                                          single_cls=opt.single_cls,
                                          rank=opt.rank)

    # assign the sample indices to the process of every rank
    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
    # group the sample indices into lists of batch_size elements
    train_batch_sampler = torch.utils.data.BatchSampler(
        train_sampler, batch_size, drop_last=True)

    # dataloader
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    if opt.rank in [-1, 0]:
        print('Using %g dataloader workers' % nw)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_sampler=train_batch_sampler, num_workers=nw,
        pin_memory=True, collate_fn=train_dataset.collate_fn)

    val_data_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=batch_size,
        sampler=val_sampler, num_workers=nw,
        pin_memory=True, collate_fn=val_dataset.collate_fn)

    # Model parameters
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)

    # start training
    # caching val_data when you have plenty of memory(RAM)
    with torch_distributed_zero_first(opt.rank):
        if os.path.exists("tmp.pk") is False:
            coco = get_coco_api_from_dataset(val_dataset)
            with open("tmp.pk", "wb") as f:
                pickle.dump(coco, f)
        else:
            # tmp.pk is a cache written by this script itself (see the branch above)
            with open("tmp.pk", "rb") as f:
                coco = pickle.load(f)

    if opt.rank in [-1, 0]:
        print("starting traning for %g epochs..." % epochs)
        print('Using %g dataloader workers' % nw)

    start_time = time.time()
    for epoch in range(start_epoch, epochs):
        train_sampler.set_epoch(epoch)
        mloss, lr = train_util.train_one_epoch(model, optimizer, train_data_loader,
                                               device, epoch,
                                               accumulate=accumulate,  # batches needed to reach 64 images
                                               img_size=imgsz_train,  # input image size
                                               multi_scale=multi_scale,
                                               grid_min=grid_min,  # smallest grid size
                                               grid_max=grid_max,  # largest grid size
                                               gs=gs,  # grid step: 32
                                               print_freq=50,  # print every print_freq steps
                                               warmup=True,
                                               scaler=scaler)
        # update scheduler
        scheduler.step()

        if opt.notest is False or epoch == epochs - 1:
            # evaluate on the test dataset
            result_info = train_util.evaluate(model, val_data_loader,
                                              coco=coco, device=device)

            # only first process in DDP process to record info and save weights
            if opt.rank in [-1, 0]:
                coco_mAP = result_info[0]
                voc_mAP = result_info[1]
                coco_mAR = result_info[8]

                # write into tensorboard
                if tb_writer:
                    tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss', 'train/loss', "learning_rate",
                            "mAP@[IoU=0.50:0.95]", "mAP@[IoU=0.5]", "mAR@[IoU=0.50:0.95]"]

                    for x, tag in zip(mloss.tolist() + [lr, coco_mAP, voc_mAP, coco_mAR], tags):
                        tb_writer.add_scalar(tag, x, epoch)

                # write into txt
                with open(results_file, "a") as f:
                    # record the coco metrics plus the total training loss and the lr
                    result_info = [str(round(i, 4)) for i in result_info + [mloss.tolist()[-1]]] + [str(round(lr, 6))]
                    txt = "epoch:{} {}".format(epoch, '  '.join(result_info))
                    f.write(txt + "\n")

                # update best mAP(IoU=0.50:0.95)
                if coco_mAP > best_map:
                    best_map = coco_mAP

                # decide where (and whether) to write a checkpoint for this epoch
                if opt.savebest is False:
                    # save weights every epoch
                    save_path = "./weights/yolov3spp-{}.pt".format(epoch)
                elif best_map == coco_mAP:
                    # only save best weights
                    save_path = best
                else:
                    save_path = None

                if save_path is not None:
                    with open(results_file, 'r') as f:
                        save_files = {
                            'model': model.module.state_dict(),
                            'optimizer': optimizer.state_dict(),
                            'training_results': f.read(),
                            'epoch': epoch,
                            'best_map': best_map}
                    if opt.amp:
                        save_files["scaler"] = scaler.state_dict()
                    torch.save(save_files, save_path)

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    if opt.rank in [-1, 0]:
        print('Training time {}'.format(total_time_str))


def str2bool(value):
    """Convert a command-line token into a real boolean.

    ``type=bool`` must not be used with argparse because ``bool("False")`` is
    ``True`` (every non-empty string is truthy), so an explicit converter is used.

    Args:
        value: a bool, or a string such as "true"/"false", "yes"/"no", "1"/"0"
            (case-insensitive, surrounding whitespace ignored).

    Returns:
        bool: the parsed value.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ("true", "t", "yes", "y", "1"):
        return True
    if token in ("false", "f", "no", "n", "0"):
        return False
    raise argparse.ArgumentTypeError("boolean value expected, got %r" % (value,))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs', type=int, default=30)
    parser.add_argument('--batch-size', type=int, default=16)
    parser.add_argument('--cfg', type=str, default='cfg/my_yolov3.cfg', help="*.cfg path")
    parser.add_argument('--data', type=str, default='data/my_data.data', help='*.data path')
    parser.add_argument('--hyp', type=str, default='cfg/hyp.yaml', help='hyperparameters path')
    parser.add_argument('--multi-scale', type=str2bool, default=True,
                        help='adjust (67%% - 150%%) img_size every 10 batches')
    parser.add_argument('--img-size', type=int, default=512, help='test size')
    parser.add_argument('--rect', action='store_true', help='rectangular training')
    parser.add_argument('--savebest', type=str2bool, default=False, help='only save best checkpoint')
    parser.add_argument('--notest', action='store_true', help='only test final epoch')
    parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
    parser.add_argument('--weights', type=str, default='weights/yolov3-spp-ultralytics-512.pt',
                        help='initial weights path')
    parser.add_argument('--name', default='', help='renames results.txt to results_name.txt if supplied')
    parser.add_argument('--device', default='cuda', help='device id (i.e. 0 or 0,1 or cpu)')
    parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
    parser.add_argument('--freeze-layers', type=str2bool, default=False, help='Freeze non-output layers')
    # number of processes (not threads); no need to set it, it is derived from nproc_per_node
    parser.add_argument('--world-size', default=4, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
    # whether to use mixed precision training (the GPU has to support it)
    parser.add_argument("--amp", type=str2bool, default=False,
                        help="Use torch.cuda.amp for mixed precision training")

    opt = parser.parse_args()

    # make sure the files exist
    opt.cfg = check_file(opt.cfg)
    opt.data = check_file(opt.data)
    opt.hyp = check_file(opt.hyp)

    with open(opt.hyp) as f:
        hyp = yaml.load(f, Loader=yaml.FullLoader)

    main(opt, hyp)


================================================
FILE: pytorch_object_detection/yolov3_spp/train_utils/__init__.py
================================================
from .coco_utils import get_coco_api_from_dataset
from .coco_eval import CocoEvaluator
from .distributed_utils import init_distributed_mode, torch_distributed_zero_first


================================================
FILE: pytorch_object_detection/yolov3_spp/train_utils/coco_eval.py
================================================
import json
import copy
from collections import defaultdict

import numpy as np
import torch
import torch._six
from pycocotools.cocoeval import COCOeval
from pycocotools.coco import COCO
import pycocotools.mask as mask_util

from .distributed_utils import all_gather


class CocoEvaluator(object):
    """Collect per-image COCO evaluation results for one or more IoU types.

    One ``COCOeval`` object is kept per IoU type. ``update`` evaluates a batch of
    predictions, ``synchronize_between_processes`` merges the collected results,
    and ``accumulate`` / ``summarize`` forward to the ``COCOeval`` objects.
    """

    def __init__(self, coco_gt, iou_types):
        """Store a private deep copy of ``coco_gt`` and build one evaluator per IoU type.

        Args:
            coco_gt: ground-truth COCO api object.
            iou_types (list | tuple): names such as "bbox", "segm" or "keypoints".
        """
        assert isinstance(iou_types, (list, tuple))
        self.coco_gt = copy.deepcopy(coco_gt)
        self.iou_types = iou_types

        self.coco_eval = {}
        for kind in iou_types:
            self.coco_eval[kind] = COCOeval(self.coco_gt, iouType=kind)

        # image ids seen so far and, per IoU type, the per-batch evaluation arrays
        self.img_ids = []
        self.eval_imgs = {kind: [] for kind in iou_types}

    def update(self, predictions):
        """Evaluate one batch of predictions (a mapping image id -> prediction)."""
        batch_ids = list(np.unique(list(predictions.keys())))
        self.img_ids.extend(batch_ids)

        for kind in self.iou_types:
            detections = self.prepare(predictions, kind)
            # an empty result list cannot go through loadRes, use an empty COCO instead
            if detections:
                detected = loadRes(self.coco_gt, detections)
            else:
                detected = COCO()

            evaluator = self.coco_eval[kind]
            evaluator.cocoDt = detected
            evaluator.params.imgIds = list(batch_ids)
            batch_ids, batch_eval = evaluate(evaluator)

            self.eval_imgs[kind].append(batch_eval)

    def synchronize_between_processes(self):
        """Concatenate the per-batch results along axis 2 and merge them into each evaluator."""
        for kind in self.iou_types:
            stacked = np.concatenate(self.eval_imgs[kind], 2)
            self.eval_imgs[kind] = stacked
            create_common_coco_eval(self.coco_eval[kind], self.img_ids, stacked)

    def accumulate(self):
        """Call ``accumulate`` on every underlying evaluator."""
        for evaluator in self.coco_eval.values():
            evaluator.accumulate()

    def summarize(self):
        """Print the IoU type followed by the summary of its evaluator."""
        for kind, evaluator in self.coco_eval.items():
            print("IoU metric: {}".format(kind))
            evaluator.summarize()

    def prepare(self, predictions, iou_type):
        """Convert ``predictions`` into COCO result dicts for ``iou_type``.

        Raises:
            ValueError: if ``iou_type`` is not "bbox", "segm" or "keypoints".
        """
        if iou_type == "bbox":
            return self.prepare_for_coco_detection(predictions)
        if iou_type == "segm":
            return self.prepare_for_coco_segmentation(predictions)
        if iou_type == "keypoints":
            return self.prepare_for_coco_keypoint(predictions)
        raise ValueError("Unknown iou type {}".format(iou_type))

    def prepare_for_coco_detection(self, predictions):
        """Build bbox result dicts; boxes are converted with ``convert_to_xywh``."""
        coco_results = []
        for image_id, output in predictions.items():
            # images without any prediction contribute nothing
            if len(output) == 0:
                continue

            xywh_boxes = convert_to_xywh(output["boxes"]).tolist()
            scores = output["scores"].tolist()
            labels = output["labels"].tolist()

            for k, box in enumerate(xywh_boxes):
                coco_results.append({
                    "image_id": image_id,
                    "category_id": labels[k],
                    "bbox": box,
                    "score": scores[k],
                })
        return coco_results

    def prepare_for_coco_segmentation(self, predictions):
        """Build segmentation result dicts with RLE-encoded masks thresholded at 0.5."""
        coco_results = []
        for image_id, output in predictions.items():
            if len(output) == 0:
                continue

            raw_scores = output["scores"]
            raw_labels = output["labels"]
            binary_masks = output["masks"] > 0.5

            scores = raw_scores.tolist()
            labels = raw_labels.tolist()

            rles = []
            for mask in binary_masks:
                fortran_mask = np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F")
                rles.append(mask_util.encode(fortran_mask)[0])
            # the encoded counts are bytes; store them as text
            for rle in rles:
                rle["counts"] = rle["counts"].decode("utf-8")

            for k, rle in enumerate(rles):
                coco_results.append({
                    "image_id": image_id,
                    "category_id": labels[k],
                    "segmentation": rle,
                    "score": scores[k],
                })
        return coco_results

    def prepare_for_coco_keypoint(self, predictions):
        """Build keypoint result dicts; each keypoint tensor is flattened from dim 1 on."""
        coco_results = []
        for image_id, output in predictions.items():
            if len(output) == 0:
                continue

            # kept from the original even though the boxes are not part of the result
            boxes = convert_to_xywh(output["boxes"]).tolist()
            scores = output["scores"].tolist()
            labels = output["labels"].tolist()
            flat_keypoints = output["keypoints"].flatten(start_dim=1).tolist()

            for k, keypoint in enumerate(flat_keypoints):
                coco_results.append({
                    "image_id": image_id,
                    "category_id": labels[k],
                    'keypoints': keypoint,
                    "score": scores[k],
                })
        return coco_results


def convert_to_xywh(boxes):
    """Turn boxes given as (xmin, ymin, xmax, ymax) columns into (xmin, ymin, width, height).

    ``boxes`` is split along dimension 1 and the result is stacked along dimension 1.
    """
    left, top, right, bottom = boxes.unbind(1)
    width = right - left
    height = bottom - top
    return torch.stack((left, top, width, height), dim=1)


def merge(img_ids, eval_imgs):
    """Gather image ids and evaluation arrays from all ranks and de-duplicate them.

    Returns:
        tuple: the sorted unique image ids and the evaluation array restricted, along
        its last axis, to the first occurrence of each of those ids.
    """
    ids_per_rank = all_gather(img_ids)
    evals_per_rank = all_gather(eval_imgs)

    # flatten the ids of all ranks; join the evaluation arrays along axis 2
    flat_ids = np.array([img_id for chunk in ids_per_rank for img_id in chunk])
    joined_evals = np.concatenate(list(evals_per_rank), 2)

    # keep only unique (and in sorted order) images
    unique_ids, first_positions = np.unique(flat_ids, return_index=True)
    return unique_ids, joined_evals[..., first_positions]


def create_common_coco_eval(coco_eval, img_ids, eval_imgs):
    """Store the merged results of all ranks on ``coco_eval``.

    Sets ``evalImgs`` (flattened list), ``params.imgIds`` and a deep copy of the
    parameters in ``_paramsEval``.
    """
    merged_ids, merged_evals = merge(img_ids, eval_imgs)

    coco_eval.evalImgs = list(merged_evals.flatten())
    coco_eval.params.imgIds = list(merged_ids)
    coco_eval._paramsEval = copy.deepcopy(coco_eval.params)


#################################################################
# From pycocotools, just removed the prints and fixed
# a Python3 bug about unicode not defined
#################################################################

# Ideally, pycocotools wouldn't have hard-coded prints
# so that we could avoid copy-pasting those two functions

def createIndex(self):
    """Build the lookup tables of a COCO-style object from ``self.dataset``.

    Sets ``anns``, ``imgs`` and ``cats`` (id -> record), ``imgToAnns`` (image id ->
    list of annotations) and ``catToImgs`` (category id -> list of image ids).
    Sections missing from ``self.dataset`` leave the corresponding tables empty.
    """
    data = self.dataset
    ann_by_id, img_by_id, cat_by_id = {}, {}, {}
    anns_of_img = defaultdict(list)
    imgs_of_cat = defaultdict(list)

    if 'annotations' in data:
        for record in data['annotations']:
            anns_of_img[record['image_id']].append(record)
            ann_by_id[record['id']] = record

    if 'images' in data:
        img_by_id.update((image['id'], image) for image in data['images'])

    if 'categories' in data:
        cat_by_id.update((category['id'], category) for category in data['categories'])

    # the category -> images table is only filled when both sections are present
    if 'annotations' in data and 'categories' in data:
        for record in data['annotations']:
            imgs_of_cat[record['category_id']].append(record['image_id'])

    # create class members
    self.anns = ann_by_id
    self.imgToAnns = anns_of_img
    self.catToImgs = imgs_of_cat
    self.imgs = img_by_id
    self.cats = cat_by_id


# Alias for pycocotools.mask under the name used by the code copied from
# pycocotools below (loadRes calls maskUtils.area / maskUtils.toBbox).
maskUtils = mask_util


def loadRes(self, resFile):
    """
    Load result file and return a result api object.

    The annotation dicts are completed in place (``id``, ``area``, ``bbox``,
    ``segmentation``, ``iscrowd`` depending on the result type). An empty result
    list yields a result object without annotations.

    :param   resFile (str | bytes | np.ndarray | list): file name of a JSON result
             file, a numpy array understood by ``self.loadNumpyAnnotations``, or a
             list of result dicts
    :return: res (obj)         : result api object
    :raises AssertionError: if the results are not a list, or if they refer to image
             ids that are not part of ``self``
    """
    res = COCO()
    res.dataset['images'] = list(self.dataset['images'])

    # A plain str/bytes check replaces the private torch._six.string_classes alias,
    # which newer PyTorch releases no longer provide.
    if isinstance(resFile, (str, bytes)):
        # close the result file deterministically instead of leaking the handle
        with open(resFile) as res_fp:
            anns = json.load(res_fp)
    elif isinstance(resFile, np.ndarray):
        anns = self.loadNumpyAnnotations(resFile)
    else:
        anns = resFile
    assert isinstance(anns, list), 'results in not an array of objects'
    annsImgIds = [ann['image_id'] for ann in anns]
    assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
        'Results do not correspond to current coco set'

    # The result type is detected from the first annotation; with no annotation at
    # all there is nothing to complete (anns[0] used to raise IndexError here).
    first = anns[0] if anns else {}
    if 'caption' in first:
        imgIds = set(img['id'] for img in res.dataset['images']) & set(annsImgIds)
        res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
        for ann_id, ann in enumerate(anns, start=1):
            ann['id'] = ann_id
    elif 'bbox' in first and not first['bbox'] == []:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for ann_id, ann in enumerate(anns, start=1):
            bb = ann['bbox']
            x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
            if 'segmentation' not in ann:
                # use the box outline as the polygon
                ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
            ann['area'] = bb[2] * bb[3]
            ann['id'] = ann_id
            ann['iscrowd'] = 0
    elif 'segmentation' in first:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for ann_id, ann in enumerate(anns, start=1):
            # now only support compressed RLE format as segmentation results
            ann['area'] = maskUtils.area(ann['segmentation'])
            if 'bbox' not in ann:
                ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
            ann['id'] = ann_id
            ann['iscrowd'] = 0
    elif 'keypoints' in first:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for ann_id, ann in enumerate(anns, start=1):
            # keypoints are stored as a flat list of triples: x at 0::3, y at 1::3
            s = ann['keypoints']
            x = s[0::3]
            y = s[1::3]
            x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y)
            ann['area'] = (x2 - x1) * (y2 - y1)
            ann['id'] = ann_id
            ann['bbox'] = [x1, y1, x2 - x1, y2 - y1]

    res.dataset['annotations'] = anns
    createIndex(res)
    return res


def evaluate(self):
    '''
    Run the per-image evaluation of a COCOeval-style object.

    Normalises ``self.params`` (unique image / category ids, sorted ``maxDets``),
    fills ``self.ious`` and stores a deep copy of the parameters in
    ``self._paramsEval``.

    :return: (imgIds, evalImgs) where evalImgs is an array of shape
             (number of categories, number of area ranges, number of images)
    '''
    params = self.params
    # add backward compatibility if useSegm is specified in params
    if params.useSegm is not None:
        params.iouType = 'segm' if params.useSegm == 1 else 'bbox'
        print('useSegm (deprecated) is not None. Running {} evaluation'.format(params.iouType))
    params.imgIds = list(np.unique(params.imgIds))
    if params.useCats:
        params.catIds = list(np.unique(params.catIds))
    params.maxDets = sorted(params.maxDets)
    self.params = params

    self._prepare()
    # a single pseudo category is used when categories are ignored
    cat_ids = params.catIds if params.useCats else [-1]

    if params.iouType in ('segm', 'bbox'):
        iou_fn = self.computeIoU
    elif params.iouType == 'keypoints':
        iou_fn = self.computeOks

    pair_ious = {}
    for img_id in params.imgIds:
        for cat_id in cat_ids:
            pair_ious[(img_id, cat_id)] = iou_fn(img_id, cat_id)
    self.ious = pair_ious

    eval_one = self.evaluateImg
    largest_max_det = params.maxDets[-1]
    # loop through categories, area ranges and images (in that nesting order)
    per_image = []
    for cat_id in cat_ids:
        for area_range in params.areaRng:
            for img_id in params.imgIds:
                per_image.append(eval_one(img_id, cat_id, area_range, largest_max_det))

    # this is NOT in the pycocotools code, but could be done outside
    per_image = np.asarray(per_image).reshape(len(cat_ids), len(params.areaRng), len(params.imgIds))
    self._paramsEval = copy.deepcopy(self.params)
    return params.imgIds, per_image

#################################################################
# end of straight copy from pycocotools, just removing the prints
#################################################################


================================================
FILE: pytorch_object_detection/yolov3_spp/train_utils/coco_utils.py
================================================
from tqdm import tqdm

import torch
import torchvision
import torch.utils.data
from pycocotools.coco import COCO


def convert_to_coco_api(ds):
    """Build a COCO api object from a dataset that offers ``coco_index``.

    For every index ``ds.coco_index(i)`` must return ``(targets, shapes)``. Each
    target row is read as (class, x, y, w, h): element 0 is the category and the
    remaining four are a relative centre-format box, which is converted in place
    to absolute (xmin, ymin, w, h). ``shapes`` is read as (height, width).
    The image index is used as the image id.
    """
    coco_ds = COCO()
    # annotation IDs need to start at 1, not 0
    next_ann_id = 1
    images, annotations = [], []
    seen_categories = set()

    # walk over every image of the dataset
    for img_idx in tqdm(range(len(ds)), desc="loading eval info for coco tools."):
        targets, shapes = ds.coco_index(img_idx)
        height, width = shapes[0], shapes[1]
        images.append({'id': img_idx, 'height': height, 'width': width})

        for obj in targets:
            box = obj[1:]
            # (x, y, w, h) to (xmin, ymin, w, h)
            box[:2] -= 0.5*box[2:]
            # relative coordinates to absolute coordinates
            box[[0, 2]] *= width
            box[[1, 3]] *= height
            box = box.tolist()

            category = int(obj[0])
            seen_categories.add(category)
            annotations.append({
                "image_id": img_idx,
                "bbox": box,
                "category_id": category,
                "area": box[2] * box[3],
                "iscrowd": 0,
                "id": next_ann_id,
            })
            next_ann_id += 1

    coco_ds.dataset = {
        'images': images,
        'categories': [{'id': i} for i in sorted(seen_categories)],
        'annotations': annotations,
    }
    coco_ds.createIndex()
    return coco_ds


def get_coco_api_from_dataset(dataset):
    """Return a COCO api object for ``dataset``.

    Up to ten levels of ``torch.utils.data.Subset`` wrappers are removed. A
    ``torchvision.datasets.CocoDetection`` dataset provides its own ``coco``
    attribute; any other dataset is converted with ``convert_to_coco_api``.
    """
    coco_cls = torchvision.datasets.CocoDetection

    remaining = 10
    while remaining > 0 and not isinstance(dataset, coco_cls):
        if isinstance(dataset, torch.utils.data.Subset):
            dataset = dataset.dataset
        remaining -= 1

    if not isinstance(dataset, coco_cls):
        return convert_to_coco_api(dataset)
    return dataset.coco


================================================
FILE: pytorch_object_detection/yolov3_spp/train_utils/distributed_utils.py
================================================
from collections import defaultdict, deque
import datetime
import pickle
import time
import errno
import os
from contextlib import contextmanager

import torch
import torch.distributed as dist


class SmoothedValue(object):
    """Keep a bounded window of recent values plus a running total and count.

    Window statistics (median, avg, max, value) are computed from the last
    ``window_size`` values; ``global_avg`` is computed from the running total.
    """

    def __init__(self, window_size=20, fmt=None):
        self.fmt = "{value:.4f} ({global_avg:.4f})" if fmt is None else fmt
        # bounded queue: the oldest value is dropped once window_size is reached
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0

    def update(self, value, n=1):
        """Append ``value`` to the window and add it ``n`` times to the running total."""
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Sum ``count`` and ``total`` over all processes; a no-op when distributed
        mode is not available / initialised.

        Warning: does not synchronize the deque!
        """
        if is_dist_avail_and_initialized():
            packed = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda")
            dist.barrier()
            dist.all_reduce(packed)
            synced_count, synced_total = packed.tolist()
            self.count = int(synced_count)
            self.total = synced_total

    @property
    def median(self):
        """Median of the values currently in the window (read-only)."""
        return torch.tensor(list(self.deque)).median().item()

    @property
    def avg(self):
        """Mean of the values currently in the window, computed in float32."""
        window = list(self.deque)
        return torch.tensor(window, dtype=torch.float32).mean().item()

    @property
    def global_avg(self):
        """Running total divided by the running count."""
        return self.total / self.count

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    @property
    def value(self):
        """Most recently added value."""
        return self.deque[-1]

    def __str__(self):
        stats = dict(
            median=self.median,
            avg=self.avg,
            global_avg=self.global_avg,
            max=self.max,
            value=self.value)
        return self.fmt.format(**stats)


def all_gather(data):
    """
    Run all_gather on arbitrary picklable data (not necessarily tensors).

    Every rank pickles its object into a uint8 CUDA tensor, the tensors are
    padded to a common length (``dist.all_gather`` needs equal shapes), and
    each rank unpickles what it received.

    Args:
        data: any picklable object
    Returns:
        list[data]: list of data gathered from each rank
    """
    world_size = get_world_size()
    if world_size == 1:
        return [data]

    # Serialize the object into a flat byte tensor on the GPU.
    payload = pickle.dumps(data)
    storage = torch.ByteStorage.from_buffer(payload)
    tensor = torch.ByteTensor(storage).to("cuda")

    # Keep the local size as a plain int: it is used below for a comparison
    # and as a tensor dimension, neither of which should go through a
    # 1-element CUDA tensor (implicit conversion + device sync).
    local_size = tensor.numel()

    # Exchange the payload size of every rank.
    local_size_tensor = torch.tensor([local_size], device="cuda")
    size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)]
    dist.all_gather(size_list, local_size_tensor)
    size_list = [int(size.item()) for size in size_list]
    max_size = max(size_list)

    # Receive buffers; the local payload is padded up to max_size because
    # torch all_gather does not support gathering tensors of different shapes.
    tensor_list = [torch.empty((max_size,), dtype=torch.uint8, device="cuda")
                   for _ in size_list]
    if local_size != max_size:
        padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda")
        tensor = torch.cat((tensor, padding), dim=0)
    dist.all_gather(tensor_list, tensor)

    data_list = []
    for size, gathered in zip(size_list, tensor_list):
        # Drop the padding before deserializing.
        raw = gathered.cpu().numpy().tobytes()[:size]
        # NOTE: pickle.loads can execute arbitrary code; this assumes every
        # rank in the process group is trusted.
        data_list.append(pickle.loads(raw))

    return data_list


def reduce_dict(input_dict, average=True):
    """
    Reduce the tensor values of a dictionary across all processes.

    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum
    Returns:
        ``input_dict`` itself when fewer than two processes are running,
        otherwise a new dict with the same keys holding the reduced values.
    """
    world_size = get_world_size()
    if world_size < 2:  # single process: nothing to reduce
        return input_dict

    with torch.no_grad():
        # Sorting the keys makes the stacking order identical on every rank.
        names = sorted(input_dict.keys())
        values = torch.stack([input_dict[name] for name in names], dim=0)
        dist.all_reduce(values)
        if average:
            values /= world_size
        return dict(zip(names, values))


class MetricLogger(object):
    """Collect named ``SmoothedValue`` meters and print periodic progress lines.

    Meters are created on first use by ``update`` and can be read back as
    attributes (``logger.loss``).
    """
    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Record one scalar per keyword; 0-dim tensors are unwrapped with ``item()``."""
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        # Only invoked when normal attribute lookup fails. Go through
        # __dict__ so that a missing ``meters`` (e.g. while the object is
        # being copied or unpickled) raises AttributeError instead of
        # recursing into __getattr__ forever.
        instance_dict = self.__dict__
        meters = instance_dict.get("meters")
        if meters is not None and attr in meters:
            return meters[attr]
        if attr in instance_dict:
            return instance_dict[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        loss_str = [
            "{}: {}".format(name, str(meter))
            for name, meter in self.meters.items()
        ]
        return self.delimiter.join(loss_str)

    def synchronize_between_processes(self):
        """Synchronize every meter's global count/total across processes."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register ``meter`` under ``name``, replacing any existing one."""
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield the items of ``iterable`` while printing progress.

        A status line is printed every ``print_freq`` iterations and on the
        last one; a total-time summary is printed when the iterable is
        exhausted. ``iterable`` must support ``len()``.

        Args:
            iterable: sized iterable to wrap (typically a DataLoader)
            print_freq (int): print a line every this many iterations
            header (str): optional prefix for every printed line
        """
        if not header:
            header = ""
        num_iters = len(iterable)
        cuda_available = torch.cuda.is_available()
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        # Pad the iteration counter to the width of the total count.
        space_fmt = ":" + str(len(str(num_iters))) + "d"
        fields = [header,
                  '[{0' + space_fmt + '}/{1}]',
                  'eta: {eta}',
                  '{meters}',
                  'time: {time}',
                  'data: {data}']
        if cuda_available:
            fields.append('max mem: {memory:.0f}')
        log_msg = self.delimiter.join(fields)
        MB = 1024.0 * 1024.0
        for i, obj in enumerate(iterable):
            # Time spent waiting for the next item.
            data_time.update(time.time() - end)
            yield obj
            # Full iteration time: data wait plus the consumer's work.
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == num_iters - 1:
                eta_second = iter_time.global_avg * (num_iters - i)
                eta_string = str(datetime.timedelta(seconds=eta_second))
                extra = {}
                if cuda_available:
                    extra["memory"] = torch.cuda.max_memory_allocated() / MB
                print(log_msg.format(i, num_iters,
                                     eta=eta_string,
                                     meters=str(self),
                                     time=str(iter_time),
                                     data=str(data_time),
                                     **extra))
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        # An empty iterable must not crash the summary with a division by zero.
        time_per_iter = total_time / num_iters if num_iters else 0.0
        print('{} Total time: {} ({:.4f} s / it)'.format(header,
                                                         total_time_str,
                                                         time_per_iter))


def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
    """Build a LambdaLR scheduler that linearly warms up the learning rate.

    The multiplier starts at ``warmup_factor`` for step 0, grows linearly to
    1 at ``warmup_iters`` steps, and stays at 1 afterwards.
    """

    def f(x):
        """Return the learning-rate multiplier for step ``x``."""
        if x < warmup_iters:
            # Linear interpolation from warmup_factor (x == 0) towards 1.
            alpha = float(x) / warmup_iters
            return warmup_factor * (1 - alpha) + alpha
        # Warm-up finished: use the optimizer's base learning rate.
        return 1

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)


def mkdir(path):
    """Create ``path`` (with parents); an already-existing path is not an error."""
    try:
        os.makedirs(path)
    except OSError as exc:
        if exc.errno == errno.EEXIST:
            return
        raise


def setup_for_distributed(is_master):
    """
    Replace the builtin ``print`` so that it only prints in the master process.

    Non-master processes can still print by passing ``force=True``.
    """
    import builtins as __builtin__
    original_print = __builtin__.print

    def print(*args, **kwargs):
        # ``force`` is consumed here and never forwarded to the real print.
        force = kwargs.pop('force', False)
        if not (is_master or force):
            return
        original_print(*args, **kwargs)

    __builtin__.print = print


def is_dist_avail_and_initialized():
    """Return True only if torch.distributed is available and initialized."""
    return dist.is_available() and dist.is_initialized()


def get_world_size():
    """Return the number of processes in the group, or 1 when not distributed."""
    return dist.get_world_size() if is_dist_avail_and_initialized() else 1


def get_rank():
    """Return this process's rank, or 0 when not running distributed."""
    return dist.get_rank() if is_dist_avail_and_initialized() else 0


def is_main_process():
    """Return True when the current process has rank 0."""
    rank = get_rank()
    return rank == 0


def save_on_master(*args, **kwargs):
    """Forward the arguments to ``torch.save``, but only in the main process."""
    if not is_main_process():
        return
    torch.save(*args, **kwargs)


def init_distributed_mode(args):
    """Initialise torch.distributed from environment variables.

    Reads RANK / WORLD_SIZE / LOCAL_RANK when RANK and WORLD_SIZE are set,
    otherwise falls back to SLURM_PROCID. When neither is present,
    ``args.distributed`` is set to False and nothing else happens.
    Otherwise sets ``args.rank``, ``args.gpu``, ``args.dist_backend`` and
    ``args.distributed`` and initialises an NCCL process group using
    ``args.dist_url`` and ``args.world_size``.
    """
    env = os.environ
    has_rank_and_world = 'RANK' in env and 'WORLD_SIZE' in env

    if not has_rank_and_world and 'SLURM_PROCID' not in env:
        print('Not using distributed mode')
        args.distributed = False
        return

    if has_rank_and_world:
        args.rank = int(env["RANK"])
        args.world_size = int(env['WORLD_SIZE'])
        args.gpu = int(env['LOCAL_RANK'])
    else:
        # SLURM branch: args.world_size is not assigned here and must
        # already be present on ``args``.
        args.rank = int(env['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    torch.distributed.init_process_group(backend=args.dist_backend,
                                         init_method=args.dist_url,
                                         world_size=args.world_size,
                                         rank=args.rank)
    torch.distributed.barrier()
    # From here on only rank 0 prints (unless force=True is passed).
    setup_for_distributed(args.rank == 0)


@contextmanager
def torch_distributed_zero_first(local_rank: int):
    """
    Context manager that makes the other processes wait while the process
    with ``local_rank == 0`` runs the body first.

    Ranks other than -1 and 0 block on a barrier before the body; rank 0
    joins that barrier after its body has run. ``local_rank == -1`` never
    touches a barrier.
    """
    must_wait = local_rank != -1 and local_rank != 0
    if must_wait:
        torch.distributed.barrier()
    yield
    if local_rank == 0:
        torch.distributed.barrier()


================================================
FILE: pytorch_object_detection/yolov3_spp/train_utils/group_by_aspect_ratio.py
================================================
import bisect
from collections import defaultdict
import copy
from itertools import repeat, chain
import math
import numpy as np

import torch
import torch.utils.data
from torch.utils.data.sampler import BatchSampler, Sampler
from torch.utils.model_zoo import tqdm
import torchvision

from PIL import Image


def _repeat_to_at_least(iterable, n):
    repeat_times = math.ceil(n / len(iterable))
    repeated = chain.from_iterable(repeat(iterable, repeat_times))
    return list(repeated)


class GroupedBatchSampler(BatchSampler):
    """
    Batch sampler that wraps another sampler and only emits mini-batches whose
    indices all belong to the same group, following the base sampler's order
    as closely as possible.

    Arguments:
        sampler (Sampler): Base sampler.
        group_ids (list[int]): If the sampler produces indices in range [0, N),
            `group_ids` must be a list of `N` ints which contains the group id of each sample.
            The group ids must be a continuous set of integers starting from
            0, i.e. they must be in the range [0, num_groups).
        batch_size (int): Size of mini-batch.
    """
    def __init__(self, sampler, group_ids, batch_size):
        if not isinstance(sampler, Sampler):
            raise ValueError(
                "sampler should be an instance of "
                "torch.utils.data.Sampler, but got sampler={}".format(sampler)
            )
        self.sampler = sampler
        self.group_ids = group_ids
        self.batch_size = batch_size

    def __iter__(self):
        pending = defaultdict(list)  # partially filled batch per group
        seen = defaultdict(list)     # every index drawn so far, per group

        emitted = 0
        for idx in self.sampler:
            gid = self.group_ids[idx]
            bucket = pending[gid]
            bucket.append(idx)
            seen[gid].append(idx)
            if len(bucket) == self.batch_size:
                yield bucket
                emitted += 1
                # Drop the entry rather than clearing the list: the consumer
                # may still hold the batch that was just yielded.
                del pending[gid]
            assert len(pending[gid]) < self.batch_size

        # The base sampler is exhausted. Pad leftover partial batches with
        # already-seen samples of the same group so that the number of
        # batches produced always equals len(self).
        num_remaining = len(self) - emitted
        if num_remaining > 0:
            # Fullest buffers first.
            fullest_first = sorted(pending.items(),
                                   key=lambda item: len(item[1]), reverse=True)
            for gid, bucket in fullest_first:
                missing = self.batch_size - len(bucket)
                filler = _repeat_to_at_least(seen[gid], missing)
                bucket.extend(filler[:missing])
                assert len(bucket) == self.batch_size
                yield bucket
                num_remaining -= 1
                if num_remaining == 0:
                    break
        assert num_remaining == 0

    def __len__(self):
        # Incomplete trailing batches are not counted.
        return len(self.sampler) // self.batch_size


def _compute_aspect_ratios_slow(dataset, indices=None):
    print("Your dataset doesn't support the fast path for "
          "computing the aspect ratios, so will iterate over "
          "the full dataset and load every image instead. "
          "This might take some time...")
    if indices is None:
        indices = range(len(dataset))

    class SubsetSampler(Sampler):
        def __init__(self, indices):
            self.indices = indices

        def __iter__(self):
            return iter(self.indices)

        def __len__(self):
            return len(self.indices)

    sampler = SubsetSampler(indices)
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, sampler=sampler,
        num_workers=14,  # you might want to increase it for faster processing
        collate_fn=lambda x: x[0])
    aspect_ratios = []
    with tqdm(total=len(dataset)) as pbar:
        for _i, (img, _) in enumerate(data_loader):
            pbar.update(1)
            height, width = img.shape[-2:]
            aspect_ratio = float(width) / float(height)
            aspect_ratios.append(aspect_ratio)
    return aspect_ratios


def _compute_aspect_ratios_custom_dataset(dataset, indices=None):
    if indices is None:
        indices = range(len(dataset))
    aspect_ratios = []
    for i in indices:
        height, width = dataset.get_height_and_width(i)
        aspect_ratio = float(width) / float(height)
        aspect_ratios.append(aspect_ratio)
    return aspect_ratios


def _compute_aspect_ratios_coco_dataset(dataset, indices=None):
    if indices is None:
        indices = range(len(dataset))
    aspect_ratios = []
    for i in indices:
        img_info = dataset.coco.imgs[dataset.ids[i]]
        aspect_ratio = float(img_info["width"]) / float(img_info["height"])
        aspect_ratios.append(aspect_ratio)
    return aspect_ratios


def _compute_aspect_ratios_voc_dataset(dataset, indices=None):
    """Compute width/height ratios by opening each file in ``dataset.images``.

    Args:
        dataset: object exposing ``images``, a sequence of image file paths.
        indices: dataset indices to process; defaults to the whole dataset.
    Returns:
        list[float]: one aspect ratio per index, in the order of ``indices``.
    """
    if indices is None:
        indices = range(len(dataset))
    aspect_ratios = []
    for i in indices:
        # PIL loads lazily, so reading ``size`` does not decode the pixels.
        # The context manager closes the file handle right away; without it
        # one open descriptor per image lingers until garbage collection.
        with Image.open(dataset.images[i]) as img:
            width, height = img.size
        aspect_ratios.append(float(width) / float(height))
    return aspect_ratios


def _compute_aspect_ratios_subset_dataset(dataset, indices=None):
    """Map subset positions to indices of the wrapped dataset and delegate to it."""
    positions = range(len(dataset)) if indices is None else indices
    # Translate positions within the subset into indices of dataset.dataset.
    parent_indices = [dataset.indices[pos] for pos in positions]
    return compute_aspect_ratios(dataset.dataset, parent_indices)


def compute_aspect_ratios(dataset, indices=None):
    """Return width/height ratios for ``dataset``, using the cheapest path available.

    Preference order: a ``get_height_and_width`` method on the dataset,
    COCO metadata, VOC image files, unwrapping a ``Subset``, and finally
    loading every sample.
    """
    if hasattr(dataset, "get_height_and_width"):
        return _compute_aspect_ratios_custom_dataset(dataset, indices)

    # Checked in order; the first matching dataset type wins.
    typed_handlers = (
        (torchvision.datasets.CocoDetection, _compute_aspect_ratios_coco_dataset),
        (torchvision.datasets.VOCDetection, _compute_aspect_ratios_voc_dataset),
        (torch.utils.data.Subset, _compute_aspect_ratios_subset_dataset),
    )
    for dataset_type, handler in typed_handlers:
        if isinstance(dataset, dataset_type):
            return handler(dataset, indices)

    # slow path
    return _compute_aspect_ratios_slow(dataset, indices)


def _quantize(x, bins):
    bins = copy.deepcopy(bins)
    bins = sorted(bins)
    # bisect_right：寻找y元素按顺序应该排在bins中哪个元素的右边，返回的是索引
    quantized = list(map(lambda y: bisect.bisect_right(bins, y), x))
    return quantized


def create_aspect_ratio_groups(dataset, k=0):
    """Assign every sample of ``dataset`` to an aspect-ratio group.

    Args:
        dataset: dataset accepted by ``compute_aspect_ratios``.
        k (int): when positive, ``2 * k + 1`` bin edges are placed between
            0.5 and 2, evenly spaced in log2; otherwise a single edge at 1.0
            is used.
    Returns:
        list[int]: group id (bin index) of each sample.
    """
    # width/height ratio of every image in the dataset
    aspect_ratios = compute_aspect_ratios(dataset)

    if k > 0:
        bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist()
    else:
        bins = [1.0]

    # bin index of every image
    groups = _quantize(aspect_ratios, bins)
    # number of images per occupied bin
    _, counts = np.unique(groups, return_counts=True)
    fbins = [0] + bins + [np.inf]
    print("Using {} as bins for aspect ratio quantization".format(fbins))
    print("Count of instances per bin: {}".format(counts))
    return groups


================================================
FILE: pytorch_object_detection/yolov3_spp/train_utils/train_eval_utils.py
================================================
import sys

from torch.cuda import amp
import torch.nn.functional as F

from build_utils.utils import *
from .coco_eval import CocoEvaluator
from .coco_utils import get_coco_api_from_dataset
import train_utils.distributed_utils as utils


def train_one_epoch(model, optimizer, data_loader, device, epoch,
                    print_freq, accumulate, img_size,
                    grid_min, grid_max, gs,
                    multi_scale=False, warmup=False, scaler=None):
    """Train ``model`` for one epoch over ``data_loader``.

    Args:
        model: network to train; called as ``model(imgs)``.
        optimizer: optimizer stepped whenever ``ni % accumulate == 0``.
        data_loader: yields 5-tuples; the first three items are used as
            ``(imgs, targets, paths)``.
        device: device the images and targets are moved to.
        epoch (int): current epoch index; warm-up only applies to epoch 0.
        print_freq (int): logging interval passed to ``MetricLogger.log_every``.
        accumulate (int): the loss is divided by this value and the optimizer
            steps on batches where ``ni % accumulate == 0``; forced to 1
            during warm-up.
        img_size (int): target size of the longest image side; re-drawn
            randomly when ``multi_scale`` is enabled.
        grid_min, grid_max (int): range (in units of ``gs``) from which the
            multi-scale size is drawn.
        gs (int): grid size in pixels; resized shapes are multiples of it.
        multi_scale (bool): enable random input rescaling.
        warmup (bool): enable linear learning-rate warm-up in epoch 0.
        scaler: gradient scaler for mixed precision, or None to disable it.

    Returns:
        tuple: ``(mloss, now_lr)`` - the running mean of
        [box_loss, obj_loss, class_loss, total loss] over the epoch and the
        learning rate of the first parameter group after the last batch.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if epoch == 0 and warmup is True:  # warm-up is only used during the first epoch (epoch == 0)
        warmup_factor = 1.0 / 1000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)
        # no gradient accumulation while warming up
        accumulate = 1

    mloss = torch.zeros(4).to(device)  # mean losses
    now_lr = 0.
    nb = len(data_loader)  # number of batches
    # imgs: [batch_size, 3, img_size, img_size]
    # targets: [num_obj, 6] , that number 6 means -> (img_index, obj_index, x, y, w, h)
    # paths: list of img path
    for i, (imgs, targets, paths, _, _) in enumerate(metric_logger.log_every(data_loader, print_freq, header)):
        # ni counts every batch processed since epoch 0
        ni = i + nb * epoch  # number integrated batches (since train start)
        imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)

        # Multi-Scale
        if multi_scale:
            # The input size is re-drawn on batches where ni % accumulate == 0
            # (the original note describes this as "every 64 images").
            # Labels are relative coordinates, so resizing the images does not change them.
            if ni % accumulate == 0:  # adjust img_size (67% - 150%) every 1 batch
                # pick a random size within [grid_min, grid_max] grid cells (a multiple of gs)
                img_size = random.randrange(grid_min, grid_max + 1) * gs
            sf = img_size / max(imgs.shape[2:])  # scale factor

            # If the longest side differs from img_size, resize the batch and
            # round height and width up to multiples of gs.
            if sf != 1:
                # gs: (pixels) grid size
                ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to 32-multiple)
                imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

        # Mixed-precision autocast context; only enabled when a scaler is given
        with amp.autocast(enabled=scaler is not None):
            pred = model(imgs)

            # loss
            loss_dict = compute_loss(pred, targets, model)
            losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purpose
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        loss_items = torch.cat((loss_dict_reduced["box_loss"],
                                loss_dict_reduced["obj_loss"],
                                loss_dict_reduced["class_loss"],
                                losses_reduced)).detach()
        mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses

        # abort the whole run on NaN/inf loss and report the offending images
        if not torch.isfinite(losses_reduced):
            print('WARNING: non-finite loss, ending training ', loss_dict_reduced)
            print("training image path: {}".format(",".join(paths)))
            sys.exit(1)

        losses *= 1. / accumulate  # scale loss

        # backward
        if scaler is not None:
            scaler.scale(losses).backward()
        else:
            losses.backward()

        # optimize
        # weights are updated only on batches where ni % accumulate == 0
        # (the original note describes this as "every 64 images")
        if ni % accumulate == 0:
            if scaler is not None:
                scaler.step(optimizer)
                scaler.update()
            else:
                optimizer.step()
            optimizer.zero_grad()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        now_lr = optimizer.param_groups[0]["lr"]
        metric_logger.update(lr=now_lr)

        if ni % accumulate == 0 and lr_scheduler is not None:  # warm-up scheduler (first epoch only)
            lr_scheduler.step()

    return mloss, now_lr


@torch.no_grad()
def evaluate(model, data_loader, coco=None, device=None):
    """Run ``model`` over ``data_loader`` and compute COCO bbox metrics.

    Args:
        model: detection model; the first element of its output is passed to
            ``non_max_suppression``.
        data_loader: yields ``(imgs, targets, paths, shapes, img_index)``.
        coco: COCO API object for the ground truth; built from
            ``data_loader.dataset`` when None.
        device: device the images are moved to.

    Returns:
        list: the ``stats`` of the COCO evaluator for the first IoU type
        ("bbox"), converted to a Python list.
    """
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = "Test: "

    if coco is None:
        coco = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(coco, iou_types)

    for imgs, targets, paths, shapes, img_index in metric_logger.log_every(data_loader, 100, header):
        imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
        # targets = targets.to(device)

        # skip the GPU synchronisation when running on the CPU
        # NOTE(review): with the default device=None this condition is True
        # and torch.cuda.synchronize(None) is called - confirm callers always pass a device
        if device != torch.device("cpu"):
            torch.cuda.synchronize(device)

        model_time = time.time()
        pred = model(imgs)[0]  # only get inference result
        pred = non_max_suppression(pred, conf_thres=0.01, iou_thres=0.6, multi_label=False)
        model_time = time.time() - model_time

        outputs = []
        for index, p in enumerate(pred):
            if p is None:
                # no detections for this image: use empty placeholders
                p = torch.empty((0, 6), device=cpu_device)
                boxes = torch.empty((0, 4), device=cpu_device)
            else:
                # xmin, ymin, xmax, ymax
                boxes = p[:, :4]
                # shapes: (h0, w0), ((h / h0, w / w0), pad)
                # rescale the boxes to the original image size so that the computed mAP is accurate
                boxes = scale_coords(imgs[index].shape[1:], boxes, shapes[index][0]).round()

            # note: the boxes passed on must be absolute xmin, ymin, xmax, ymax coordinates
            info = {"boxes": boxes.to(cpu_device),
                    "labels": p[:, 5].to(device=cpu_device, dtype=torch.int64),
                    "scores": p[:, 4].to(cpu_device)}
            outputs.append(info)

        res = {img_id: output for img_id, output in zip(img_index, outputs)}

        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()

    result_info = coco_evaluator.coco_eval[iou_types[0]].stats.tolist()  # numpy to list

    return result_info


def _get_iou_types(model):
    model_without_ddp = model
    if isinstance(model, torch.nn.parallel.DistributedDataParallel):
        model_without_ddp = model.module
    iou_types = ["bbox"]
    return iou_types


================================================
FILE: pytorch_object_detection/yolov3_spp/trans_voc2yolo.py
================================================
"""
本脚本有两个功能：
1.将voc数据集标注信息(.xml)转为yolo标注格式(.txt)，并将图像文件复制到相应文件夹
2.根据json标签文件，生成对应names标签(my_data_label.names)
"""
import os
from tqdm import tqdm
from lxml import etree
import json
import shutil


# Root directory of the VOC dataset and the VOC release to convert
voc_root = "/data/VOCdevkit"
voc_version = "VOC2012"

# Image-set files listing the training and validation samples to convert
train_txt = "train.txt"
val_txt = "val.txt"

# Directory the converted dataset is written to
save_file_root = "./my_yolo_dataset"

# JSON file mapping class names to label ids
label_json_path = './data/pascal_voc_classes.json'

# Build the VOC image directory, annotation (xml) directory and image-set file paths
voc_images_path = os.path.join(voc_root, voc_version, "JPEGImages")
voc_xml_path = os.path.join(voc_root, voc_version, "Annotations")
train_txt_path = os.path.join(voc_root, voc_version, "ImageSets", "Main", train_txt)
val_txt_path = os.path.join(voc_root, voc_version, "ImageSets", "Main", val_txt)

# Check that every required file/directory exists (runs at import time)
assert os.path.exists(voc_images_path), "VOC images path not exist..."
assert os.path.exists(voc_xml_path), "VOC xml path not exist..."
assert os.path.exists(train_txt_path), "VOC train txt file not exist..."
assert os.path.exists(val_txt_path), "VOC val txt file not exist..."
assert os.path.exists(label_json_path), "label_json_path does not exist..."
# Create the output directory if it is missing
if os.path.exists(save_file_root) is False:
    os.makedirs(save_file_root)


def parse_xml_to_dict(xml):
    """
    Recursively convert an XML element into nested dictionaries (modelled on
    TensorFlow's recursive_parse_xml_to_dict).

    Args:
        xml: xml tree obtained by parsing XML file contents using lxml.etree

    Returns:
        Python dictionary holding XML contents. Values of 'object' children
        are collected into a list; any other repeated tag keeps only its
        last occurrence.
    """
    # Leaf element: map the tag directly to its text.
    if len(xml) == 0:
        return {xml.tag: xml.text}

    children = {}
    for node in xml:
        value = parse_xml_to_dict(node)[node.tag]
        if node.tag == 'object':
            # There may be several objects, so they are gathered in a list.
            children.setdefault(node.tag, []).append(value)
        else:
            children[node.tag] = value
    return {xml.tag: children}


def translate_info(file_names: list, save_root: str, class_dict: dict, train_val='train'):
    """
    Convert VOC xml annotations into YOLO txt label files and copy the images.

    For every name in ``file_names`` a label file is written to
    ``<save_root>/<train_val>/labels/<name>.txt`` with one line per valid
    object: ``class_index xcenter ycenter w h`` (coordinates relative to the
    image size, rounded to 6 decimals). The image is copied to
    ``<save_root>/<train_val>/images`` unless it is already there.

    :param file_names: sample names (without extension) to convert
    :param save_root: root directory of the converted dataset
    :param class_dict: mapping from class name to 1-based class id
    :param train_val: sub-directory name, e.g. 'train' or 'val'
    :return: None
    """
    save_txt_path = os.path.join(save_root, train_val, "labels")
    os.makedirs(save_txt_path, exist_ok=True)
    save_images_path = os.path.join(save_root, train_val, "images")
    os.makedirs(save_images_path, exist_ok=True)

    for file in tqdm(file_names, desc="translate {} file...".format(train_val)):
        # make sure the image file exists
        img_path = os.path.join(voc_images_path, file + ".jpg")
        assert os.path.exists(img_path), "file:{} not exist...".format(img_path)

        # make sure the xml file exists
        xml_path = os.path.join(voc_xml_path, file + ".xml")
        assert os.path.exists(xml_path), "file:{} not exist...".format(xml_path)

        # read xml
        with open(xml_path) as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = parse_xml_to_dict(xml)["annotation"]
        img_height = int(data["size"]["height"])
        img_width = int(data["size"]["width"])

        assert "object" in data.keys(), "file: '{}' lack of object key.".format(xml_path)
        if len(data["object"]) == 0:
            # no objects in this xml file: skip the sample entirely
            print("Warning: in '{}' xml, there are no objects.".format(xml_path))
            continue

        # Collect the label lines first and join them afterwards. Writing the
        # separator based on the object's position would leave a blank first
        # line whenever the first object is skipped as invalid.
        label_lines = []
        for obj in data["object"]:
            # bounding box of this object
            xmin = float(obj["bndbox"]["xmin"])
            xmax = float(obj["bndbox"]["xmax"])
            ymin = float(obj["bndbox"]["ymin"])
            ymax = float(obj["bndbox"]["ymax"])
            class_name = obj["name"]
            class_index = class_dict[class_name] - 1  # YOLO class ids start at 0

            # Some annotations have zero width or height; such boxes would
            # produce NaN in the regression loss, so they are dropped.
            if xmax <= xmin or ymax <= ymin:
                print("Warning: in '{}' xml, there are some bbox w/h <=0".format(xml_path))
                continue

            # convert the box to YOLO format (centre x, centre y, width, height)
            xcenter = xmin + (xmax - xmin) / 2
            ycenter = ymin + (ymax - ymin) / 2
            w = xmax - xmin
            h = ymax - ymin

            # absolute -> relative coordinates, rounded to 6 decimals
            xcenter = round(xcenter / img_width, 6)
            ycenter = round(ycenter / img_height, 6)
            w = round(w / img_width, 6)
            h = round(h / img_height, 6)

            label_lines.append(
                " ".join(str(i) for i in [class_index, xcenter, ycenter, w, h]))

        # write object info into txt (lines separated by "\n", no trailing newline)
        with open(os.path.join(save_txt_path, file + ".txt"), "w") as f:
            f.write("\n".join(label_lines))

        # copy image into save_images_path
        path_copy_to = os.path.join(save_images_path, os.path.basename(img_path))
        if os.path.exists(path_copy_to) is False:
            shutil.copyfile(img_path, path_copy_to)


def create_class_names(class_dict: dict):
    """Write the keys of ``class_dict`` to ./data/my_data_label.names.

    One name per line, in dictionary order, with no trailing newline.
    """
    names = list(class_dict.keys())
    with open("./data/my_data_label.names", "w") as w:
        w.write("\n".join(names))


def main():
    """Convert the VOC train/val splits to YOLO format and write the names file."""
    # read class_indict; the context manager closes the file handle, which
    # a bare open() left open for the rest of the run
    with open(label_json_path, 'r') as json_file:
        class_dict = json.load(json_file)

    # read every line of train.txt, dropping blank lines
    with open(train_txt_path, "r") as r:
        train_file_names = [i for i in r.read().splitlines() if len(i.strip()) > 0]
    # convert the annotations to YOLO format and copy the images
    translate_info(train_file_names, save_file_root, class_dict, "train")

    # read every line of val.txt, dropping blank lines
    with open(val_txt_path, "r") as r:
        val_file_names = [i for i in r.read().splitlines() if len(i.strip()) > 0]
    # convert the annotations to YOLO format and copy the images
    translate_info(val_file_names, save_file_root, class_dict, "val")

    # create the my_data_label.names file
    create_class_names(class_dict)


if __name__ == "__main__":
    main()


================================================
FILE: pytorch_object_detection/yolov3_spp/validation.py
================================================
"""
该脚本用于调用训练好的模型权重去计算验证集/测试集的COCO指标
以及每个类别的mAP(IoU=0.5)
"""
import json

from models import *
from build_utils.datasets import *
from build_utils.utils import *
from train_utils import get_coco_api_from_dataset, CocoEvaluator


def summarize(self, catId=None):
    """
    Compute summary metrics for evaluation results.
    Note this function can *only* be applied on the default parameter setting.

    Args:
        self: evaluator object exposing ``params`` (``iouThrs``,
            ``areaRngLbl``, ``maxDets``) and ``eval`` (``precision`` and
            ``recall`` arrays).
        catId (int): when an int, restrict the metrics to that category
            index; otherwise all categories are averaged.

    Returns:
        tuple: ``(stats, print_info)`` - the 12 metric values and the
        formatted report, one metric per line.

    Raises:
        Exception: if ``self.eval`` is empty, i.e. accumulate() has not run.
    """
    # Validate before touching self.eval: checking afterwards would let an
    # empty ``eval`` fail with a KeyError instead of this message.
    if not self.eval:
        raise Exception('Please run accumulate() first')

    def _summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):
        """Return (mean value, formatted line) for one metric setting."""
        p = self.params
        iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
        titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
        typeStr = '(AP)' if ap == 1 else '(AR)'
        iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
            if iouThr is None else '{:0.2f}'.format(iouThr)

        aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
        mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]

        if ap == 1:
            # dimension of precision: [TxRxKxAxM]
            s = self.eval['precision']
            # IoU
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]

            if isinstance(catId, int):
                s = s[:, :, catId, aind, mind]
            else:
                s = s[:, :, :, aind, mind]

        else:
            # dimension of recall: [TxKxAxM]
            s = self.eval['recall']
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]

            if isinstance(catId, int):
                s = s[:, catId, aind, mind]
            else:
                s = s[:, :, aind, mind]

        # Entries of -1 are excluded from the mean; -1 is reported when
        # nothing else is left.
        valid = s[s > -1]
        if len(valid) == 0:
            mean_s = -1
        else:
            mean_s = np.mean(valid)

        print_string = iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)
        return mean_s, print_string

    stats, print_list = [0] * 12, [""] * 12
    stats[0], print_list[0] = _summarize(1)
    stats[1], print_list[1] = _summarize(1, iouThr=.5, maxDets=self.params.maxDets[2])
    stats[2], print_list[2] = _summarize(1, iouThr=.75, maxDets=self.params.maxDets[2])
    stats[3], print_list[3] = _summarize(1, areaRng='small', maxDets=self.params.maxDets[2])
    stats[4], print_list[4] = _summarize(1, areaRng='medium', maxDets=self.params.maxDets[2])
    stats[5], print_list[5] = _summarize(1, areaRng='large', maxDets=self.params.maxDets[2])
    stats[6], print_list[6] = _summarize(0, maxDets=self.params.maxDets[0])
    stats[7], print_list[7] = _summarize(0, maxDets=self.params.maxDets[1])
    stats[8], print_list[8] = _summarize(0, maxDets=self.params.maxDets[2])
    stats[9], print_list[9] = _summarize(0, areaRng='small', maxDets=self.params.maxDets[2])
    stats[10], print_list[10] = _summarize(0, areaRng='medium', maxDets=self.params.maxDets[2])
    stats[11], print_list[11] = _summarize(0, areaRng='large', maxDets=self.params.maxDets[2])

    print_info = "\n".join(print_list)

    return stats, print_info


def main(parser_data):
    """Evaluate a trained Darknet detector on the validation split and record the metrics.

    Builds the validation dataloader, runs inference with NMS, feeds the
    rescaled predictions to a ``CocoEvaluator`` and finally prints and writes
    the COCO summary plus the per-category mAP (IoU=0.5) to ``record_mAP.txt``.

    Args:
        parser_data: parsed command-line namespace; this function reads its
            ``device``, ``data``, ``batch_size``, ``img_size``, ``hyp``,
            ``cfg`` and ``weights`` attributes.

    Raises:
        AssertionError: if ``./data/pascal_voc_classes.json`` does not exist.
    """
    device = torch.device(parser_data.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    # read class_indict
    label_json_path = './data/pascal_voc_classes.json'
    assert os.path.exists(label_json_path), "json file {} dose not exist.".format(label_json_path)
    with open(label_json_path, 'r') as f:
        class_dict = json.load(f)

    # invert the mapping: JSON value -> JSON key (looked up below with ``i + 1``)
    category_index = {v: k for k, v in class_dict.items()}

    data_dict = parse_data_cfg(parser_data.data)
    test_path = data_dict["valid"]

    # Note: collate_fn is custom here because each sample contains both the image and
    # its targets, so the default batching method cannot be used directly.
    batch_size = parser_data.batch_size
    # no worker processes when batch_size is 1; otherwise capped by CPU count, batch size and 8
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using %g dataloader workers' % nw)

    # load validation data set
    val_dataset = LoadImagesAndLabels(test_path, parser_data.img_size, batch_size,
                                      hyp=parser_data.hyp,
                                      rect=True)  # resize each batch's images to a suitable size to reduce computation (not the standard 512x512)

    val_dataset_loader = torch.utils.data.DataLoader(val_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=nw,
                                                     pin_memory=True,
                                                     collate_fn=val_dataset.collate_fn)

    # create model
    model = Darknet(parser_data.cfg, parser_data.img_size)
    weights_dict = torch.load(parser_data.weights, map_location='cpu')
    # accept either a checkpoint dict holding the weights under "model" or a bare state dict
    weights_dict = weights_dict["model"] if "model" in weights_dict else weights_dict
    model.load_state_dict(weights_dict)
    model.to(device)

    # evaluate on the test dataset
    coco = get_coco_api_from_dataset(val_dataset)
    iou_types = ["bbox"]
    coco_evaluator = CocoEvaluator(coco, iou_types)
    cpu_device = torch.device("cpu")

    model.eval()
    with torch.no_grad():
        for imgs, targets, paths, shapes, img_index in tqdm(val_dataset_loader, desc="validation..."):
            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0

            pred = model(imgs)[0]  # only get inference result
            pred = non_max_suppression(pred, conf_thres=0.01, iou_thres=0.6, multi_label=False)

            outputs = []
            for index, p in enumerate(pred):
                if p is None:
                    # no detections for this image: use empty tensors so the indexing below still works
                    p = torch.empty((0, 6), device=cpu_device)
                    boxes = torch.empty((0, 4), device=cpu_device)
                else:
                    # xmin, ymin, xmax, ymax
                    boxes = p[:, :4]
                    # shapes: (h0, w0), ((h / h0, w / w0), pad)
                    # rescale the boxes back to the original image scale so the computed mAP is accurate
                    boxes = scale_coords(imgs[index].shape[1:], boxes, shapes[index][0]).round()

                # Note: the boxes passed in here must be in xmin, ymin, xmax, ymax format, in absolute coordinates
                info = {"boxes": boxes.to(cpu_device),
                        "labels": p[:, 5].to(device=cpu_device, dtype=torch.int64),
                        "scores": p[:, 4].to(cpu_device)}
                outputs.append(info)

            # key each image's predictions by its image index for the evaluator
            res = {img_id: output for img_id, output in zip(img_index, outputs)}

            coco_evaluator.update(res)

    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()

    coco_eval = coco_evaluator.coco_eval["bbox"]
    # calculate COCO info for all classes
    coco_stats, print_coco = summarize(coco_eval)

    # calculate voc info for every classes(IoU=0.5)
    voc_map_info_list = []
    for i in range(len(category_index)):
        # catId is passed as the 0-based index i, while category_index is looked up with i + 1
        stats, _ = summarize(coco_eval, catId=i)
        voc_map_info_list.append(" {:15}: {}".format(category_index[i + 1], stats[1]))

    print_voc = "\n".join(voc_map_info_list)
    print(print_voc)

    # save the validation results to a txt file
    with open("record_mAP.txt", "w") as f:
        record_lines = ["COCO results:",
                        print_coco,
                        "",
                        "mAP(IoU=0.5) for each category:",
                        print_voc]
        f.write("\n".join(record_lines))


if __name__ == "__main__":
    # Command-line entry point: parse the validation options and hand them to main().
    import argparse

    parser = argparse.ArgumentParser(
        description=__doc__)

    # device type to use
    parser.add_argument('--device', default='cuda', help='device')

    # number of object classes to detect
    # (default given as an int so it matches ``type=int`` instead of relying on
    # argparse converting a string default)
    parser.add_argument('--num-classes', type=int, default=20, help='number of classes')

    parser.add_argument('--cfg', type=str, default='cfg/my_yolov3.cfg', help="*.cfg path")
    parser.add_argument('--data', type=str, default='data/my_data.data', help='*.data path')
    parser.add_argument('--hyp', type=str, default='cfg/hyp.yaml', help='hyperparameters path')
    parser.add_argument('--img-size', type=int, default=512, help='test size')

    # trained weights file
    parser.add_argument('--weights', default='./weights/yolov3spp-voc-512.pt', type=str, help='training weights')

    # batch size
    parser.add_argument('--batch_size', default=1, type=int, metavar='N',
                        help='batch size when validation.')

    args = parser.parse_args()

    main(args)


================================================
FILE: pytorch_segmentation/deeplab_v3/README.md
================================================
# DeepLabV3(Rethinking Atrous Convolution for Semantic Image Segmentation)

## 该项目主要是来自pytorch官方torchvision模块中的源码
* https://github.com/pytorch/vision/tree/main/torchvision/models/segmentation

## 环境配置：
* Python3.6/3.7/3.8
* Pytorch1.10
* Ubuntu或Centos(Windows暂不支持多GPU训练)
* 最好使用GPU训练
* 详细环境配置见```requirements.txt```

## 文件结构：
```
  ├── src: 模型的backbone以及DeepLabv3的搭建
  ├── train_utils: 训练、验证以及多GPU训练相关模块
  ├── my_dataset.py: 自定义dataset用于读取VOC数据集
  ├── train.py: 以deeplabv3_resnet50为例进行训练
  ├── train_multi_GPU.py: 针对使用多GPU的用户使用
  ├── predict.py: 简易的预测脚本，使用训练好的权重进行预测测试
  ├── validation.py: 利用训练好的权重验证/测试数据的mIoU等指标，并生成record_mAP.txt文件
  └── pascal_voc_classes.json: pascal_voc标签文件
```

## 预训练权重下载地址：
* 注意：官方提供的预训练权重是在COCO上预训练得到的，训练时只针对和PASCAL VOC相同的类别进行了训练，所以类别数是21(包括背景)
* deeplabv3_resnet50: https://download.pytorch.org/models/deeplabv3_resnet50_coco-cd0a2569.pth
* deeplabv3_resnet101: https://download.pytorch.org/models/deeplabv3_resnet101_coco-586e9e4e.pth
* deeplabv3_mobilenetv3_large_coco: https://download.pytorch.org/models/deeplabv3_mobilenet_v3_large-fc3c493d.pth
* 注意，下载的预训练权重记得要重命名，比如在train.py中读取的是```deeplabv3_resnet50_coco.pth```文件，
  不是```deeplabv3_resnet50_coco-cd0a2569.pth```
 
 
## 数据集，本例程使用的是PASCAL VOC2012数据集
* Pascal VOC2012 train/val数据集下载地址：http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
* 如果不了解数据集或者想使用自己的数据集进行训练，请参考我的博文: https://blog.csdn.net/qq_37541097/article/details/115787033

## 训练方法
* 确保提前准备好数据集
* 确保提前下载好对应预训练模型权重
* 若要使用单GPU或者CPU训练，直接使用train.py训练脚本
* 若要使用多GPU训练，使用```torchrun --nproc_per_node=8 train_multi_GPU.py```指令,```nproc_per_node```参数为使用GPU数量
* 如果想指定使用哪些GPU设备可在指令前加上```CUDA_VISIBLE_DEVICES=0,3```(例如我只要使用设备中的第1块和第4块GPU设备)
* ```CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py```

## 注意事项
* 在使用训练脚本时，注意要将'--data-path'(VOC_root)设置为自己存放'VOCdevkit'文件夹所在的**根目录**
* 在使用预测脚本时，要将'weights_path'设置为你自己生成的权重路径。
* 使用validation文件时，注意确保你的验证集或者测试集中必须包含每个类别的目标，并且使用时只需要修改'--num-classes'、'--aux'、'--data-path'和'--weights'即可，其他代码尽量不要改动

## 如果对DeepLabV3原理不是很理解可参考我的bilibili
* https://www.bilibili.com/video/BV1Jb4y1q7j7


## 进一步了解该项目，以及对DeepLabV3代码的分析可参考我的bilibili
* https://www.bilibili.com/video/BV1TD4y1c7Wx

## Pytorch官方实现的DeeplabV3网络框架图
![deeplabv3_resnet50_pytorch](./deeplabv3_resnet50.png)


================================================
FILE: pytorch_segmentation/deeplab_v3/get_palette.py
================================================
import json
import numpy as np
from PIL import Image

# Read the mask label; the context manager makes sure the file handle is closed.
with Image.open("./2007_001288.png") as target:
    # Get the palette as a flat list [r0, g0, b0, r1, g1, b1, ...];
    # Pillow returns None when the image carries no palette.
    palette = target.getpalette()
if palette is None:
    # Fail with a clear message instead of an opaque reshape error below.
    raise ValueError("./2007_001288.png has no palette; expected a palette-mode PNG mask")
# Group the flat list into [r, g, b] triples.
palette = np.reshape(palette, (-1, 3)).tolist()
# Convert to a dict: palette index -> [r, g, b]
pd = dict(enumerate(palette))

# Integer keys are serialized as strings by json.
json_str = json.dumps(pd)
with open("palette.json", "w") as f:
    f.write(json_str)

# target = np.array(target)
# print(target)


================================================
FILE: pytorch_segmentation/deeplab_v3/my_dataset.py
================================================
import os

import torch.utils.data as data
from PIL import Image


class VOCSegmentation(data.Dataset):
    """Pascal VOC semantic-segmentation dataset yielding (image, mask) pairs.

    Args:
        voc_root: directory that contains the ``VOCdevkit`` folder.
        year: VOC release, either "2007" or "2012".
        transforms: optional callable invoked as ``transforms(img, target)``
            and expected to return the transformed pair.
        txt_name: split file under ``ImageSets/Segmentation`` listing one
            sample name per line (blank lines are ignored).
    """

    def __init__(self, voc_root, year="2012", transforms=None, txt_name: str = "train.txt"):
        super().__init__()
        assert year in ["2007", "2012"], "year must be in ['2007', '2012']"
        dataset_root = os.path.join(voc_root, "VOCdevkit", f"VOC{year}")
        assert os.path.exists(dataset_root), "path '{}' does not exist.".format(dataset_root)

        split_file = os.path.join(dataset_root, "ImageSets", "Segmentation", txt_name)
        assert os.path.exists(split_file), "file '{}' does not exist.".format(split_file)
        with open(split_file, "r") as f:
            stripped_lines = (line.strip() for line in f)
            sample_names = [name for name in stripped_lines if len(name) > 0]

        jpeg_dir = os.path.join(dataset_root, 'JPEGImages')
        mask_dir = os.path.join(dataset_root, 'SegmentationClass')

        # Build the image / mask path lists in lockstep, one entry per sample name.
        self.images = []
        self.masks = []
        for name in sample_names:
            self.images.append(os.path.join(jpeg_dir, name + ".jpg"))
            self.masks.append(os.path.join(mask_dir, name + ".png"))
        assert (len(self.images) == len(self.masks))
        self.transforms = transforms

    def __getitem__(self, index):
        """Load one sample.

        Args:
            index (int): position of the sample in the split file.

        Returns:
            tuple: ``(image, target)`` where the image is opened as RGB and
            the target is the segmentation mask, both passed through
            ``transforms`` when one was given.
        """
        img = Image.open(self.images[index]).convert('RGB')
        target = Image.open(self.masks[index])

        if self.transforms is None:
            return img, target

        img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        """Return the number of samples listed in the split file."""
        return len(self.images)

    @staticmethod
    def collate_fn(batch):
        """Pad a list of (image, target) samples to a common size and stack them.

        Images are padded with 0 and targets with 255.
        """
        images, targets = zip(*batch)
        return cat_list(images, fill_value=0), cat_list(targets, fill_value=255)


def cat_list(images, fill_value=0):
    """Stack tensors of differing sizes into one padded batch tensor.

    Every dimension of the batch is the maximum of that dimension over
    ``images``; each tensor is copied into the top-left corner of its slot
    (along the last two dimensions) and the rest is left at ``fill_value``.

    Args:
        images: sequence of tensors with the same number of dimensions.
        fill_value: value used for the padded area.

    Returns:
        Tensor of shape ``(len(images), *max_size)`` with the dtype and
        device of ``images[0]``.
    """
    shapes = [img.shape for img in images]
    max_size = tuple(max(dims) for dims in zip(*shapes))
    batched = images[0].new_full((len(images), *max_size), fill_value)
    for slot, img in enumerate(images):
        rows, cols = img.shape[-2], img.shape[-1]
        batched[slot][..., :rows, :cols].copy_(img)
    return batched


# dataset = VOCSegmentation(voc_root="/data/", transforms=get_transform(train=True))
# d1 = dataset[0]
# print(d1)


================================================
FILE: pytorch_segmentation/deeplab_v3/palette.json
================================================
{"0": [0, 0, 0], "1": [128, 0, 0], "2": [0, 128, 0], "3": [128, 128, 0], "4": [0, 0, 128], "5": [128, 0, 128], "6": [0, 128, 128], "7": [128, 128, 128], "8": [64, 0, 0], "9": [192, 0, 0], "10": [64, 128, 0], "11": [192, 128, 0], "12": [64, 0, 128], "13": [192, 0, 128], "14": [64, 128, 128], "15": [192, 128, 128], "16": [0, 64, 0], "17": [128, 64, 0], "18": [0, 192, 0], "19": [128, 192, 0], "20": [0, 64, 128], "21": [128, 64, 128], "22": [0, 192, 128], "23": [128, 192, 128], "24": [64, 64, 0], "25": [192, 64, 0], "26": [64, 192, 0], "27": [192, 192, 0], "28": [64, 64, 128], "29": [192, 64, 128], "30": [64, 192, 128], "31": [192, 192, 128], "32": [0, 0, 64], "33": [128, 0, 64], "34": [0, 128, 64], "35": [128, 128, 64], "36": [0, 0, 192], "37": [128, 0, 192], "38": [0, 128, 192], "39": [128, 128, 192], "40": [64, 0, 64], "41": [192, 0, 64], "42": [64, 128, 64], "43": [192, 128, 64], "44": [64, 0, 192], "45": [192, 0, 192], "46": [64, 128, 192], "47": [192, 128, 192], "48": [0, 64, 64], "49": [128, 64, 64], "50": [0, 192, 64], "51": [128, 192, 64], "52": [0, 64, 192], "53": [128, 64, 192], "54": [0, 192, 192], "55": [128, 192, 192], "56": [64, 64, 64], "57": [192, 64, 64], "58": [64, 192, 64], "59": [192, 192, 64], "60": [64, 64, 192], "61": [192, 64, 192], "62": [64, 192, 192], "63": [192, 192, 192], "64": [32, 0, 0], "65": [160, 0, 0], "66": [32, 128, 0], "67": [160, 128, 0], "68": [32, 0, 128], "69": [160, 0, 128], "70": [32, 128, 128], "71": [160, 128, 128], "72": [96, 0, 0], "73": [224, 0, 0], "74": [96, 128, 0], "75": [224, 128, 0], "76": [96, 0, 128], "77": [224, 0, 128], "78": [96, 128, 128], "79": [224, 128, 128], "80": [32, 64, 0], "81": [160, 64, 0], "82": [32, 192, 0], "83": [160, 192, 0], "84": [32, 64, 128], "85": [160, 64, 128], "86": [32, 192, 128], "87": [160, 192, 128], "88": [96, 64, 0], "89": [224, 64, 0], "90": [96, 192, 0], "91": [224, 192, 0], "92": [96, 64, 128], "93": [224, 64, 128], "94": [96, 192, 128], "95": [224, 192, 128], "96": [32, 0, 
64], "97": [160, 0, 64], "98": [32, 128, 64], "99": [160, 128, 64], "100": [32, 0, 192], "101": [160, 0, 192], "102": [32, 128, 192], "103": [160, 128, 192], "104": [96, 0, 64], "105": [224, 0, 64], "106": [96, 128, 64], "107": [224, 128, 64], "108": [96, 0, 192], "109": [224, 0, 192], "110": [96, 128, 192], "111": [224, 128, 192], "112": [32, 64, 64], "113": [160, 64, 64], "114": [32, 192, 64], "115": [160, 192, 64], "116": [32, 64, 192], "117": [160, 64, 192], "118": [32, 192, 192], "119": [160, 192, 192], "120": [96, 64, 64], "121": [224, 64, 64], "122": [96, 192, 64], "123": [224, 192, 64], "124": [96, 64, 192], "125": [224, 64, 192], "126": [96, 192, 192], "127": [224, 192, 192], "128": [0, 32, 0], "129": [128, 32, 0], "130": [0, 160, 0], "131": [128, 160, 0], "132": [0, 32, 128], "133": [128, 32, 128], "134": [0, 160, 128], "135": [128, 160, 128], "136": [64, 32, 0], "137": [192, 32, 0], "138": [64, 160, 0], "139": [192, 160, 0], "140": [64, 32, 128], "141": [192, 32, 128], "142": [64, 160, 128], "143": [192, 160, 128], "144": [0, 96, 0], "145": [128, 96, 0], "146": [0, 224, 0], "147": [128, 224, 0], "148": [0, 96, 128], "149": [128, 96, 128], "150": [0, 224, 128], "151": [128, 224, 128], "152": [64, 96, 0], "153": [192, 96, 0], "154": [64, 224, 0], "155": [192, 224, 0], "156": [64, 96, 128], "157": [192, 96, 128], "158": [64, 224, 128], "159": [192, 224, 128], "160": [0, 32, 64], "161": [128, 32, 64], "162": [0, 160, 64], "163": [128, 160, 64], "164": [0, 32, 192], "165": [128, 32, 192], "166": [0, 160, 192], "167": [128, 160, 192], "168": [64, 32, 64], "169": [192, 32, 64], "170": [64, 160, 64], "171": [192, 160, 64], "172": [64, 32, 192], "173": [192, 32, 192], "174": [64, 160, 192], "175": [192, 160, 192], "176": [0, 96, 64], "177": [128, 96, 64], "178": [0, 224, 64], "179": [128, 224, 64], "180": [0, 96, 192], "181": [128, 96, 192], "182": [0, 224, 192], "183": [128, 224, 192], "184": [64, 96, 64], "185": [192, 96, 64], "186": [64, 224, 64], "187": [192, 
224, 64], "188": [64, 96, 192], "189": [192, 96, 192], "190": [64, 224, 192], "191": [192, 224, 192], "192": [32, 32, 0], "193": [160, 32, 0], "194": [32, 160, 0], "195": [160, 160, 0], "196": [32, 32, 128], "197": [160, 32, 128], "198": [32, 160, 128], "199": [160, 160, 128], "200": [96, 32, 0], "201": [224, 32, 0], "202": [96, 160, 0], "203": [224, 160, 0], "204": [96, 32, 128], "205": [224, 32, 128], "206": [96, 160, 128], "207": [224, 160, 128], "208": [32, 96, 0], "209": [160, 96, 0], "210": [32, 224, 0], "211": [160, 224, 0], "212": [32, 96, 128], "213": [160, 96, 128], "214": [32, 224, 128], "215": [160, 224, 128], "216": [96, 96, 0], "217": [224, 96, 0], "218": [96, 224, 0], "219": [224, 224, 0], "220": [96, 96, 128], "221": [224, 96, 128], "222": [96, 224, 128], "223": [224, 224, 128], "224": [32, 32, 64], "225": [160, 32, 64], "226": [32, 160, 64], "227": [160, 160, 64], "228": [32, 32, 192], "229": [160, 32, 192], "230": [32, 160, 192], "231": [160, 160, 192], "232": [96, 32, 64], "233": [224, 32, 64], "234": [96, 160, 64], "235": [224, 160, 64], "236": [96, 32, 192], "237": [224, 32, 192], "238": [96, 160, 192], "239": [224, 160, 192], "240": [32, 96, 64], "241": [160, 96, 64], "242": [32, 224, 64], "243": [160, 224, 64], "244": [32, 96, 192], "245": [160, 96, 192], "246": [32, 224, 192], "247": [160, 224, 192], "248": [96, 96, 64], "249": [224, 96, 64], "250": [96, 224, 64], "251": [224, 224, 64], "252": [96, 96, 192], "253": [224, 96, 192], "254": [96, 224, 192], "255": [224, 224, 192]}

================================================
FILE: pytorch_segmentation/deeplab_v3/pascal_voc_classes.json
================================================
{
    "aeroplane": 1,
    "bicycle": 2,
    "bird": 3,
    "boat": 4,
    "bottle": 5,
    "bus": 6,
    "car": 7,
    "cat": 8,
    "chair": 9,
    "cow": 10,
    "diningtable": 11,
    "dog": 12,
    "horse": 13,
    "motorbike": 14,
    "person": 15,
    "pottedplant": 16,
    "sheep": 17,
    "sofa": 18,
    "train": 19,
    "tvmonitor": 20
}

================================================
FILE: pytorch_segmentation/deeplab_v3/predict.py
================================================
import os
import time
import json

import torch
from torchvision import transforms
import numpy as np
from PIL import Image

from src import deeplabv3_resnet50


def time_synchronized():
    """Return the current wall-clock time, synchronizing CUDA first when it is available."""
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()


def main():
    """Run DeepLabV3-ResNet50 on a single image and save the colorized mask.

    Uses the hard-coded weights, image and palette paths below, performs one
    warm-up pass and one timed forward pass, then writes the palettized
    per-pixel class prediction to ``test_result.png``.

    Raises:
        AssertionError: if the weights, image or palette file is missing.
    """
    aux = False  # inference time not need aux_classifier
    classes = 20
    weights_path = "./save_weights/model_29.pth"
    img_path = "./test.jpg"
    palette_path = "./palette.json"
    assert os.path.exists(weights_path), f"weights {weights_path} not found."
    assert os.path.exists(img_path), f"image {img_path} not found."
    assert os.path.exists(palette_path), f"palette {palette_path} not found."
    with open(palette_path, "rb") as f:
        pallette_dict = json.load(f)
        # flatten the per-index color lists into the single flat list putpalette() takes
        pallette = []
        for v in pallette_dict.values():
            pallette += v

    # get devices
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # create model (+1 for the background class)
    model = deeplabv3_resnet50(aux=aux, num_classes=classes+1)

    # delete weights about aux_classifier
    weights_dict = torch.load(weights_path, map_location='cpu')['model']
    for k in list(weights_dict.keys()):
        if "aux" in k:
            del weights_dict[k]

    # load weights
    model.load_state_dict(weights_dict)
    model.to(device)

    # load image
    # Force 3-channel RGB: the Normalize below is given 3-channel mean/std, so a
    # grayscale, RGBA or palette image would otherwise fail.
    original_img = Image.open(img_path).convert('RGB')

    # from pil image to tensor and normalize
    data_transform = transforms.Compose([transforms.Resize(520),
                                         transforms.ToTensor(),
                                         transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                                              std=(0.229, 0.224, 0.225))])
    img = data_transform(original_img)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)

    model.eval()  # switch to evaluation mode
    with torch.no_grad():
        # init model: one warm-up pass on zeros so it is not included in the timing below
        img_height, img_width = img.shape[-2:]
        init_img = torch.zeros((1, 3, img_height, img_width), device=device)
        model(init_img)

        t_start = time_synchronized()
        output = model(img.to(device))
        t_end = time_synchronized()
        print("inference time: {}".format(t_end - t_start))

        # per-pixel class index = argmax over the class dimension
        prediction = output['out'].argmax(1).squeeze(0)
        prediction = prediction.to("cpu").numpy().astype(np.uint8)
        mask = Image.fromarray(prediction)
        mask.putpalette(pallette)
        mask.save("test_result.png")


# Run the single-image prediction only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_segmentation/deeplab_v3/requirements.txt
================================================
numpy==1.22.0
torch==1.10.0
torchvision==0.11.1
Pillow


================================================
FILE: pytorch_segmentation/deeplab_v3/results20211027-104607.txt
================================================
[epoch: 0]
train_loss: 0.7098
lr: 0.000100
global correct: 94.7
average row correct: ['97.0', '93.9', '86.4', '93.1', '74.4', '64.0', '97.9', '84.7', '95.7', '63.8', '92.2', '68.8', '88.8', '90.5', '93.0', '95.3', '75.3', '94.2', '83.6', '91.9', '75.0']
IoU: ['93.9', '91.5', '42.6', '88.9', '65.3', '60.4', '95.9', '76.3', '90.7', '50.9', '87.3', '54.1', '86.1', '83.0', '87.2', '89.3', '64.3', '91.0', '58.9', '84.5', '73.6']
mean IoU: 76.9

[epoch: 1]
train_loss: 0.6005
lr: 0.000077
global correct: 94.7
average row correct: ['96.1', '96.3', '84.1', '95.1', '84.1', '81.4', '98.6', '85.8', '96.6', '68.7', '91.9', '71.3', '93.9', '91.4', '96.4', '95.5', '81.0', '94.1', '85.4', '94.9', '83.6']
IoU: ['93.9', '92.3', '42.2', '88.8', '69.1', '71.2', '96.1', '75.1', '91.9', '48.9', '87.8', '56.0', '87.9', '85.7', '89.2', '89.5', '63.6', '90.3', '56.2', '85.3', '79.8']
mean IoU: 78.1

[epoch: 2]
train_loss: 0.5840
lr: 0.000054
global correct: 94.8
average row correct: ['96.2', '95.5', '85.8', '94.6', '85.5', '83.7', '98.8', '87.5', '96.3', '71.4', '92.5', '72.8', '93.1', '91.9', '96.7', '94.9', '81.5', '95.3', '82.8', '95.3', '84.1']
IoU: ['94.0', '91.2', '42.7', '88.3', '69.2', '72.7', '96.4', '74.8', '92.0', '49.8', '87.5', '58.3', '87.3', '85.0', '89.3', '89.2', '62.6', '89.6', '58.1', '84.8', '80.3']
mean IoU: 78.2

[epoch: 3]
train_loss: 0.5637
lr: 0.000029
global correct: 94.8
average row correct: ['96.1', '95.9', '81.7', '94.8', '86.5', '79.4', '99.0', '89.1', '95.8', '71.4', '93.8', '71.0', '93.4', '92.4', '97.3', '94.9', '80.4', '96.9', '83.3', '94.7', '84.4']
IoU: ['94.0', '89.5', '41.8', '87.6', '69.0', '70.4', '96.0', '75.9', '92.1', '49.7', '87.3', '58.1', '86.2', '83.9', '88.7', '89.2', '63.7', '88.8', '57.7', '85.3', '79.9']
mean IoU: 77.8

[epoch: 4]
train_loss: 0.5779
lr: 0.000000
global correct: 94.8
average row correct: ['96.3', '93.6', '85.9', '95.1', '82.6', '83.8', '98.5', '90.0', '95.9', '71.1', '93.2', '68.4', '92.6', '93.9', '95.9', '94.5', '82.8', '96.3', '82.8', '94.5', '86.4']
IoU: ['94.1', '91.8', '42.5', '88.5', '67.8', '72.1', '96.6', '78.3', '92.0', '49.8', '88.3', '58.8', '86.7', '84.9', '89.0', '89.5', '61.0', '89.1', '56.6', '84.6', '80.2']
mean IoU: 78.2


================================================
FILE: pytorch_segmentation/deeplab_v3/src/__init__.py
================================================
from .deeplabv3_model import deeplabv3_resnet50, deeplabv3_resnet101, deeplabv3_mobilenetv3_large


================================================
FILE: pytorch_segmentation/deeplab_v3/src/deeplabv3_model.py
================================================
from collections import OrderedDict

from typing import Dict, List

import torch
from torch import nn, Tensor
from torch.nn import functional as F
from .resnet_backbone import resnet50, resnet101
from .mobilenet_backbone import mobilenet_v3_large


class IntermediateLayerGetter(nn.ModuleDict):
    """
    Wrap a model so that a forward pass yields selected intermediate outputs.

    The wrapper relies on the model's direct children having been registered
    in the order in which they are applied, so a child module must **not** be
    reused twice in the model's forward for this to work.

    Only direct children can be requested: for a given `model`,
    `model.feature1` can be returned but `model.feature1.layer2` cannot.

    Args:
        model (nn.Module): model whose features are extracted
        return_layers (Dict[name, new_name]): maps the name of each child
            whose activation should be returned to the key under which that
            activation appears in the output dict.

    Raises:
        ValueError: if a key of `return_layers` is not a direct child of `model`.
    """
    _version = 2
    __annotations__ = {
        "return_layers": Dict[str, str],
    }

    def __init__(self, model: nn.Module, return_layers: Dict[str, str]) -> None:
        child_names = [name for name, _ in model.named_children()]
        if set(return_layers) - set(child_names):
            raise ValueError("return_layers are not present in model")

        # Working copy of the requested names; the caller's dict stays untouched
        # and is stored on the instance for use in forward().
        pending = {str(k): str(v) for k, v in return_layers.items()}

        # Rebuild the backbone, dropping every child that comes after the last
        # requested layer.
        kept = OrderedDict()
        for name, module in model.named_children():
            kept[name] = module
            pending.pop(name, None)
            if len(pending) == 0:
                break

        super().__init__(kept)
        self.return_layers = return_layers

    def forward(self, x: Tensor) -> Dict[str, Tensor]:
        """Apply the kept children in order and collect the requested activations."""
        collected = OrderedDict()
        for layer_name, layer in self.items():
            x = layer(x)
            if layer_name not in self.return_layers:
                continue
            collected[self.return_layers[layer_name]] = x
        return collected


class DeepLabV3(nn.Module):
    """
    DeepLabV3 segmentation model, as described in
    `"Rethinking Atrous Convolution for Semantic Image Segmentation"
    <https://arxiv.org/abs/1706.05587>`_.

    Args:
        backbone (nn.Module): feature extractor. It must return a dict of
            tensors holding the last feature map under "out" and, when an
            auxiliary classifier is used, an extra feature map under "aux".
        classifier (nn.Module): head that turns the "out" feature map into a
            dense prediction.
        aux_classifier (nn.Module, optional): auxiliary head used during training
    """
    __constants__ = ['aux_classifier']

    def __init__(self, backbone, classifier, aux_classifier=None):
        super().__init__()
        self.backbone = backbone
        self.classifier = classifier
        self.aux_classifier = aux_classifier

    def forward(self, x: Tensor) -> Dict[str, Tensor]:
        """Return an ordered dict with "out" (and "aux" when configured) predictions at input resolution."""
        spatial_size = x.shape[-2:]
        # contract: features is a dict of tensors
        features = self.backbone(x)

        result = OrderedDict()
        main_logits = self.classifier(features["out"])
        # bilinear interpolation back to the input resolution
        result["out"] = F.interpolate(main_logits, size=spatial_size, mode='bilinear', align_corners=False)

        if self.aux_classifier is not None:
            aux_logits = self.aux_classifier(features["aux"])
            # bilinear interpolation back to the input resolution
            result["aux"] = F.interpolate(aux_logits, size=spatial_size, mode='bilinear', align_corners=False)

        return result


class FCNHead(nn.Sequential):
    """Auxiliary head: 3x3 conv -> BN -> ReLU -> Dropout(0.1) -> 1x1 conv.

    The hidden width is ``in_channels // 4``; the final 1x1 convolution
    outputs ``channels`` maps.
    """

    def __init__(self, in_channels, channels):
        hidden = in_channels // 4
        # Layer order fixes the Sequential indices (and so the state_dict keys).
        stages = [
            nn.Conv2d(in_channels, hidden, 3, padding=1, bias=False),
            nn.BatchNorm2d(hidden),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Conv2d(hidden, channels, 1),
        ]
        super().__init__(*stages)


class ASPPConv(nn.Sequential):
    """Atrous branch of ASPP: dilated 3x3 conv -> BN -> ReLU.

    Padding equals the dilation, so the spatial size is preserved.
    """

    def __init__(self, in_channels: int, out_channels: int, dilation: int) -> None:
        atrous_conv = nn.Conv2d(in_channels, out_channels, 3,
                                padding=dilation, dilation=dilation, bias=False)
        norm = nn.BatchNorm2d(out_channels)
        activation = nn.ReLU()
        super().__init__(atrous_conv, norm, activation)


class ASPPPooling(nn.Sequential):
    """Image-level branch of ASPP: global average pool -> 1x1 conv -> BN -> ReLU.

    The pooled 1x1 result is bilinearly upsampled back to the spatial size of
    the input so it can be concatenated with the other branches.
    """

    def __init__(self, in_channels: int, out_channels: int) -> None:
        layers = [
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(in_channels, out_channels, 1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]
        super().__init__(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Remember the input resolution before pooling collapses it to 1x1.
        target_size = x.shape[-2:]
        pooled = x
        for layer in self:
            pooled = layer(pooled)
        return F.interpolate(pooled, size=target_size, mode='bilinear', align_corners=False)


class ASPP(nn.Module):
    """Atrous Spatial Pyramid Pooling.

    Runs the input through parallel branches -- a 1x1 convolution, one
    ``ASPPConv`` per entry of ``atrous_rates`` and an ``ASPPPooling`` branch
    -- concatenates the results along the channel dimension and projects
    them back to ``out_channels`` with a 1x1 conv, BN, ReLU and Dropout(0.5).

    Args:
        in_channels: number of channels of the input feature map.
        atrous_rates: dilation rate of each atrous branch.
        out_channels: number of channels produced by every branch and by the
            final projection.
    """

    def __init__(self, in_channels: int, atrous_rates: List[int], out_channels: int = 256) -> None:
        super().__init__()

        # Branch order fixes the ModuleList indices (and so the state_dict keys).
        pointwise_branch = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        )
        branches = [pointwise_branch]
        branches.extend(ASPPConv(in_channels, out_channels, rate) for rate in tuple(atrous_rates))
        branches.append(ASPPPooling(in_channels, out_channels))
        self.convs = nn.ModuleList(branches)

        # Fuse the concatenated branch outputs back down to out_channels.
        concat_channels = len(self.convs) * out_channels
        self.project = nn.Sequential(
            nn.Conv2d(concat_channels, out_channels, 1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Dropout(0.5),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        branch_outputs = [branch(x) for branch in self.convs]
        return self.project(torch.cat(branch_outputs, dim=1))


class DeepLabHead(nn.Sequential):
    """DeepLabV3 classifier head: ASPP -> 3x3 conv -> BN -> ReLU -> 1x1 conv.

    ASPP uses atrous rates 12, 24 and 36; the final 1x1 convolution emits
    ``num_classes`` channels.
    """

    def __init__(self, in_channels: int, num_classes: int) -> None:
        # Layer order fixes the Sequential indices (and so the state_dict keys).
        stages = [
            ASPP(in_channels, [12, 24, 36]),
            nn.Conv2d(256, 256, 3, padding=1, bias=False),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(256, num_classes, 1),
        ]
        super().__init__(*stages)


def deeplabv3_resnet50(aux, num_classes=21, pretrain_backbone=False):
    """Build a DeepLabV3 model on a dilated ResNet-50 backbone.

    Args:
        aux: when truthy, also expose ``layer3`` features and attach an
            ``FCNHead`` auxiliary classifier.
        num_classes: number of output channels of the classifier heads.
        pretrain_backbone: when truthy, load backbone weights from
            ``resnet50.pth`` (path relative to the working directory).

    Returns:
        The assembled ``DeepLabV3`` module.
    """
    # 'resnet50_imagenet': 'https://download.pytorch.org/models/resnet50-0676ba61.pth'
    # 'deeplabv3_resnet50_coco': 'https://download.pytorch.org/models/deeplabv3_resnet50_coco-cd0a2569.pth'
    resnet = resnet50(replace_stride_with_dilation=[False, True, True])

    if pretrain_backbone:
        # load the pretrained resnet50 backbone weights
        backbone_state = torch.load("resnet50.pth", map_location='cpu')
        resnet.load_state_dict(backbone_state)

    # channel counts passed to the main (layer4) and auxiliary (layer3) heads
    out_inplanes, aux_inplanes = 2048, 1024

    return_layers = {'layer4': 'out', 'layer3': 'aux'} if aux else {'layer4': 'out'}
    backbone = IntermediateLayerGetter(resnet, return_layers=return_layers)

    # why using aux: https://github.com/pytorch/vision/issues/4292
    aux_classifier = FCNHead(aux_inplanes, num_classes) if aux else None
    classifier = DeepLabHead(out_inplanes, num_classes)

    return DeepLabV3(backbone, classifier, aux_classifier)


def deeplabv3_resnet101(aux, num_classes=21, pretrain_backbone=False):
    """Build a DeepLabV3 model on a dilated ResNet-101 backbone.

    Args:
        aux: when truthy, also expose ``layer3`` features and attach an
            ``FCNHead`` auxiliary classifier.
        num_classes: number of output channels of the classifier heads.
        pretrain_backbone: when truthy, load backbone weights from
            ``resnet101.pth`` (path relative to the working directory).

    Returns:
        The assembled ``DeepLabV3`` module.
    """
    # 'resnet101_imagenet': 'https://download.pytorch.org/models/resnet101-63fe2227.pth'
    # 'deeplabv3_resnet101_coco': 'https://download.pytorch.org/models/deeplabv3_resnet101_coco-586e9e4e.pth'
    resnet = resnet101(replace_stride_with_dilation=[False, True, True])

    if pretrain_backbone:
        # load the pretrained resnet101 backbone weights
        backbone_state = torch.load("resnet101.pth", map_location='cpu')
        resnet.load_state_dict(backbone_state)

    # channel counts passed to the main (layer4) and auxiliary (layer3) heads
    out_inplanes, aux_inplanes = 2048, 1024

    return_layers = {'layer4': 'out', 'layer3': 'aux'} if aux else {'layer4': 'out'}
    backbone = IntermediateLayerGetter(resnet, return_layers=return_layers)

    # why using aux: https://github.com/pytorch/vision/issues/4292
    aux_classifier = FCNHead(aux_inplanes, num_classes) if aux else None
    classifier = DeepLabHead(out_inplanes, num_classes)

    return DeepLabV3(backbone, classifier, aux_classifier)


def deeplabv3_mobilenetv3_large(aux, num_classes=21, pretrain_backbone=False):
    """Assemble a DeepLabV3 model on top of a dilated MobileNetV3-Large backbone.

    Reference weights:
        mobilenetv3_large_imagenet: https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth
        depv3_mobilenetv3_large_coco: https://download.pytorch.org/models/deeplabv3_mobilenet_v3_large-fc3c493d.pth

    Args:
        aux: when truthy, an intermediate feature map is also exposed as
            ``"aux"`` and an ``FCNHead`` auxiliary classifier is attached.
        num_classes: class count handed to both classifier heads.
        pretrain_backbone: when truthy, backbone weights are loaded from the
            local file ``mobilenet_v3_large.pth`` before the classifier part
            of the backbone is discarded.

    Returns:
        The constructed ``DeepLabV3`` model.
    """
    network = mobilenet_v3_large(dilated=True)

    if pretrain_backbone:
        # Load pretrained weights for the mobilenetv3 large backbone.
        network_state = torch.load("mobilenet_v3_large.pth", map_location='cpu')
        network.load_state_dict(network_state)

    # Only the convolutional feature stack is kept.
    features = network.features

    # Gather the indices of blocks which are strided. These are the locations of C1, ..., Cn-1 blocks.
    # The first and last blocks are always included because they are the C0 (conv1) and Cn.
    strided_positions = [idx for idx, blk in enumerate(features) if getattr(blk, "is_strided", False)]
    stage_indices = [0, *strided_positions, len(features) - 1]

    out_pos = stage_indices[-1]  # use C5 which has output_stride = 16
    aux_pos = stage_indices[-4]  # use C2 here which has output_stride = 8
    main_channels = features[out_pos].out_channels
    aux_channels = features[aux_pos].out_channels

    tapped_layers = {str(out_pos): "out"}
    if aux:
        tapped_layers[str(aux_pos)] = "aux"

    feature_extractor = IntermediateLayerGetter(features, return_layers=tapped_layers)

    # why using aux: https://github.com/pytorch/vision/issues/4292
    aux_head = FCNHead(aux_channels, num_classes) if aux else None
    main_head = DeepLabHead(main_channels, num_classes)

    return DeepLabV3(feature_extractor, main_head, aux_head)


================================================
FILE: pytorch_segmentation/deeplab_v3/src/mobilenet_backbone.py
================================================
from typing import Callable, List, Optional

import torch
from torch import nn, Tensor
from torch.nn import functional as F
from functools import partial


def _make_divisible(ch, divisor=8, min_ch=None):
    """Round a channel count to a multiple of ``divisor``.

    Adapted from the original TensorFlow MobileNet code:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py

    Args:
        ch: requested channel count (may be fractional).
        divisor: the result is a multiple of this value.
        min_ch: lower bound for the result; defaults to ``divisor``.

    Returns:
        The nearest multiple of ``divisor`` (half rounds up), never below
        ``min_ch`` and never more than 10% below ``ch``.
    """
    lower_bound = divisor if min_ch is None else min_ch
    nearest_multiple = int(ch + divisor / 2) // divisor * divisor
    rounded = max(lower_bound, nearest_multiple)
    # Rounding down must not lose more than 10% of the requested channels.
    if rounded < 0.9 * ch:
        rounded += divisor
    return rounded


class ConvBNActivation(nn.Sequential):
    """Bias-free Conv2d followed by a normalization layer and an activation.

    Args:
        in_planes: input channels of the convolution.
        out_planes: output channels of the convolution (also stored as
            ``self.out_channels``).
        kernel_size: convolution kernel size.
        stride: convolution stride.
        groups: convolution groups.
        norm_layer: callable ``norm_layer(out_planes)``; ``nn.BatchNorm2d``
            when None.
        activation_layer: callable ``activation_layer(inplace=True)``;
            ``nn.ReLU6`` when None.
        dilation: convolution dilation; the padding is scaled by it.
    """

    def __init__(self,
                 in_planes: int,
                 out_planes: int,
                 kernel_size: int = 3,
                 stride: int = 1,
                 groups: int = 1,
                 norm_layer: Optional[Callable[..., nn.Module]] = None,
                 activation_layer: Optional[Callable[..., nn.Module]] = None,
                 dilation: int = 1):
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        make_act = nn.ReLU6 if activation_layer is None else activation_layer

        conv = nn.Conv2d(in_channels=in_planes,
                         out_channels=out_planes,
                         kernel_size=kernel_size,
                         stride=stride,
                         dilation=dilation,
                         padding=(kernel_size - 1) // 2 * dilation,
                         groups=groups,
                         bias=False)
        norm = make_norm(out_planes)
        act = make_act(inplace=True)

        super(ConvBNActivation, self).__init__(conv, norm, act)
        self.out_channels = out_planes


class SqueezeExcitation(nn.Module):
    """Channel gating: global average pool, two 1x1 convolutions, hard-sigmoid.

    Args:
        input_c: channels of the incoming feature map.
        squeeze_factor: divisor applied to ``input_c`` before the result is
            rounded to a multiple of 8 to size the hidden layer.
    """

    def __init__(self, input_c: int, squeeze_factor: int = 4):
        super(SqueezeExcitation, self).__init__()
        hidden_c = _make_divisible(input_c // squeeze_factor, 8)
        self.fc1 = nn.Conv2d(input_c, hidden_c, 1)
        self.fc2 = nn.Conv2d(hidden_c, input_c, 1)

    def forward(self, x: Tensor) -> Tensor:
        """Return ``x`` multiplied by its per-channel gate."""
        pooled = F.adaptive_avg_pool2d(x, output_size=(1, 1))
        hidden = F.relu(self.fc1(pooled), inplace=True)
        gate = F.hardsigmoid(self.fc2(hidden), inplace=True)
        return gate * x


class InvertedResidualConfig:
    """Settings for one inverted-residual block.

    The three channel counts are scaled by ``width_multi`` and rounded to a
    multiple of 8; the remaining values are stored unchanged.

    Args:
        input_c: block input channels (before scaling).
        kernel: depthwise kernel size.
        expanded_c: expansion channels (before scaling).
        out_c: block output channels (before scaling).
        use_se: whether the block contains a squeeze-excitation module.
        activation: ``"HS"`` selects h-swish; any other value does not.
        stride: block stride.
        dilation: block dilation.
        width_multi: channel width multiplier.
    """

    def __init__(self,
                 input_c: int,
                 kernel: int,
                 expanded_c: int,
                 out_c: int,
                 use_se: bool,
                 activation: str,
                 stride: int,
                 dilation: int,
                 width_multi: float):
        # Scaled channel counts.
        self.input_c = self.adjust_channels(input_c, width_multi)
        self.expanded_c = self.adjust_channels(expanded_c, width_multi)
        self.out_c = self.adjust_channels(out_c, width_multi)

        # Values stored as given.
        self.kernel = kernel
        self.use_se = use_se
        self.stride = stride
        self.dilation = dilation

        # whether using h-swish activation
        self.use_hs = (activation == "HS")

    @staticmethod
    def adjust_channels(channels: int, width_multi: float):
        """Scale ``channels`` by ``width_multi`` and round to a multiple of 8."""
        scaled = channels * width_multi
        return _make_divisible(scaled, 8)


class InvertedResidual(nn.Module):
    """Inverted-residual block: optional 1x1 expand, depthwise conv, optional SE, 1x1 project.

    Args:
        cnf: configuration of the block.
        norm_layer: callable building the normalization layer of every
            convolution in the block.

    Raises:
        ValueError: if ``cnf.stride`` is neither 1 nor 2.
    """

    def __init__(self,
                 cnf: InvertedResidualConfig,
                 norm_layer: Callable[..., nn.Module]):
        super(InvertedResidual, self).__init__()

        if cnf.stride not in [1, 2]:
            raise ValueError("illegal stride value.")

        # The skip connection needs matching resolution and channel count.
        self.use_res_connect = (cnf.stride == 1 and cnf.input_c == cnf.out_c)

        act_cls = nn.Hardswish if cnf.use_hs else nn.ReLU
        stages: List[nn.Module] = []

        # expand (skipped when there is nothing to expand)
        if cnf.expanded_c != cnf.input_c:
            expand = ConvBNActivation(cnf.input_c,
                                      cnf.expanded_c,
                                      kernel_size=1,
                                      norm_layer=norm_layer,
                                      activation_layer=act_cls)
            stages.append(expand)

        # depthwise; a dilated block never downsamples
        depthwise_stride = cnf.stride if cnf.dilation <= 1 else 1
        depthwise = ConvBNActivation(cnf.expanded_c,
                                     cnf.expanded_c,
                                     kernel_size=cnf.kernel,
                                     stride=depthwise_stride,
                                     dilation=cnf.dilation,
                                     groups=cnf.expanded_c,
                                     norm_layer=norm_layer,
                                     activation_layer=act_cls)
        stages.append(depthwise)

        if cnf.use_se:
            stages.append(SqueezeExcitation(cnf.expanded_c))

        # project (linear: no activation)
        project = ConvBNActivation(cnf.expanded_c,
                                   cnf.out_c,
                                   kernel_size=1,
                                   norm_layer=norm_layer,
                                   activation_layer=nn.Identity)
        stages.append(project)

        self.block = nn.Sequential(*stages)
        self.out_channels = cnf.out_c
        self.is_strided = cnf.stride > 1

    def forward(self, x: Tensor) -> Tensor:
        """Run the block and add the input back when the skip connection is enabled."""
        out = self.block(x)
        if not self.use_res_connect:
            return out

        out += x
        return out


class MobileNetV3(nn.Module):
    """MobileNetV3 network: convolutional feature stack, global pooling and a linear classifier.

    ``self.features`` is an ``nn.Sequential`` holding, in order: a stride-2 3x3
    stem convolution, one block per entry of ``inverted_residual_setting``, and
    a final 1x1 convolution whose output width is 6x the last block's output.

    Args:
        inverted_residual_setting: non-empty list of block configurations.
        last_channel: width of the hidden linear layer of the classifier.
        num_classes: output size of the final linear layer.
        block: callable building one block from ``(cnf, norm_layer)``;
            ``InvertedResidual`` when None.
        norm_layer: callable building a normalization layer; when None,
            ``nn.BatchNorm2d`` with ``eps=0.001`` and ``momentum=0.01``.

    Raises:
        ValueError: if ``inverted_residual_setting`` is empty.
        TypeError: if ``inverted_residual_setting`` is not a list of
            ``InvertedResidualConfig``.
    """

    def __init__(self,
                 inverted_residual_setting: List[InvertedResidualConfig],
                 last_channel: int,
                 num_classes: int = 1000,
                 block: Optional[Callable[..., nn.Module]] = None,
                 norm_layer: Optional[Callable[..., nn.Module]] = None):
        super(MobileNetV3, self).__init__()

        # Validate the configuration before building anything.
        if not inverted_residual_setting:
            raise ValueError("The inverted_residual_setting should not be empty.")
        elif not (isinstance(inverted_residual_setting, List) and
                  all([isinstance(s, InvertedResidualConfig) for s in inverted_residual_setting])):
            raise TypeError("The inverted_residual_setting should be List[InvertedResidualConfig]")

        if block is None:
            block = InvertedResidual

        if norm_layer is None:
            norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.01)

        layers: List[nn.Module] = []

        # building first layer
        # The stem's output width is taken from the first block's input width.
        firstconv_output_c = inverted_residual_setting[0].input_c
        layers.append(ConvBNActivation(3,
                                       firstconv_output_c,
                                       kernel_size=3,
                                       stride=2,
                                       norm_layer=norm_layer,
                                       activation_layer=nn.Hardswish))
        # building inverted residual blocks
        for cnf in inverted_residual_setting:
            layers.append(block(cnf, norm_layer))

        # building last several layers
        lastconv_input_c = inverted_residual_setting[-1].out_c
        lastconv_output_c = 6 * lastconv_input_c
        layers.append(ConvBNActivation(lastconv_input_c,
                                       lastconv_output_c,
                                       kernel_size=1,
                                       norm_layer=norm_layer,
                                       activation_layer=nn.Hardswish))
        self.features = nn.Sequential(*layers)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Sequential(nn.Linear(lastconv_output_c, last_channel),
                                        nn.Hardswish(inplace=True),
                                        nn.Dropout(p=0.2, inplace=True),
                                        nn.Linear(last_channel, num_classes))

        # initial weights
        # Conv: Kaiming normal (fan_out), zero bias; norm layers: weight 1, bias 0;
        # Linear: normal(0, 0.01), zero bias.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out")
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def _forward_impl(self, x: Tensor) -> Tensor:
        """Run features, global average pooling, flatten and the classifier."""
        x = self.features(x)
        x = self.avgpool(x)
        # Flatten everything after the first dimension before the linear layers.
        x = torch.flatten(x, 1)
        x = self.classifier(x)

        return x

    def forward(self, x: Tensor) -> Tensor:
        """Return the classifier output for ``x``."""
        return self._forward_impl(x)


def mobilenet_v3_large(num_classes: int = 1000,
                       reduced_tail: bool = False,
                       dilated: bool = False) -> MobileNetV3:
    """
    Build the large MobileNetV3 variant described in
    "Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>.

    weights_link:
    https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth

    Args:
        num_classes (int): number of classes
        reduced_tail (bool): If True, the channel counts of the last three
            blocks and of the classifier hidden layer are halved (the C4..C5
            part of the network), reducing channel redundancy in the backbone
            for Detection and Segmentation.
        dilated: If True, the last three blocks are configured with dilation 2
            instead of 1.
    """
    width_multi = 1.0
    make_cfg = partial(InvertedResidualConfig, width_multi=width_multi)
    scale_channels = partial(InvertedResidualConfig.adjust_channels, width_multi=width_multi)

    tail_divider = 2 if reduced_tail else 1
    tail_dilation = 2 if dilated else 1
    tail_out_c = 160 // tail_divider
    tail_exp_c = 960 // tail_divider

    # One row per block:
    # input_c, kernel, expanded_c, out_c, use_se, activation, stride, dilation
    rows = [
        (16, 3, 16, 16, False, "RE", 1, 1),
        (16, 3, 64, 24, False, "RE", 2, 1),  # C1
        (24, 3, 72, 24, False, "RE", 1, 1),
        (24, 5, 72, 40, True, "RE", 2, 1),  # C2
        (40, 5, 120, 40, True, "RE", 1, 1),
        (40, 5, 120, 40, True, "RE", 1, 1),
        (40, 3, 240, 80, False, "HS", 2, 1),  # C3
        (80, 3, 200, 80, False, "HS", 1, 1),
        (80, 3, 184, 80, False, "HS", 1, 1),
        (80, 3, 184, 80, False, "HS", 1, 1),
        (80, 3, 480, 112, True, "HS", 1, 1),
        (112, 3, 672, 112, True, "HS", 1, 1),
        (112, 5, 672, tail_out_c, True, "HS", 2, tail_dilation),  # C4
        (tail_out_c, 5, tail_exp_c, tail_out_c, True, "HS", 1, tail_dilation),
        (tail_out_c, 5, tail_exp_c, tail_out_c, True, "HS", 1, tail_dilation),
    ]
    block_settings = [make_cfg(*row) for row in rows]
    classifier_width = scale_channels(1280 // tail_divider)  # C5

    return MobileNetV3(inverted_residual_setting=block_settings,
                       last_channel=classifier_width,
                       num_classes=num_classes)


def mobilenet_v3_small(num_classes: int = 1000,
                       reduced_tail: bool = False,
                       dilated: bool = False) -> MobileNetV3:
    """
    Build the small MobileNetV3 variant described in
    "Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>.

    weights_link:
    https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth

    Args:
        num_classes (int): number of classes
        reduced_tail (bool): If True, the channel counts of the last three
            blocks and of the classifier hidden layer are halved (the C4..C5
            part of the network), reducing channel redundancy in the backbone
            for Detection and Segmentation.
        dilated: If True, the last three blocks are configured with dilation 2
            instead of 1.
    """
    width_multi = 1.0
    make_cfg = partial(InvertedResidualConfig, width_multi=width_multi)
    scale_channels = partial(InvertedResidualConfig.adjust_channels, width_multi=width_multi)

    tail_divider = 2 if reduced_tail else 1
    tail_dilation = 2 if dilated else 1
    tail_out_c = 96 // tail_divider
    tail_exp_c = 576 // tail_divider

    # One row per block:
    # input_c, kernel, expanded_c, out_c, use_se, activation, stride, dilation
    rows = [
        (16, 3, 16, 16, True, "RE", 2, 1),  # C1
        (16, 3, 72, 24, False, "RE", 2, 1),  # C2
        (24, 3, 88, 24, False, "RE", 1, 1),
        (24, 5, 96, 40, True, "HS", 2, 1),  # C3
        (40, 5, 240, 40, True, "HS", 1, 1),
        (40, 5, 240, 40, True, "HS", 1, 1),
        (40, 5, 120, 48, True, "HS", 1, 1),
        (48, 5, 144, 48, True, "HS", 1, 1),
        (48, 5, 288, tail_out_c, True, "HS", 2, tail_dilation),  # C4
        (tail_out_c, 5, tail_exp_c, tail_out_c, True, "HS", 1, tail_dilation),
        (tail_out_c, 5, tail_exp_c, tail_out_c, True, "HS", 1, tail_dilation),
    ]
    block_settings = [make_cfg(*row) for row in rows]
    classifier_width = scale_channels(1024 // tail_divider)  # C5

    return MobileNetV3(inverted_residual_setting=block_settings,
                       last_channel=classifier_width,
                       num_classes=num_classes)


================================================
FILE: pytorch_segmentation/deeplab_v3/src/resnet_backbone.py
================================================
import torch
import torch.nn as nn


def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` whose padding equals its dilation."""
    conv_options = dict(kernel_size=3,
                        stride=stride,
                        padding=dilation,
                        groups=groups,
                        bias=False,
                        dilation=dilation)
    return nn.Conv2d(in_planes, out_planes, **conv_options)


def conv1x1(in_planes, out_planes, stride=1):
    """Return a bias-free 1x1 ``nn.Conv2d`` with the given stride."""
    pointwise = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
    return pointwise


class Bottleneck(nn.Module):
    """Three-convolution residual block (1x1 -> 3x3 -> 1x1) with a shortcut.

    Bottleneck in torchvision places the stride for downsampling at the 3x3
    convolution (self.conv2), while the original implementation places the
    stride at the first 1x1 convolution (self.conv1) according to
    "Deep residual learning for image recognition" https://arxiv.org/abs/1512.03385.
    This variant is also known as ResNet V1.5 and improves accuracy according to
    https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.

    Args:
        inplanes: input channels.
        planes: base width; the block outputs ``planes * expansion`` channels.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input to form the shortcut.
        groups: groups of the 3x3 convolution.
        base_width: per-group width relative to 64.
        dilation: dilation of the 3x3 convolution.
        norm_layer: callable building a normalization layer; ``nn.BatchNorm2d``
            when None.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(Bottleneck, self).__init__()
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        inner_width = int(planes * (base_width / 64.)) * groups
        outer_width = planes * self.expansion

        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv1x1(inplanes, inner_width)
        self.bn1 = make_norm(inner_width)
        self.conv2 = conv3x3(inner_width, inner_width, stride, groups, dilation)
        self.bn2 = make_norm(inner_width)
        self.conv3 = conv1x1(inner_width, outer_width)
        self.bn3 = make_norm(outer_width)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(residual_branch(x) + shortcut(x))``."""
        branch = self.relu(self.bn1(self.conv1(x)))
        branch = self.relu(self.bn2(self.conv2(branch)))
        branch = self.bn3(self.conv3(branch))

        # The shortcut is the raw input unless a downsample module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        branch += shortcut
        return self.relu(branch)


class ResNet(nn.Module):
    """ResNet classifier whose later stages can trade stride for dilation.

    Args:
        block: residual block class; must expose an ``expansion`` attribute.
        layers: number of blocks in each of the four stages (layer1..layer4).
        num_classes: output size of the final fully connected layer.
        zero_init_residual: if True, the weight of the last BN (``bn3``) of
            every ``Bottleneck`` is initialised to zero.
        groups: forwarded to every block.
        width_per_group: forwarded to every block as ``base_width``.
        replace_stride_with_dilation: None or three flags, applied to layer2,
            layer3 and layer4 respectively; a True flag makes that stage use
            dilation instead of its stride of 2.
        norm_layer: callable building a normalization layer;
            ``nn.BatchNorm2d`` when None.

    Raises:
        ValueError: if ``replace_stride_with_dilation`` does not have exactly
            three elements.
    """

    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        # Running state consumed and updated by _make_layer while stages are built.
        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        # Stem: 7x7 stride-2 convolution, norm, ReLU and a 3x3 stride-2 max pool.
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # Four residual stages; the order of these calls matters because each
        # one updates self.inplanes (and possibly self.dilation) for the next.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=replace_stride_with_dilation[2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Weight initialisation: Kaiming normal for convolutions, (1, 0) for BatchNorm.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        """Build one stage of ``blocks`` residual blocks as an ``nn.Sequential``.

        Updates ``self.inplanes`` to the stage's output width and, when
        ``dilate`` is True, multiplies ``self.dilation`` by ``stride`` and
        builds the stage with stride 1 instead.
        """
        norm_layer = self._norm_layer
        downsample = None
        # The first block of the stage keeps the dilation in effect before this stage.
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        # A projection shortcut is needed whenever resolution or width changes.
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))
        self.inplanes = planes * block.expansion
        # Remaining blocks use default stride, no downsample and the updated dilation.
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def _forward_impl(self, x):
        """Run stem, the four stages, global pooling, flatten and the fc layer."""
        # See note [TorchScript super()]
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

    def forward(self, x):
        """Return the classifier output for ``x``."""
        return self._forward_impl(x)


def _resnet(block, layers, **kwargs):
    """Instantiate ``ResNet`` with the given block class, stage depths and keyword options."""
    return ResNet(block, layers, **kwargs)


def resnet50(**kwargs):
    r"""ResNet-50 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_

    Args:
        **kwargs: forwarded unchanged to the ``ResNet`` constructor
            (for example ``replace_stride_with_dilation``). No weights are
            downloaded or loaded here.
    """
    stage_depths = [3, 4, 6, 3]
    return _resnet(Bottleneck, stage_depths, **kwargs)


def resnet101(**kwargs):
    r"""ResNet-101 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_

    Args:
        **kwargs: forwarded unchanged to the ``ResNet`` constructor
            (for example ``replace_stride_with_dilation``). No weights are
            downloaded or loaded here.
    """
    stage_depths = [3, 4, 23, 3]
    return _resnet(Bottleneck, stage_depths, **kwargs)


================================================
FILE: pytorch_segmentation/deeplab_v3/train.py
================================================
import os
import time
import datetime

import torch

from src import deeplabv3_resnet50
from train_utils import train_one_epoch, evaluate, create_lr_scheduler
from my_dataset import VOCSegmentation
import transforms as T


class SegmentationPresetTrain:
    """Training-time transform pipeline applied jointly to an image and its target.

    Pipeline: random resize between 0.5x and 2.0x ``base_size``, optional
    random horizontal flip, random crop to ``crop_size``, tensor conversion
    and normalization.

    Args:
        base_size: reference size from which the resize bounds are derived.
        crop_size: size passed to ``T.RandomCrop``.
        hflip_prob: flip probability; the flip step is omitted unless it is > 0.
        mean: per-channel mean passed to ``T.Normalize``.
        std: per-channel std passed to ``T.Normalize``.
    """

    def __init__(self, base_size, crop_size, hflip_prob=0.5, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        smallest = int(0.5 * base_size)
        largest = int(2.0 * base_size)

        pipeline = [T.RandomResize(smallest, largest)]
        if hflip_prob > 0:
            pipeline.append(T.RandomHorizontalFlip(hflip_prob))
        pipeline += [
            T.RandomCrop(crop_size),
            T.ToTensor(),
            T.Normalize(mean=mean, std=std),
        ]
        self.transforms = T.Compose(pipeline)

    def __call__(self, img, target):
        """Apply the composed transforms to ``(img, target)``."""
        return self.transforms(img, target)


class SegmentationPresetEval:
    """Evaluation-time transform pipeline applied jointly to an image and its target.

    Pipeline: ``T.RandomResize`` with identical lower and upper bound
    (``base_size``), tensor conversion and normalization.

    Args:
        base_size: value used for both resize bounds.
        mean: per-channel mean passed to ``T.Normalize``.
        std: per-channel std passed to ``T.Normalize``.
    """

    def __init__(self, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        resize = T.RandomResize(base_size, base_size)
        to_tensor = T.ToTensor()
        normalize = T.Normalize(mean=mean, std=std)
        self.transforms = T.Compose([resize, to_tensor, normalize])

    def __call__(self, img, target):
        """Apply the composed transforms to ``(img, target)``."""
        return self.transforms(img, target)


def get_transform(train):
    """Return the training preset when ``train`` is truthy, else the evaluation preset."""
    base_size, crop_size = 520, 480

    if train:
        return SegmentationPresetTrain(base_size, crop_size)
    return SegmentationPresetEval(base_size)


def create_model(aux, num_classes, pretrain=True):
    """Build ``deeplabv3_resnet50`` and optionally load COCO-pretrained weights.

    Args:
        aux: forwarded to ``deeplabv3_resnet50``.
        num_classes: number of classes including background.
        pretrain: when truthy, weights are read from
            ``./deeplabv3_resnet50_coco.pth`` and loaded non-strictly.

    Returns:
        The constructed model.
    """
    model = deeplabv3_resnet50(aux=aux, num_classes=num_classes)
    if not pretrain:
        return model

    weights_dict = torch.load("./deeplabv3_resnet50_coco.pth", map_location='cpu')

    if num_classes != 21:
        # The official pretrained weights have 21 classes (including background).
        # When training on your own dataset, drop the class-dependent weights
        # so that mismatched shapes do not raise an error.
        class_specific = [name for name in weights_dict if "classifier.4" in name]
        for name in class_specific:
            del weights_dict[name]

    missing_keys, unexpected_keys = model.load_state_dict(weights_dict, strict=False)
    if missing_keys or unexpected_keys:
        print("missing_keys: ", missing_keys)
        print("unexpected_keys: ", unexpected_keys)

    return model


def main(args):
    """Train and evaluate DeepLabV3 on the VOC2012 segmentation split.

    For every epoch the model is trained, evaluated on ``val.txt``, the
    metrics are appended to a timestamped results file, and a checkpoint is
    written to ``save_weights/model_{epoch}.pth``.

    Args:
        args: parsed command-line options (see ``parse_args``).
    """
    # Fall back to CPU when CUDA is not available.
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    batch_size = args.batch_size
    # segmentation num_classes + background
    num_classes = args.num_classes + 1

    # File used to record information from training and validation
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> train.txt
    train_dataset = VOCSegmentation(args.data_path,
                                    year="2012",
                                    transforms=get_transform(train=True),
                                    txt_name="train.txt")

    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> val.txt
    val_dataset = VOCSegmentation(args.data_path,
                                  year="2012",
                                  transforms=get_transform(train=False),
                                  txt_name="val.txt")

    # Worker count: capped by CPU count, batch size and 8; 0 workers when batch_size is 1.
    num_workers = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               num_workers=num_workers,
                                               shuffle=True,
                                               pin_memory=True,
                                               collate_fn=train_dataset.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             num_workers=num_workers,
                                             pin_memory=True,
                                             collate_fn=val_dataset.collate_fn)

    model = create_model(aux=args.aux, num_classes=num_classes)
    model.to(device)

    # Separate parameter groups for backbone and classifier (both use the base lr).
    params_to_optimize = [
        {"params": [p for p in model.backbone.parameters() if p.requires_grad]},
        {"params": [p for p in model.classifier.parameters() if p.requires_grad]}
    ]

    if args.aux:
        # The auxiliary classifier is trained with 10x the base learning rate.
        params = [p for p in model.aux_classifier.parameters() if p.requires_grad]
        params_to_optimize.append({"params": params, "lr": args.lr * 10})

    optimizer = torch.optim.SGD(
        params_to_optimize,
        lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay
    )

    # Gradient scaler is only created for mixed-precision training.
    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # Create the learning-rate schedule; it is updated once per step (not once per epoch)
    lr_scheduler = create_lr_scheduler(optimizer, len(train_loader), args.epochs, warmup=True)

    # import matplotlib.pyplot as plt
    # lr_list = []
    # for _ in range(args.epochs):
    #     for _ in range(len(train_loader)):
    #         lr_scheduler.step()
    #         lr = optimizer.param_groups[0]["lr"]
    #         lr_list.append(lr)
    # plt.plot(range(len(lr_list)), lr_list)
    # plt.show()

    if args.resume:
        # Restore model, optimizer, scheduler (and scaler) and continue after the saved epoch.
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp:
            scaler.load_state_dict(checkpoint["scaler"])

    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        mean_loss, lr = train_one_epoch(model, optimizer, train_loader, device, epoch,
                                        lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler)

        confmat = evaluate(model, val_loader, device=device, num_classes=num_classes)
        val_info = str(confmat)
        print(val_info)
        # write into txt
        with open(results_file, "a") as f:
            # Record each epoch's train_loss, lr and the validation metrics
            train_info = f"[epoch: {epoch}]\n" \
                         f"train_loss: {mean_loss:.4f}\n" \
                         f"lr: {lr:.6f}\n"
            f.write(train_info + val_info + "\n\n")

        # Everything needed to resume training from this epoch.
        save_file = {"model": model.state_dict(),
                     "optimizer": optimizer.state_dict(),
                     "lr_scheduler": lr_scheduler.state_dict(),
                     "epoch": epoch,
                     "args": args}
        if args.amp:
            save_file["scaler"] = scaler.state_dict()
        torch.save(save_file, "save_weights/model_{}.pth".format(epoch))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print("training time {}".format(total_time_str))


def parse_args():
    """Parse the command-line options for DeepLabV3 training.

    The boolean options ``--aux`` and ``--amp`` take a textual value such as
    ``True``/``False``, ``yes``/``no`` or ``1``/``0`` (case-insensitive).

    Returns:
        argparse.Namespace holding the parsed options.
    """
    import argparse

    def str2bool(value):
        """Convert a command-line token to a bool.

        ``type=bool`` must not be used for this: ``bool("False")`` is True
        because every non-empty string is truthy, so the option could never
        be switched off from the command line.
        """
        if isinstance(value, bool):
            return value
        token = value.strip().lower()
        if token in ("true", "t", "yes", "y", "1"):
            return True
        # The empty string stays False, as it was under ``type=bool``.
        if token in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser(description="pytorch deeplabv3 training")

    parser.add_argument("--data-path", default="/data/", help="VOCdevkit root")
    parser.add_argument("--num-classes", default=20, type=int)
    parser.add_argument("--aux", default=True, type=str2bool, help="auxilier loss")
    parser.add_argument("--device", default="cuda", help="training device")
    parser.add_argument("-b", "--batch-size", default=4, type=int)
    parser.add_argument("--epochs", default=30, type=int, metavar="N",
                        help="number of total epochs to train")

    parser.add_argument('--lr', default=0.0001, type=float, help='initial learning rate')
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    parser.add_argument('--print-freq', default=10, type=int, help='print frequency')
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                        help='start epoch')
    # Mixed precision training parameters
    parser.add_argument("--amp", default=False, type=str2bool,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()

    return args


if __name__ == '__main__':
    args = parse_args()

    # Checkpoints are written into ./save_weights, so create it on first run.
    weights_dir = "./save_weights"
    if not os.path.exists(weights_dir):
        os.mkdir(weights_dir)

    main(args)


================================================
FILE: pytorch_segmentation/deeplab_v3/train_multi_GPU.py
================================================
import time
import os
import datetime

import torch

from src import deeplabv3_resnet50
from train_utils import train_one_epoch, evaluate, create_lr_scheduler, init_distributed_mode, save_on_master, mkdir
from my_dataset import VOCSegmentation
import transforms as T


class SegmentationPresetTrain:
    """Training-time transform pipeline applied jointly to an image and its target.

    Pipeline: random resize between 0.5x and 2.0x ``base_size``, optional
    random horizontal flip, random crop to ``crop_size``, tensor conversion
    and normalization.

    Args:
        base_size: reference size from which the resize bounds are derived.
        crop_size: size passed to ``T.RandomCrop``.
        hflip_prob: flip probability; the flip step is omitted unless it is > 0.
        mean: per-channel mean passed to ``T.Normalize``.
        std: per-channel std passed to ``T.Normalize``.
    """

    def __init__(self, base_size, crop_size, hflip_prob=0.5, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        smallest = int(0.5 * base_size)
        largest = int(2.0 * base_size)

        pipeline = [T.RandomResize(smallest, largest)]
        if hflip_prob > 0:
            pipeline.append(T.RandomHorizontalFlip(hflip_prob))
        pipeline += [
            T.RandomCrop(crop_size),
            T.ToTensor(),
            T.Normalize(mean=mean, std=std),
        ]
        self.transforms = T.Compose(pipeline)

    def __call__(self, img, target):
        """Apply the composed transforms to ``(img, target)``."""
        return self.transforms(img, target)


class SegmentationPresetEval:
    """Evaluation-time transform pipeline: fixed resize, tensor conversion, normalization."""

    def __init__(self, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        # RandomResize with identical lower/upper bounds always picks ``base_size``.
        resize = T.RandomResize(base_size, base_size)
        to_tensor = T.ToTensor()
        normalize = T.Normalize(mean=mean, std=std)
        self.transforms = T.Compose([resize, to_tensor, normalize])

    def __call__(self, img, target):
        """Run the composed pipeline on ``(img, target)`` and return the transformed pair."""
        return self.transforms(img, target)


def get_transform(train):
    """Return the training preset when ``train`` is truthy, otherwise the evaluation preset.

    Both presets use a base size of 520; the training preset additionally crops to 480.
    """
    base_size = 520
    crop_size = 480

    if train:
        return SegmentationPresetTrain(base_size, crop_size)
    return SegmentationPresetEval(base_size)


def create_model(aux, num_classes):
    """Build a DeepLabV3-ResNet50 model and initialise it from ``./deeplabv3_resnet50_coco.pth``.

    Args:
        aux: forwarded to ``deeplabv3_resnet50`` (whether to build the auxiliary head).
        num_classes: number of output classes, background included.

    Returns:
        The model with the pretrained weights loaded non-strictly.
    """
    model = deeplabv3_resnet50(aux=aux, num_classes=num_classes)
    weights_dict = torch.load("./deeplabv3_resnet50_coco.pth", map_location='cpu')

    if num_classes != 21:
        # The official pretrained weights are for 21 classes (background included).
        # When training on a custom dataset, drop the class-dependent weights so
        # that mismatching shapes do not raise an error while loading.
        class_specific = [key for key in weights_dict.keys() if "classifier.4" in key]
        for key in class_specific:
            del weights_dict[key]

    missing_keys, unexpected_keys = model.load_state_dict(weights_dict, strict=False)
    # Only report when something actually failed to line up.
    if missing_keys or unexpected_keys:
        print("missing_keys: ", missing_keys)
        print("unexpected_keys: ", unexpected_keys)

    return model


def main(args):
    """Train (or, with ``args.test_only``, only evaluate) DeepLabV3-ResNet50 on PASCAL VOC 2012.

    The function initialises distributed mode from the environment, builds the
    datasets/loaders, the model, the SGD optimizer and the per-step learning-rate
    schedule, optionally resumes from ``args.resume``, then runs the
    train/evaluate loop and writes per-epoch results and checkpoints.

    Args:
        args: parsed command-line namespace (see the argument parser at the
            bottom of this file). ``init_distributed_mode`` adds
            ``args.distributed`` and, in distributed runs, ``args.rank`` /
            ``args.gpu``.

    Raises:
        FileNotFoundError: if ``<args.data_path>/VOCdevkit`` does not exist.
    """
    init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)
    # The segmentation head predicts num_classes foreground classes + background.
    num_classes = args.num_classes + 1

    # Text file that accumulates the per-epoch training/validation summary.
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    VOC_root = args.data_path
    # check voc root
    if os.path.exists(os.path.join(VOC_root, "VOCdevkit")) is False:
        raise FileNotFoundError("VOCdevkit dose not in path:'{}'.".format(VOC_root))

    # load train data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> train.txt
    train_dataset = VOCSegmentation(args.data_path,
                                    year="2012",
                                    transforms=get_transform(train=True),
                                    txt_name="train.txt")
    # load validation data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> val.txt
    val_dataset = VOCSegmentation(args.data_path,
                                  year="2012",
                                  transforms=get_transform(train=False),
                                  txt_name="val.txt")

    print("Creating data loaders")
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        test_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
    else:
        train_sampler = torch.utils.data.RandomSampler(train_dataset)
        test_sampler = torch.utils.data.SequentialSampler(val_dataset)

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size,
        sampler=train_sampler, num_workers=args.workers,
        collate_fn=train_dataset.collate_fn, drop_last=True)

    val_data_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=1,
        sampler=test_sampler, num_workers=args.workers,
        collate_fn=val_dataset.collate_fn)

    print("Creating model")
    # create model num_classes equal background + 20 classes
    model = create_model(aux=args.aux, num_classes=num_classes)
    model.to(device)

    if args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    # Keep a handle on the bare model: checkpoints and parameter groups must
    # not go through the DistributedDataParallel wrapper.
    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    params_to_optimize = [
        {"params": [p for p in model_without_ddp.backbone.parameters() if p.requires_grad]},
        {"params": [p for p in model_without_ddp.classifier.parameters() if p.requires_grad]},
    ]
    if args.aux:
        # The auxiliary classifier is trained with a 10x larger learning rate.
        params = [p for p in model_without_ddp.aux_classifier.parameters() if p.requires_grad]
        params_to_optimize.append({"params": params, "lr": args.lr * 10})
    optimizer = torch.optim.SGD(
        params_to_optimize,
        lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # Learning-rate schedule that is stepped once per iteration (not once per epoch).
    lr_scheduler = create_lr_scheduler(optimizer, len(train_data_loader), args.epochs, warmup=True)

    # When --resume is given (path of a previous checkpoint), continue from its state.
    if args.resume:
        # If map_location is missing, torch.load will first load the module to CPU
        # and then copy each parameter to where it was saved,
        # which would result in all processes on the same machine using the same set of devices.
        # The checkpoint holds the model weights plus optimizer and scheduler state.
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp:
            scaler.load_state_dict(checkpoint["scaler"])

    if args.test_only:
        confmat = evaluate(model, val_data_loader, device=device, num_classes=num_classes)
        val_info = str(confmat)
        print(val_info)
        return

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            # Re-seed the sampler so every epoch uses a different shuffling.
            train_sampler.set_epoch(epoch)
        mean_loss, lr = train_one_epoch(model, optimizer, train_data_loader, device, epoch,
                                        lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler)

        confmat = evaluate(model, val_data_loader, device=device, num_classes=num_classes)
        val_info = str(confmat)
        print(val_info)

        # Only the main process writes the results file. ``args.rank`` is not
        # set by init_distributed_mode on the non-distributed path, so fall
        # back to -1 instead of raising AttributeError.
        if getattr(args, "rank", -1) in [-1, 0]:
            # write into txt
            with open(results_file, "a") as f:
                # Record train_loss, lr and the validation metrics of this epoch.
                train_info = f"[epoch: {epoch}]\n" \
                             f"train_loss: {mean_loss:.4f}\n" \
                             f"lr: {lr:.6f}\n"
                f.write(train_info + val_info + "\n\n")

        if args.output_dir:
            # save_on_master only writes the checkpoint on the main process.
            save_file = {'model': model_without_ddp.state_dict(),
                         'optimizer': optimizer.state_dict(),
                         'lr_scheduler': lr_scheduler.state_dict(),
                         'args': args,
                         'epoch': epoch}
            if args.amp:
                save_file["scaler"] = scaler.state_dict()
            save_on_master(save_file,
                           os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))


if __name__ == "__main__":
    import argparse

    def _str2bool(value):
        """Convert a command-line token to a bool.

        ``type=bool`` would treat every non-empty string (including "False")
        as True, so boolean options are parsed explicitly instead. The empty
        string still maps to False, as it did with ``bool("")``.
        """
        if isinstance(value, bool):
            return value
        token = value.strip().lower()
        if token in ("true", "t", "yes", "y", "1"):
            return True
        if token in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got '{}'".format(value))

    parser = argparse.ArgumentParser(
        description=__doc__)

    # Root directory of the training data (the folder that contains VOCdevkit)
    parser.add_argument('--data-path', default='/data/', help='dataset')
    # Training device type
    parser.add_argument('--device', default='cuda', help='device')
    # Number of target classes (background excluded)
    parser.add_argument('--num-classes', default=20, type=int, help='num_classes')
    # batch_size on each GPU
    parser.add_argument('-b', '--batch-size', default=4, type=int,
                        help='images per gpu, the total batch size is $NGPU x batch_size')
    parser.add_argument("--aux", default=True, type=_str2bool, help="auxilier loss")
    # Epoch index to start training from
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # Total number of training epochs
    parser.add_argument('--epochs', default=20, type=int, metavar='N',
                        help='number of total epochs to run')
    # Whether to use synchronized BN across GPUs; off by default, enabling it slows training down
    parser.add_argument('--sync_bn', type=_str2bool, default=False, help='whether using SyncBatchNorm')
    # Number of workers used for data loading and preprocessing
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    # Learning rate, 0.0001 by default; try a larger value if results are poor
    parser.add_argument('--lr', default=0.0001, type=float,
                        help='initial learning rate')
    # momentum parameter of SGD
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # weight_decay parameter of SGD
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # How often to print progress during training
    parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
    # Directory where checkpoints are saved
    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')
    # Continue training from a previous checkpoint
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    # Do not train, only evaluate
    parser.add_argument(
        "--test-only",
        dest="test_only",
        help="Only test the model",
        action="store_true",
    )

    # Number of distributed processes
    parser.add_argument('--world-size', default=1, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
    # Mixed precision training parameters
    parser.add_argument("--amp", default=False, type=_str2bool,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()

    # If an output directory is given, create it when it does not exist yet.
    if args.output_dir:
        mkdir(args.output_dir)

    main(args)


================================================
FILE: pytorch_segmentation/deeplab_v3/train_utils/__init__.py
================================================
from .train_and_eval import train_one_epoch, evaluate, create_lr_scheduler
from .distributed_utils import init_distributed_mode, save_on_master, mkdir


================================================
FILE: pytorch_segmentation/deeplab_v3/train_utils/distributed_utils.py
================================================
from collections import defaultdict, deque
import datetime
import time
import torch
import torch.distributed as dist

import errno
import os


class SmoothedValue(object):
    """Keep a bounded window of recent values together with a running total/count.

    Window statistics (``median``, ``avg``, ``max``, ``value``) are computed over
    the last ``window_size`` values; ``global_avg`` is ``total / count`` over
    everything passed to ``update``.
    """

    def __init__(self, window_size=20, fmt=None):
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0
        # Default rendering: latest value followed by the global average.
        self.fmt = "{value:.4f} ({global_avg:.4f})" if fmt is None else fmt

    def update(self, value, n=1):
        """Record ``value`` in the window and add it ``n`` times to the running total."""
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Sum ``count`` and ``total`` across all processes.

        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        packed = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
        dist.barrier()
        dist.all_reduce(packed)
        reduced_count, reduced_total = packed.tolist()
        self.count = int(reduced_count)
        self.total = reduced_total

    @property
    def median(self):
        """Median of the values currently in the window."""
        window = torch.tensor(list(self.deque))
        return window.median().item()

    @property
    def avg(self):
        """Mean of the values currently in the window (computed in float32)."""
        window = torch.tensor(list(self.deque), dtype=torch.float32)
        return window.mean().item()

    @property
    def global_avg(self):
        """Running total divided by running count."""
        return self.total / self.count

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    @property
    def value(self):
        """Most recently recorded value."""
        return self.deque[-1]

    def __str__(self):
        stats = {
            "median": self.median,
            "avg": self.avg,
            "global_avg": self.global_avg,
            "max": self.max,
            "value": self.value,
        }
        return self.fmt.format(**stats)


class ConfusionMatrix(object):
    """Accumulate an ``n x n`` confusion matrix (rows: ground truth, columns: prediction)."""

    def __init__(self, num_classes):
        self.num_classes = num_classes
        # Allocated lazily on the first update, on the device of the first input.
        self.mat = None

    def update(self, a, b):
        """Add the pairs (ground truth ``a``, prediction ``b``) to the matrix.

        Ground-truth entries outside ``[0, num_classes)`` are ignored.
        """
        n = self.num_classes
        if self.mat is None:
            # Create the confusion matrix.
            self.mat = torch.zeros((n, n), dtype=torch.int64, device=a.device)
        with torch.no_grad():
            # Positions whose ground-truth label is a valid class index.
            valid = (a >= 0) & (a < n)
            # Encode every (true, predicted) pair as one flat index true * n + pred,
            # so a single bincount yields all n * n cell counts at once.
            flat_index = a[valid].to(torch.int64) * n + b[valid]
            counts = torch.bincount(flat_index, minlength=n ** 2)
            self.mat += counts.reshape(n, n)

    def reset(self):
        """Zero the matrix in place if it has been allocated."""
        if self.mat is None:
            return
        self.mat.zero_()

    def compute(self):
        """Return ``(global accuracy, per-class accuracy, per-class IoU)`` as tensors."""
        h = self.mat.float()
        # The diagonal holds the number of correctly predicted pixels per class.
        correct = torch.diag(h)
        per_truth = h.sum(1)
        per_pred = h.sum(0)
        # Overall accuracy.
        acc_global = correct.sum() / h.sum()
        # Accuracy of each class.
        acc = correct / per_truth
        # IoU of each class between prediction and ground truth.
        iu = correct / (per_truth + per_pred - correct)
        return acc_global, acc, iu

    def reduce_from_all_processes(self):
        """Sum the matrix across processes; a no-op when torch.distributed is unused."""
        if not torch.distributed.is_available():
            return
        if not torch.distributed.is_initialized():
            return
        torch.distributed.barrier()
        torch.distributed.all_reduce(self.mat)

    def __str__(self):
        acc_global, acc, iu = self.compute()
        template = (
            'global correct: {:.1f}\n'
            'average row correct: {}\n'
            'IoU: {}\n'
            'mean IoU: {:.1f}')
        row_correct = ['{:.1f}'.format(v) for v in (acc * 100).tolist()]
        iou_percent = ['{:.1f}'.format(v) for v in (iu * 100).tolist()]
        return template.format(
            acc_global.item() * 100,
            row_correct,
            iou_percent,
            iu.mean().item() * 100)


class MetricLogger(object):
    """Collection of named ``SmoothedValue`` meters with periodic progress printing."""

    def __init__(self, delimiter="\t"):
        # Unknown meter names get a default SmoothedValue on first use.
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Feed one scalar per keyword into the meter of the same name."""
        for name, raw in kwargs.items():
            scalar = raw.item() if isinstance(raw, torch.Tensor) else raw
            assert isinstance(scalar, (float, int))
            self.meters[name].update(scalar)

    def __getattr__(self, attr):
        # Only called when normal lookup fails: expose meters as attributes.
        for namespace in (self.meters, self.__dict__):
            if attr in namespace:
                return namespace[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        rendered = ("{}: {}".format(name, str(meter)) for name, meter in self.meters.items())
        return self.delimiter.join(rendered)

    def synchronize_between_processes(self):
        """Synchronize every meter across processes."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register ``meter`` under ``name``, replacing any existing one."""
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield the items of ``iterable``, printing a progress line every ``print_freq`` items.

        Each line shows the index, an ETA, all meters, the averaged iteration
        and data-loading times and, when CUDA is available, the peak allocated
        memory in MB. The total elapsed time is printed once iteration ends.
        """
        if not header:
            header = ''
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        # Pad the running index to the width of the total count.
        space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
        fields = [
            header,
            '[{0' + space_fmt + '}/{1}]',
            'eta: {eta}',
            '{meters}',
            'time: {time}',
            'data: {data}',
        ]
        if torch.cuda.is_available():
            fields.append('max mem: {memory:.0f}')
        log_msg = self.delimiter.join(fields)
        MB = 1024.0 * 1024.0
        for i, obj in enumerate(iterable):
            # Time spent waiting for the next item.
            data_time.update(time.time() - end)
            yield obj
            # Full iteration time, including the consumer's work on ``obj``.
            iter_time.update(time.time() - end)
            if i % print_freq == 0:
                eta_seconds = iter_time.global_avg * (len(iterable) - i)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
                extras = dict(eta=eta_string,
                              meters=str(self),
                              time=str(iter_time), data=str(data_time))
                if torch.cuda.is_available():
                    extras["memory"] = torch.cuda.max_memory_allocated() / MB
                print(log_msg.format(i, len(iterable), **extras))
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print('{} Total time: {}'.format(header, total_time_str))


def mkdir(path):
    """Create ``path`` (with parents); an "already exists" error is ignored, others propagate."""
    try:
        os.makedirs(path)
    except OSError as err:
        if err.errno == errno.EEXIST:
            return
        raise


def setup_for_distributed(is_master):
    """
    Replace the builtin ``print`` so that non-master processes stay silent.

    The replacement accepts an extra ``force`` keyword; ``force=True`` prints
    regardless of ``is_master``.
    """
    import builtins
    original_print = builtins.print

    def print(*args, **kwargs):
        # ``force`` is consumed here and never forwarded to the real print.
        forced = kwargs.pop('force', False)
        if not (is_master or forced):
            return
        original_print(*args, **kwargs)

    builtins.print = print


def is_dist_avail_and_initialized():
    """Return True only when torch.distributed is both available and initialized."""
    # Short-circuit: is_initialized() is only consulted when the package is available.
    return dist.is_available() and dist.is_initialized()


def get_world_size():
    """Return the distributed world size, or 1 when distributed mode is not active."""
    return dist.get_world_size() if is_dist_avail_and_initialized() else 1


def get_rank():
    """Return this process's distributed rank, or 0 when distributed mode is not active."""
    return dist.get_rank() if is_dist_avail_and_initialized() else 0


def is_main_process():
    """Return True when this process has rank 0."""
    rank = get_rank()
    return rank == 0


def save_on_master(*args, **kwargs):
    """Forward the arguments to ``torch.save``, but only on the main process."""
    if not is_main_process():
        return
    torch.save(*args, **kwargs)


def init_distributed_mode(args):
    """Fill in the distributed settings on ``args`` and initialise the process group.

    Rank information is taken from ``RANK``/``WORLD_SIZE``/``LOCAL_RANK`` or from
    ``SLURM_PROCID`` in the environment, or from an existing ``args.rank``. When
    none of these is present, ``args.distributed`` is set to False and the
    function returns without initialising anything.
    """
    env = os.environ
    if 'RANK' in env and 'WORLD_SIZE' in env:
        args.rank = int(env["RANK"])
        args.world_size = int(env['WORLD_SIZE'])
        args.gpu = int(env['LOCAL_RANK'])
    elif 'SLURM_PROCID' in env:
        args.rank = int(env['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()
    elif not hasattr(args, "rank"):
        # No rank information anywhere: run as a single, non-distributed process.
        print('Not using distributed mode')
        args.distributed = False
        return
    # Otherwise the caller already provided args.rank and it is used as-is.

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                         world_size=args.world_size, rank=args.rank)
    # Silence print() on every process except rank 0.
    setup_for_distributed(args.rank == 0)


================================================
FILE: pytorch_segmentation/deeplab_v3/train_utils/train_and_eval.py
================================================
import torch
from torch import nn
import train_utils.distributed_utils as utils


def criterion(inputs, target):
    """Cross-entropy loss over the model outputs.

    ``inputs`` maps output names to logits. With a single entry the ``'out'``
    loss is returned; otherwise the result is ``out + 0.5 * aux``.
    """
    # Pixels whose target value is 255 are ignored: 255 marks object
    # boundaries or padding.
    losses = {
        name: nn.functional.cross_entropy(logits, target, ignore_index=255)
        for name, logits in inputs.items()
    }

    main_loss = losses['out']
    if len(losses) == 1:
        return main_loss

    return main_loss + 0.5 * losses['aux']


def evaluate(model, data_loader, device, num_classes):
    """Run the model over ``data_loader`` in eval mode and return the confusion matrix.

    The matrix is reduced across processes before being returned.
    """
    model.eval()
    confmat = utils.ConfusionMatrix(num_classes)
    logger = utils.MetricLogger(delimiter="  ")
    with torch.no_grad():
        for image, target in logger.log_every(data_loader, 100, 'Test:'):
            image = image.to(device)
            target = target.to(device)
            # Only the main output is scored.
            logits = model(image)['out']
            prediction = logits.argmax(1)

            confmat.update(target.flatten(), prediction.flatten())

        confmat.reduce_from_all_processes()

    return confmat


def train_one_epoch(model, optimizer, data_loader, device, epoch, lr_scheduler, print_freq=10, scaler=None):
    """Train ``model`` for one pass over ``data_loader``.

    Mixed precision is enabled when ``scaler`` is given. The learning-rate
    scheduler is stepped after every iteration.

    Returns:
        ``(mean loss over the epoch, last learning rate of parameter group 0)``.
    """
    model.train()
    logger = utils.MetricLogger(delimiter="  ")
    logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)
    use_amp = scaler is not None

    for image, target in logger.log_every(data_loader, print_freq, header):
        image = image.to(device)
        target = target.to(device)
        with torch.cuda.amp.autocast(enabled=use_amp):
            output = model(image)
            loss = criterion(output, target)

        optimizer.zero_grad()
        if use_amp:
            # Scale the loss before backward; the scaler drives the optimizer step.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()

        lr_scheduler.step()

        lr = optimizer.param_groups[0]["lr"]
        logger.update(loss=loss.item(), lr=lr)

    return logger.meters["loss"].global_avg, lr


def create_lr_scheduler(optimizer,
                        num_step: int,
                        epochs: int,
                        warmup=True,
                        warmup_epochs=1,
                        warmup_factor=1e-3):
    """Build a per-step ``LambdaLR``: optional linear warmup followed by polynomial decay.

    Args:
        optimizer: optimizer whose learning rate is scheduled.
        num_step: number of scheduler steps per epoch.
        epochs: total number of epochs.
        warmup: whether to warm up; ``False`` forces ``warmup_epochs`` to 0.
        warmup_epochs: number of warmup epochs.
        warmup_factor: multiplier at step 0 of the warmup.
    """
    assert num_step > 0 and epochs > 0
    if warmup is False:
        warmup_epochs = 0

    warmup_steps = warmup_epochs * num_step
    decay_steps = (epochs - warmup_epochs) * num_step

    def f(x):
        """
        Return the learning-rate multiplier for step ``x``.
        Note that pytorch calls lr_scheduler.step() once before training starts.
        """
        in_warmup = warmup is True and x <= warmup_steps
        if not in_warmup:
            # After warmup the multiplier goes from 1 -> 0.
            # See deeplab_v2: Learning rate policy
            return (1 - (x - warmup_steps) / decay_steps) ** 0.9
        # During warmup the multiplier goes from warmup_factor -> 1.
        alpha = float(x) / warmup_steps
        return warmup_factor * (1 - alpha) + alpha

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)


================================================
FILE: pytorch_segmentation/deeplab_v3/transforms.py
================================================
import numpy as np
import random

import torch
from torchvision import transforms as T
from torchvision.transforms import functional as F


def pad_if_smaller(img, size, fill=0):
    """Pad ``img`` with ``fill`` when its shortest side is below ``size``.

    Only the third and fourth entries of the padding tuple passed to ``F.pad``
    are non-zero. An image that is already large enough is returned unchanged.
    """
    if min(img.size) >= size:
        return img
    ow, oh = img.size
    # Amount missing along each axis, never negative.
    padh = max(size - oh, 0)
    padw = max(size - ow, 0)
    return F.pad(img, (0, 0, padw, padh), fill=fill)


class Compose(object):
    """Chain transforms that each take and return an ``(image, target)`` pair."""

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        pair = (image, target)
        # Feed the output pair of each transform into the next one.
        for transform in self.transforms:
            pair = transform(*pair)
        image, target = pair
        return image, target


class RandomResize(object):
    """Resize image and target to a size drawn uniformly from ``[min_size, max_size]``."""

    def __init__(self, min_size, max_size=None):
        self.min_size = min_size
        # Without an upper bound the size is fixed at ``min_size``.
        self.max_size = min_size if max_size is None else max_size

    def __call__(self, image, target):
        new_size = random.randint(self.min_size, self.max_size)
        # ``new_size`` is an int, so the shorter side of the image is scaled to it.
        resized_image = F.resize(image, new_size)
        # Note on interpolation: InterpolationMode.NEAREST only exists from
        # torchvision 0.9.0 onwards; earlier versions need PIL.Image.NEAREST.
        resized_target = F.resize(target, new_size, interpolation=T.InterpolationMode.NEAREST)
        return resized_image, resized_target


class RandomHorizontalFlip(object):
    """Flip image and target together with probability ``flip_prob``."""

    def __init__(self, flip_prob):
        self.flip_prob = flip_prob

    def __call__(self, image, target):
        # One random draw decides for both inputs so they stay aligned.
        if random.random() >= self.flip_prob:
            return image, target
        flipped_image = F.hflip(image)
        flipped_target = F.hflip(target)
        return flipped_image, flipped_target


class RandomCrop(object):
    """Crop the same random ``size x size`` window from image and target."""

    def __init__(self, size):
        self.size = size

    def __call__(self, image, target):
        side = self.size
        # Pad first so the crop window always fits; the target is padded with 255.
        image = pad_if_smaller(image, side)
        target = pad_if_smaller(target, side, fill=255)
        # Sample the window once and apply it to both inputs.
        window = T.RandomCrop.get_params(image, (side, side))
        return F.crop(image, *window), F.crop(target, *window)


class CenterCrop(object):
    """Apply ``F.center_crop`` with the configured size to both image and target."""

    def __init__(self, size):
        self.size = size

    def __call__(self, image, target):
        cropped_image = F.center_crop(image, self.size)
        cropped_target = F.center_crop(target, self.size)
        return cropped_image, cropped_target


class ToTensor(object):
    """Convert the image with ``F.to_tensor`` and the target to an int64 tensor."""

    def __call__(self, image, target):
        image_tensor = F.to_tensor(image)
        # The target goes through a numpy array and keeps its raw integer values.
        target_array = np.array(target)
        target_tensor = torch.as_tensor(target_array, dtype=torch.int64)
        return image_tensor, target_tensor


class Normalize(object):
    """Normalize the image with ``mean``/``std``; the target passes through untouched."""

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, image, target):
        normalized = F.normalize(image, mean=self.mean, std=self.std)
        return normalized, target


================================================
FILE: pytorch_segmentation/deeplab_v3/validation.py
================================================
import os
import torch

from src import deeplabv3_resnet50
from train_utils import evaluate
from my_dataset import VOCSegmentation
import transforms as T


class SegmentationPresetEval:
    """Evaluation-time transform pipeline: fixed resize, tensor conversion, normalization."""

    def __init__(self, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        # RandomResize with identical lower/upper bounds always picks ``base_size``.
        resize = T.RandomResize(base_size, base_size)
        to_tensor = T.ToTensor()
        normalize = T.Normalize(mean=mean, std=std)
        self.transforms = T.Compose([resize, to_tensor, normalize])

    def __call__(self, img, target):
        """Run the composed pipeline on ``(img, target)`` and return the transformed pair."""
        return self.transforms(img, target)


def main(args):
    """Evaluate a trained DeepLabV3-ResNet50 checkpoint on the VOC 2012 val split.

    Loads the ``'model'`` entry of the checkpoint at ``args.weights``, runs
    ``evaluate`` over the validation loader and prints the confusion-matrix summary.
    """
    # Fall back to the CPU when CUDA is not available.
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    assert os.path.exists(args.weights), f"weights {args.weights} not found."

    # segmentation num_classes + background
    num_classes = args.num_classes + 1

    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> val.txt
    val_dataset = VOCSegmentation(args.data_path,
                                  year="2012",
                                  transforms=SegmentationPresetEval(520),
                                  txt_name="val.txt")

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             num_workers=8,
                                             pin_memory=True,
                                             collate_fn=val_dataset.collate_fn)

    model = deeplabv3_resnet50(aux=args.aux, num_classes=num_classes)
    checkpoint = torch.load(args.weights, map_location=device)
    model.load_state_dict(checkpoint['model'])
    model.to(device)

    print(evaluate(model, val_loader, device=device, num_classes=num_classes))


def parse_args():
    """Parse the command-line options of the validation script.

    Returns:
        argparse.Namespace with ``data_path``, ``weights``, ``num_classes``,
        ``aux``, ``device`` and ``print_freq``.
    """
    import argparse

    def _str2bool(value):
        # ``type=bool`` would turn every non-empty string (including "False")
        # into True, so the flag is parsed explicitly. The empty string still
        # maps to False, as it did with ``bool("")``.
        if isinstance(value, bool):
            return value
        token = value.strip().lower()
        if token in ("true", "t", "yes", "y", "1"):
            return True
        if token in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got '{}'".format(value))

    parser = argparse.ArgumentParser(description="pytorch deeplabv3 validation")

    parser.add_argument("--data-path", default="/data/", help="VOCdevkit root")
    parser.add_argument("--weights", default="./save_weights/model_29.pth")
    parser.add_argument("--num-classes", default=20, type=int)
    parser.add_argument("--aux", default=True, type=_str2bool, help="auxilier loss")
    parser.add_argument("--device", default="cuda", help="training device")
    parser.add_argument('--print-freq', default=10, type=int, help='print frequency')

    args = parser.parse_args()

    return args


if __name__ == '__main__':
    # Script entry point: parse the command line and run the validation.
    main(parse_args())


================================================
FILE: pytorch_segmentation/fcn/README.md
================================================
# FCN(Fully Convolutional Networks for Semantic Segmentation)

## 该项目主要是来自pytorch官方torchvision模块中的源码
* https://github.com/pytorch/vision/tree/main/torchvision/models/segmentation

## 环境配置：
* Python3.6/3.7/3.8
* Pytorch1.10
* Ubuntu或Centos(Windows暂不支持多GPU训练)
* 最好使用GPU训练
* 详细环境配置见```requirements.txt```

## 文件结构：
```
  ├── src: 模型的backbone以及FCN的搭建
  ├── train_utils: 训练、验证以及多GPU训练相关模块
  ├── my_dataset.py: 自定义dataset用于读取VOC数据集
  ├── train.py: 以fcn_resnet50(这里使用了Dilated/Atrous Convolution)进行训练
  ├── train_multi_GPU.py: 针对使用多GPU的用户使用
  ├── predict.py: 简易的预测脚本，使用训练好的权重进行预测测试
  ├── validation.py: 利用训练好的权重验证/测试数据的mIoU等指标，并生成record_mAP.txt文件
  └── pascal_voc_classes.json: pascal_voc标签文件
```

## 预训练权重下载地址：
* 注意：官方提供的预训练权重是在COCO上预训练得到的，训练时只针对和PASCAL VOC相同的类别进行了训练，所以类别数是21(包括背景)
* fcn_resnet50: https://download.pytorch.org/models/fcn_resnet50_coco-1167a1af.pth
* fcn_resnet101: https://download.pytorch.org/models/fcn_resnet101_coco-7ecb50ca.pth
* 注意，下载的预训练权重记得要重命名，比如在train.py中读取的是```fcn_resnet50_coco.pth```文件，
  不是```fcn_resnet50_coco-1167a1af.pth```
 
 
## 数据集，本例程使用的是PASCAL VOC2012数据集
* Pascal VOC2012 train/val数据集下载地址：http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
* 如果不了解数据集或者想使用自己的数据集进行训练，请参考我的博文: https://blog.csdn.net/qq_37541097/article/details/115787033

## 训练方法
* 确保提前准备好数据集
* 确保提前下载好对应预训练模型权重
* 若要使用单GPU或者CPU训练，直接使用train.py训练脚本
* 若要使用多GPU训练，使用```torchrun --nproc_per_node=8 train_multi_GPU.py```指令,```nproc_per_node```参数为使用GPU数量
* 如果想指定使用哪些GPU设备可在指令前加上```CUDA_VISIBLE_DEVICES=0,3```(例如我只要使用设备中的第1块和第4块GPU设备)
* ```CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py```

## 注意事项
* 在使用训练脚本时，注意要将'--data-path'(VOC_root)设置为自己存放'VOCdevkit'文件夹所在的**根目录**
* 在使用预测脚本时，要将'weights_path'设置为你自己生成的权重路径。
* 使用validation文件时，注意确保你的验证集或者测试集中必须包含每个类别的目标，并且使用时只需要修改'--num-classes'、'--aux'、'--data-path'和'--weights'即可，其他代码尽量不要改动

## 如果对FCN原理不是很理解可参考我的bilibili
* https://www.bilibili.com/video/BV1J3411C7zd
* https://www.bilibili.com/video/BV1ev411u7TX

## 进一步了解该项目，以及对FCN代码的分析可参考我的bilibili
* https://www.bilibili.com/video/BV19q4y1971Q

## Pytorch官方实现的FCN网络框架图
![torch_fcn](torch_fcn.png)


================================================
FILE: pytorch_segmentation/fcn/get_palette.py
================================================
import json
import numpy as np
from PIL import Image

# 读取mask标签
# Read a mask label; the context manager closes the underlying file once the
# palette has been extracted.
with Image.open("./2007_001288.png") as target:
    # Fetch the image's colour palette.
    palette = target.getpalette()
if palette is None:
    # Fail with a clear message instead of an obscure reshape error.
    raise ValueError("image './2007_001288.png' has no palette")
# Group the flat value list into [r, g, b] triples.
palette = np.reshape(palette, (-1, 3)).tolist()
# Convert to a dict keyed by palette index.
pd = dict(enumerate(palette))

json_str = json.dumps(pd)
with open("palette.json", "w") as f:
    f.write(json_str)

# target = np.array(target)
# print(target)


================================================
FILE: pytorch_segmentation/fcn/my_dataset.py
================================================
import os

import torch.utils.data as data
from PIL import Image


class VOCSegmentation(data.Dataset):
    """PASCAL VOC segmentation dataset yielding ``(image, mask)`` pairs.

    Sample names are read from ``ImageSets/Segmentation/<txt_name>`` under
    ``<voc_root>/VOCdevkit/VOC<year>``; images come from ``JPEGImages`` and
    masks from ``SegmentationClass``.
    """

    def __init__(self, voc_root, year="2012", transforms=None, txt_name: str = "train.txt"):
        super().__init__()
        assert year in ["2007", "2012"], "year must be in ['2007', '2012']"
        root = os.path.join(voc_root, "VOCdevkit", f"VOC{year}")
        assert os.path.exists(root), "path '{}' does not exist.".format(root)

        txt_path = os.path.join(root, "ImageSets", "Segmentation", txt_name)
        assert os.path.exists(txt_path), "file '{}' does not exist.".format(txt_path)
        # One sample name per line; blank lines are skipped.
        with open(txt_path, "r") as f:
            file_names = [line.strip() for line in f if line.strip()]

        image_dir = os.path.join(root, 'JPEGImages')
        mask_dir = os.path.join(root, 'SegmentationClass')
        self.images = [os.path.join(image_dir, name + ".jpg") for name in file_names]
        self.masks = [os.path.join(mask_dir, name + ".png") for name in file_names]
        assert (len(self.images) == len(self.masks))
        self.transforms = transforms

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where target is the image segmentation.
        """
        img = Image.open(self.images[index]).convert('RGB')
        target = Image.open(self.masks[index])

        if self.transforms is None:
            return img, target
        return self.transforms(img, target)

    def __len__(self):
        return len(self.images)

    @staticmethod
    def collate_fn(batch):
        """Batch samples of differing sizes: images are padded with 0, targets with 255."""
        images, targets = list(zip(*batch))
        return cat_list(images, fill_value=0), cat_list(targets, fill_value=255)


def cat_list(images, fill_value=0):
    """Stack differently sized tensors into one batch, padding with ``fill_value``.

    The batch tensor takes the per-dimension maximum size over all inputs and
    inherits dtype/device from ``images[0]``.  Every input is copied into the
    leading (top-left) region of its slot along the last two dimensions; the
    remaining positions keep ``fill_value``.

    Args:
        images: non-empty sequence of tensors with the same number of dims.
        fill_value: value written to the padded area.

    Returns:
        Tensor of shape ``(len(images), *max_shape)``.
    """
    # Largest extent of every dimension (channel, h, w) within this batch.
    shapes = [tuple(tensor.shape) for tensor in images]
    largest = tuple(map(max, zip(*shapes)))

    batched = images[0].new_empty((len(images),) + largest).fill_(fill_value)
    for slot, tensor in zip(batched, images):
        slot[..., :tensor.shape[-2], :tensor.shape[-1]].copy_(tensor)
    return batched


# dataset = VOCSegmentation(voc_root="/data/", transforms=get_transform(train=True))
# d1 = dataset[0]
# print(d1)


================================================
FILE: pytorch_segmentation/fcn/palette.json
================================================
{"0": [0, 0, 0], "1": [128, 0, 0], "2": [0, 128, 0], "3": [128, 128, 0], "4": [0, 0, 128], "5": [128, 0, 128], "6": [0, 128, 128], "7": [128, 128, 128], "8": [64, 0, 0], "9": [192, 0, 0], "10": [64, 128, 0], "11": [192, 128, 0], "12": [64, 0, 128], "13": [192, 0, 128], "14": [64, 128, 128], "15": [192, 128, 128], "16": [0, 64, 0], "17": [128, 64, 0], "18": [0, 192, 0], "19": [128, 192, 0], "20": [0, 64, 128], "21": [128, 64, 128], "22": [0, 192, 128], "23": [128, 192, 128], "24": [64, 64, 0], "25": [192, 64, 0], "26": [64, 192, 0], "27": [192, 192, 0], "28": [64, 64, 128], "29": [192, 64, 128], "30": [64, 192, 128], "31": [192, 192, 128], "32": [0, 0, 64], "33": [128, 0, 64], "34": [0, 128, 64], "35": [128, 128, 64], "36": [0, 0, 192], "37": [128, 0, 192], "38": [0, 128, 192], "39": [128, 128, 192], "40": [64, 0, 64], "41": [192, 0, 64], "42": [64, 128, 64], "43": [192, 128, 64], "44": [64, 0, 192], "45": [192, 0, 192], "46": [64, 128, 192], "47": [192, 128, 192], "48": [0, 64, 64], "49": [128, 64, 64], "50": [0, 192, 64], "51": [128, 192, 64], "52": [0, 64, 192], "53": [128, 64, 192], "54": [0, 192, 192], "55": [128, 192, 192], "56": [64, 64, 64], "57": [192, 64, 64], "58": [64, 192, 64], "59": [192, 192, 64], "60": [64, 64, 192], "61": [192, 64, 192], "62": [64, 192, 192], "63": [192, 192, 192], "64": [32, 0, 0], "65": [160, 0, 0], "66": [32, 128, 0], "67": [160, 128, 0], "68": [32, 0, 128], "69": [160, 0, 128], "70": [32, 128, 128], "71": [160, 128, 128], "72": [96, 0, 0], "73": [224, 0, 0], "74": [96, 128, 0], "75": [224, 128, 0], "76": [96, 0, 128], "77": [224, 0, 128], "78": [96, 128, 128], "79": [224, 128, 128], "80": [32, 64, 0], "81": [160, 64, 0], "82": [32, 192, 0], "83": [160, 192, 0], "84": [32, 64, 128], "85": [160, 64, 128], "86": [32, 192, 128], "87": [160, 192, 128], "88": [96, 64, 0], "89": [224, 64, 0], "90": [96, 192, 0], "91": [224, 192, 0], "92": [96, 64, 128], "93": [224, 64, 128], "94": [96, 192, 128], "95": [224, 192, 128], "96": [32, 0, 
64], "97": [160, 0, 64], "98": [32, 128, 64], "99": [160, 128, 64], "100": [32, 0, 192], "101": [160, 0, 192], "102": [32, 128, 192], "103": [160, 128, 192], "104": [96, 0, 64], "105": [224, 0, 64], "106": [96, 128, 64], "107": [224, 128, 64], "108": [96, 0, 192], "109": [224, 0, 192], "110": [96, 128, 192], "111": [224, 128, 192], "112": [32, 64, 64], "113": [160, 64, 64], "114": [32, 192, 64], "115": [160, 192, 64], "116": [32, 64, 192], "117": [160, 64, 192], "118": [32, 192, 192], "119": [160, 192, 192], "120": [96, 64, 64], "121": [224, 64, 64], "122": [96, 192, 64], "123": [224, 192, 64], "124": [96, 64, 192], "125": [224, 64, 192], "126": [96, 192, 192], "127": [224, 192, 192], "128": [0, 32, 0], "129": [128, 32, 0], "130": [0, 160, 0], "131": [128, 160, 0], "132": [0, 32, 128], "133": [128, 32, 128], "134": [0, 160, 128], "135": [128, 160, 128], "136": [64, 32, 0], "137": [192, 32, 0], "138": [64, 160, 0], "139": [192, 160, 0], "140": [64, 32, 128], "141": [192, 32, 128], "142": [64, 160, 128], "143": [192, 160, 128], "144": [0, 96, 0], "145": [128, 96, 0], "146": [0, 224, 0], "147": [128, 224, 0], "148": [0, 96, 128], "149": [128, 96, 128], "150": [0, 224, 128], "151": [128, 224, 128], "152": [64, 96, 0], "153": [192, 96, 0], "154": [64, 224, 0], "155": [192, 224, 0], "156": [64, 96, 128], "157": [192, 96, 128], "158": [64, 224, 128], "159": [192, 224, 128], "160": [0, 32, 64], "161": [128, 32, 64], "162": [0, 160, 64], "163": [128, 160, 64], "164": [0, 32, 192], "165": [128, 32, 192], "166": [0, 160, 192], "167": [128, 160, 192], "168": [64, 32, 64], "169": [192, 32, 64], "170": [64, 160, 64], "171": [192, 160, 64], "172": [64, 32, 192], "173": [192, 32, 192], "174": [64, 160, 192], "175": [192, 160, 192], "176": [0, 96, 64], "177": [128, 96, 64], "178": [0, 224, 64], "179": [128, 224, 64], "180": [0, 96, 192], "181": [128, 96, 192], "182": [0, 224, 192], "183": [128, 224, 192], "184": [64, 96, 64], "185": [192, 96, 64], "186": [64, 224, 64], "187": [192, 
224, 64], "188": [64, 96, 192], "189": [192, 96, 192], "190": [64, 224, 192], "191": [192, 224, 192], "192": [32, 32, 0], "193": [160, 32, 0], "194": [32, 160, 0], "195": [160, 160, 0], "196": [32, 32, 128], "197": [160, 32, 128], "198": [32, 160, 128], "199": [160, 160, 128], "200": [96, 32, 0], "201": [224, 32, 0], "202": [96, 160, 0], "203": [224, 160, 0], "204": [96, 32, 128], "205": [224, 32, 128], "206": [96, 160, 128], "207": [224, 160, 128], "208": [32, 96, 0], "209": [160, 96, 0], "210": [32, 224, 0], "211": [160, 224, 0], "212": [32, 96, 128], "213": [160, 96, 128], "214": [32, 224, 128], "215": [160, 224, 128], "216": [96, 96, 0], "217": [224, 96, 0], "218": [96, 224, 0], "219": [224, 224, 0], "220": [96, 96, 128], "221": [224, 96, 128], "222": [96, 224, 128], "223": [224, 224, 128], "224": [32, 32, 64], "225": [160, 32, 64], "226": [32, 160, 64], "227": [160, 160, 64], "228": [32, 32, 192], "229": [160, 32, 192], "230": [32, 160, 192], "231": [160, 160, 192], "232": [96, 32, 64], "233": [224, 32, 64], "234": [96, 160, 64], "235": [224, 160, 64], "236": [96, 32, 192], "237": [224, 32, 192], "238": [96, 160, 192], "239": [224, 160, 192], "240": [32, 96, 64], "241": [160, 96, 64], "242": [32, 224, 64], "243": [160, 224, 64], "244": [32, 96, 192], "245": [160, 96, 192], "246": [32, 224, 192], "247": [160, 224, 192], "248": [96, 96, 64], "249": [224, 96, 64], "250": [96, 224, 64], "251": [224, 224, 64], "252": [96, 96, 192], "253": [224, 96, 192], "254": [96, 224, 192], "255": [224, 224, 192]}

================================================
FILE: pytorch_segmentation/fcn/pascal_voc_classes.json
================================================
{
    "aeroplane": 1,
    "bicycle": 2,
    "bird": 3,
    "boat": 4,
    "bottle": 5,
    "bus": 6,
    "car": 7,
    "cat": 8,
    "chair": 9,
    "cow": 10,
    "diningtable": 11,
    "dog": 12,
    "horse": 13,
    "motorbike": 14,
    "person": 15,
    "pottedplant": 16,
    "sheep": 17,
    "sofa": 18,
    "train": 19,
    "tvmonitor": 20
}

================================================
FILE: pytorch_segmentation/fcn/predict.py
================================================
import os
import time
import json

import torch
from torchvision import transforms
import numpy as np
from PIL import Image

from src import fcn_resnet50


def time_synchronized():
    """Return ``time.time()`` after synchronizing CUDA when CUDA is available.

    Synchronizing first makes the timestamp reflect completed GPU work rather
    than merely queued kernels.
    """
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()


def main():
    """Segment ``./test.jpg`` with a trained FCN-ResNet50 and save the mask.

    Steps: load the colour palette, build the model without the auxiliary
    head, load the checkpoint (dropping auxiliary-head weights), preprocess
    the image, run one warm-up pass plus one timed pass, and write the
    per-pixel argmax as a palettised PNG to ``test_result.png``.

    Raises:
        AssertionError: if the weights, image or palette file is missing.
    """
    aux = False  # the auxiliary classifier is not needed at inference time
    classes = 20
    weights_path = "./save_weights/model_29.pth"
    img_path = "./test.jpg"
    palette_path = "./palette.json"
    assert os.path.exists(weights_path), f"weights {weights_path} not found."
    assert os.path.exists(img_path), f"image {img_path} not found."
    assert os.path.exists(palette_path), f"palette {palette_path} not found."
    with open(palette_path, "rb") as f:
        pallette_dict = json.load(f)
    # Flatten {"index": [r, g, b], ...} into the flat list putpalette() expects.
    pallette = [channel for rgb in pallette_dict.values() for channel in rgb]

    # get devices
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # create model (+1 output channel for the background class)
    model = fcn_resnet50(aux=aux, num_classes=classes+1)

    # delete weights about aux_classifier, the model above has no such head
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    weights_dict = torch.load(weights_path, map_location='cpu')['model']
    for k in list(weights_dict.keys()):
        if "aux" in k:
            del weights_dict[k]

    # load weights
    model.load_state_dict(weights_dict)
    model.to(device)

    # load image; force 3-channel RGB so grayscale / palette / RGBA inputs match
    # the 3-channel normalisation below (the dataset loader does the same)
    original_img = Image.open(img_path).convert('RGB')

    # from pil image to tensor and normalize
    data_transform = transforms.Compose([transforms.Resize(520),
                                         transforms.ToTensor(),
                                         transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                                              std=(0.229, 0.224, 0.225))])
    img = data_transform(original_img)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)

    model.eval()  # switch to evaluation mode
    with torch.no_grad():
        # warm-up pass on a dummy input so one-off initialisation cost is not timed
        img_height, img_width = img.shape[-2:]
        init_img = torch.zeros((1, 3, img_height, img_width), device=device)
        model(init_img)

        t_start = time_synchronized()
        output = model(img.to(device))
        t_end = time_synchronized()
        print("inference time: {}".format(t_end - t_start))

        # class index per pixel, stored as uint8 so it can carry a palette
        prediction = output['out'].argmax(1).squeeze(0)
        prediction = prediction.to("cpu").numpy().astype(np.uint8)
        mask = Image.fromarray(prediction)
        mask.putpalette(pallette)
        mask.save("test_result.png")


if __name__ == '__main__':
    main()


================================================
FILE: pytorch_segmentation/fcn/requirements.txt
================================================
numpy==1.22.0
torch==1.13.1
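# NOTE(review): torch 1.13.1 pairs with torchvision 0.14.1 (torchvision 0.11.1 targets torch 1.10.0); confirm and align this pin.
torchvision==0.11.1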
Pillow


================================================
FILE: pytorch_segmentation/fcn/results20210918-122740.txt
================================================
[epoch: 0] 
train_loss: 0.7720 
lr: 0.000100 
global correct: 93.4
average row correct: ['96.8', '90.7', '79.2', '82.7', '78.3', '59.4', '91.2', '82.9', '92.9', '57.8', '87.1', '66.5', '82.8', '78.7', '88.9', '95.1', '62.9', '86.6', '67.1', '89.4', '81.4']
IoU: ['93.2', '85.9', '39.2', '79.5', '68.9', '55.6', '88.1', '72.8', '81.7', '41.1', '74.9', '54.0', '72.9', '74.6', '77.7', '87.1', '54.4', '75.1', '50.7', '82.9', '72.6']
mean IoU: 70.6

[epoch: 1] 
train_loss: 0.6589 
lr: 0.000090 
global correct: 93.4
average row correct: ['96.5', '89.4', '74.6', '84.5', '82.9', '68.8', '93.8', '84.7', '93.5', '56.9', '87.5', '68.0', '81.8', '78.2', '90.7', '94.8', '65.6', '87.1', '70.9', '89.1', '85.1']
IoU: ['93.3', '85.5', '38.2', '79.9', '69.8', '62.7', '87.5', '75.7', '80.3', '40.7', '74.8', '54.6', '72.2', '74.1', '76.6', '87.6', '54.7', '72.9', '51.0', '82.6', '70.9']
mean IoU: 70.7

[epoch: 2] 
train_loss: 0.6238 
lr: 0.000080 
global correct: 93.5
average row correct: ['96.5', '93.2', '75.8', '85.3', '84.2', '70.6', '91.7', '85.7', '93.2', '58.8', '76.7', '68.3', '81.4', '83.2', '88.7', '95.1', '69.9', '88.6', '70.5', '91.8', '86.7']
IoU: ['93.5', '86.1', '39.0', '81.2', '69.9', '63.9', '87.7', '76.4', '80.2', '41.5', '71.8', '56.2', '71.3', '74.4', '78.0', '87.3', '57.3', '70.9', '50.3', '82.8', '71.9']
mean IoU: 71.0

[epoch: 3] 
train_loss: 0.5854 
lr: 0.000069 
global correct: 93.5
average row correct: ['96.7', '91.5', '77.3', '83.9', '80.8', '74.0', '92.6', '86.7', '94.3', '65.0', '68.7', '67.8', '76.9', '88.2', '85.5', '94.5', '71.8', '87.9', '66.7', '89.4', '86.3']
IoU: ['93.6', '87.0', '39.4', '80.6', '69.4', '66.5', '87.7', '76.9', '78.5', '41.5', '66.3', '55.8', '68.9', '70.8', '78.4', '88.2', '58.7', '71.0', '49.4', '83.3', '74.1']
mean IoU: 70.8

[epoch: 4] 
train_loss: 0.6140 
lr: 0.000059 
global correct: 93.6
average row correct: ['96.5', '92.4', '77.4', '85.1', '80.2', '80.6', '94.1', '87.0', '94.8', '62.8', '87.2', '70.0', '78.8', '77.5', '85.8', '94.7', '73.4', '83.9', '68.6', '88.0', '86.6']
IoU: ['93.7', '87.1', '39.4', '80.7', '70.1', '70.2', '87.0', '77.3', '78.9', '41.3', '72.3', '56.7', '69.9', '72.2', '77.9', '87.8', '57.8', '72.6', '50.5', '82.1', '74.3']
mean IoU: 71.4

[epoch: 5] 
train_loss: 0.5653 
lr: 0.000048 
global correct: 93.7
average row correct: ['96.6', '87.7', '76.9', '84.3', '79.3', '81.6', '92.6', '88.4', '94.0', '61.9', '76.7', '71.0', '81.5', '88.2', '87.3', '94.5', '73.5', '84.9', '69.1', '91.4', '86.6']
IoU: ['93.7', '85.6', '40.1', '80.7', '70.2', '70.5', '87.6', '77.4', '80.4', '42.0', '72.4', '57.0', '72.2', '73.4', '78.4', '88.1', '58.6', '74.3', '50.4', '82.7', '73.6']
mean IoU: 71.9

[epoch: 6] 
train_loss: 0.5500 
lr: 0.000037 
global correct: 93.1
average row correct: ['96.4', '91.6', '74.8', '78.4', '83.2', '81.9', '89.4', '88.7', '95.8', '59.4', '57.2', '70.0', '77.4', '75.2', '87.6', '95.2', '74.1', '82.4', '72.4', '91.9', '87.2']
IoU: ['93.6', '87.5', '39.5', '76.1', '68.5', '71.0', '86.1', '78.6', '74.6', '41.4', '54.6', '57.2', '61.0', '67.0', '78.2', '87.9', '58.1', '67.5', '50.4', '82.0', '74.2']
mean IoU: 69.3

[epoch: 7] 
train_loss: 0.5553 
lr: 0.000026 
global correct: 93.3
average row correct: ['96.7', '88.4', '72.3', '82.2', '80.7', '81.6', '82.5', '89.7', '93.4', '59.0', '69.5', '70.1', '78.8', '86.4', '87.3', '94.9', '70.8', '89.6', '72.2', '85.4', '86.3']
IoU: ['93.6', '85.5', '39.0', '79.2', '69.8', '70.8', '79.7', '76.8', '79.0', '41.8', '65.9', '57.1', '68.9', '71.0', '78.0', '87.9', '58.6', '66.9', '50.7', '78.2', '74.4']
mean IoU: 70.1

[epoch: 8] 
train_loss: 0.5601 
lr: 0.000014 
global correct: 93.4
average row correct: ['96.5', '91.0', '73.8', '81.4', '83.7', '83.4', '89.9', '88.8', '95.4', '61.3', '80.6', '70.0', '75.4', '84.3', '88.2', '94.9', '72.0', '83.7', '69.7', '83.3', '88.5']
IoU: ['93.6', '87.2', '40.1', '78.6', '69.8', '71.3', '84.5', '77.6', '76.3', '41.0', '72.4', '56.8', '66.9', '73.2', '77.6', '87.8', '59.2', '72.5', '50.2', '78.7', '69.9']
mean IoU: 70.7

[epoch: 9] 
train_loss: 0.5550 
lr: 0.000000 
global correct: 93.1
average row correct: ['96.7', '93.8', '72.7', '73.0', '82.1', '80.4', '95.6', '86.7', '95.6', '61.8', '63.6', '69.0', '73.2', '65.1', '87.9', '94.5', '73.7', '86.5', '69.0', '88.4', '87.9']
IoU: ['93.7', '87.2', '39.4', '71.7', '70.4', '70.9', '86.9', '78.5', '73.1', '41.8', '58.4', '56.3', '59.4', '61.9', '78.2', '88.4', '59.3', '63.6', '50.4', '82.6', '73.7']
mean IoU: 68.8


================================================
FILE: pytorch_segmentation/fcn/src/__init__.py
================================================
from .fcn_model import fcn_resnet50, fcn_resnet101


================================================
FILE: pytorch_segmentation/fcn/src/backbone.py
================================================
import torch
import torch.nn as nn


def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` whose padding equals its dilation.

    With ``padding == dilation`` a stride-1 layer keeps the spatial size.
    """
    conv_options = dict(
        kernel_size=3,
        stride=stride,
        padding=dilation,
        dilation=dilation,
        groups=groups,
        bias=False,
    )
    return nn.Conv2d(in_planes, out_planes, **conv_options)


def conv1x1(in_planes, out_planes, stride=1):
    """Return a bias-free 1x1 ``nn.Conv2d`` (pointwise convolution)."""
    pointwise = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=1,
        stride=stride,
        bias=False,
    )
    return pointwise


class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 reduce -> 3x3 -> 1x1 expand, plus shortcut.

    As in torchvision, the downsampling stride sits on the 3x3 convolution
    (``conv2``), whereas the original paper "Deep residual learning for image
    recognition" (https://arxiv.org/abs/1512.03385) puts it on the first 1x1
    convolution.  This variant is known as ResNet V1.5 and improves accuracy
    according to
    https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.

    Args:
        inplanes: number of input channels.
        planes: base channel count; the block outputs ``planes * expansion``.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input to form the shortcut.
        groups: groups of the 3x3 convolution.
        base_width: width per group; scales the inner channel count.
        dilation: dilation (and padding) of the 3x3 convolution.
        norm_layer: normalisation layer factory, ``nn.BatchNorm2d`` by default.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super().__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        inner_channels = int(planes * (base_width / 64.)) * groups
        out_channels = planes * self.expansion

        # conv2 and the shortcut's downsample layer both reduce resolution when stride != 1
        self.conv1 = conv1x1(inplanes, inner_channels)
        self.bn1 = norm_layer(inner_channels)
        self.conv2 = conv3x3(inner_channels, inner_channels, stride, groups, dilation)
        self.bn2 = norm_layer(inner_channels)
        self.conv3 = conv1x1(inner_channels, out_channels)
        self.bn3 = norm_layer(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Apply the three conv/norm stages, add the shortcut and activate."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # Identity shortcut unless a projection was supplied.
        identity = x if self.downsample is None else self.downsample(x)

        out += identity
        return self.relu(out)


class ResNet(nn.Module):
    """ResNet classifier: 7x7 stem, four residual stages, global pooling, FC.

    Args:
        block: residual block class; must expose ``expansion``.
        layers: number of blocks in each of the four stages.
        num_classes: output size of the final linear layer.
        zero_init_residual: zero the last norm weight of each ``Bottleneck``.
        groups: convolution groups forwarded to every block.
        width_per_group: base width forwarded to every block.
        replace_stride_with_dilation: three flags for layer2..layer4; a true
            flag turns that stage's stride-2 into a dilation instead.
        norm_layer: normalisation layer factory, ``nn.BatchNorm2d`` by default.

    Raises:
        ValueError: if ``replace_stride_with_dilation`` does not have 3 items.
    """

    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        super().__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        # Running channel count / dilation, updated by _make_layer.
        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # one flag per strided stage: swap the stride-2 for a dilated
            # convolution when the flag is set
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group

        # Stem
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages
        dilate2, dilate3, dilate4 = (replace_stride_with_dilation[0],
                                     replace_stride_with_dilation[1],
                                     replace_stride_with_dilation[2])
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=dilate2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=dilate3)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dilate=dilate4)

        # Classification head
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

        # Zero the last BN of every residual branch so each block starts out as
        # an identity mapping; reported to gain 0.2~0.3%
        # (https://arxiv.org/abs/1706.02677).
        if zero_init_residual:
            for module in self.modules():
                if isinstance(module, Bottleneck):
                    nn.init.constant_(module.bn3.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        """Build one stage of ``blocks`` residual blocks as an ``nn.Sequential``.

        Only the first block receives ``stride`` and the projection shortcut.
        When ``dilate`` is set the stride is folded into ``self.dilation`` and
        the stage runs with stride 1.
        """
        norm_layer = self._norm_layer
        first_block_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1

        out_channels = planes * block.expansion
        downsample = None
        if stride != 1 or self.inplanes != out_channels:
            # Projection shortcut to match resolution and/or channel count.
            downsample = nn.Sequential(
                conv1x1(self.inplanes, out_channels, stride),
                norm_layer(out_channels),
            )

        stage = [block(self.inplanes, planes, stride, downsample, self.groups,
                       self.base_width, first_block_dilation, norm_layer)]
        self.inplanes = out_channels
        stage.extend(
            block(self.inplanes, planes, groups=self.groups,
                  base_width=self.base_width, dilation=self.dilation,
                  norm_layer=norm_layer)
            for _ in range(1, blocks)
        )

        return nn.Sequential(*stage)

    def _forward_impl(self, x):
        """Run stem, the four stages, pooling and the linear classifier."""
        # See note [TorchScript super()]
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        x = self.layer4(self.layer3(self.layer2(self.layer1(x))))

        x = torch.flatten(self.avgpool(x), 1)
        return self.fc(x)

    def forward(self, x):
        """Return class logits for the input batch ``x``."""
        return self._forward_impl(x)


def _resnet(block, layers, **kwargs):
    """Construct a ``ResNet`` from a block class and per-stage block counts.

    Extra keyword arguments are forwarded to the ``ResNet`` constructor.
    """
    return ResNet(block, layers, **kwargs)


def resnet50(**kwargs):
    r"""Build a ResNet-50 (``Bottleneck`` blocks, stage depths 3-4-6-3) from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_

    No weights are loaded here; the caller loads a state dict if needed.

    Args:
        **kwargs: forwarded unchanged to ``_resnet`` (e.g.
            ``replace_stride_with_dilation``, ``num_classes``).
    """
    stage_depths = [3, 4, 6, 3]
    return _resnet(Bottleneck, stage_depths, **kwargs)


def resnet101(**kwargs):
    r"""Build a ResNet-101 (``Bottleneck`` blocks, stage depths 3-4-23-3) from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_

    No weights are loaded here; the caller loads a state dict if needed.

    Args:
        **kwargs: forwarded unchanged to ``_resnet`` (e.g.
            ``replace_stride_with_dilation``, ``num_classes``).
    """
    stage_depths = [3, 4, 23, 3]
    return _resnet(Bottleneck, stage_depths, **kwargs)


================================================
FILE: pytorch_segmentation/fcn/src/fcn_model.py
================================================
from collections import OrderedDict

from typing import Dict

import torch
from torch import nn, Tensor
from torch.nn import functional as F
from .backbone import resnet50, resnet101


class IntermediateLayerGetter(nn.ModuleDict):
    """
    Wrap a model so that its forward pass returns selected intermediate outputs.

    The direct children of ``model`` are copied, in registration order, up to
    and including the last requested layer; later children are dropped.
    ``forward`` then feeds the input through the kept children one after the
    other, so this only works when the model uses its children exactly once
    and in the order they were registered.

    Only direct children can be requested: ``model.feature1`` is reachable,
    ``model.feature1.layer2`` is not.

    Args:
        model (nn.Module): model whose features are extracted.
        return_layers (Dict[name, new_name]): maps the name of a child module
            to the key under which its output is returned.

    Raises:
        ValueError: if a key of ``return_layers`` is not a direct child.
    """
    _version = 2
    __annotations__ = {
        "return_layers": Dict[str, str],
    }

    def __init__(self, model: nn.Module, return_layers: Dict[str, str]) -> None:
        child_names = [child_name for child_name, _ in model.named_children()]
        if not set(return_layers).issubset(child_names):
            raise ValueError("return_layers are not present in model")

        # Requested layers that have not been reached yet.
        pending = {str(key) for key in return_layers}

        # Rebuild the backbone, dropping every module after the last requested one.
        kept = OrderedDict()
        for child_name, child in model.named_children():
            kept[child_name] = child
            pending.discard(child_name)
            if not pending:
                break

        super().__init__(kept)
        self.return_layers = return_layers

    def forward(self, x: Tensor) -> Dict[str, Tensor]:
        """Run the kept children sequentially and collect the requested outputs."""
        collected = OrderedDict()
        for child_name, child in self.items():
            x = child(x)
            if child_name in self.return_layers:
                collected[self.return_layers[child_name]] = x
        return collected


class FCN(nn.Module):
    """
    Fully-Convolutional Network for semantic segmentation.

    Args:
        backbone (nn.Module): feature extractor returning a dict of tensors
            with key "out" for the last feature map and, when an auxiliary
            classifier is used, key "aux".
        classifier (nn.Module): head applied to the "out" features to produce
            dense per-class scores.
        aux_classifier (nn.Module, optional): auxiliary head applied to the
            "aux" features, used during training.

    The forward pass returns an ordered dict with key "out" (and "aux" when
    the auxiliary head exists); each entry is resized to the input's spatial
    size.
    """
    __constants__ = ['aux_classifier']

    def __init__(self, backbone, classifier, aux_classifier=None):
        super().__init__()
        self.backbone = backbone
        self.classifier = classifier
        self.aux_classifier = aux_classifier

    def forward(self, x: Tensor) -> Dict[str, Tensor]:
        target_size = x.shape[-2:]
        # contract: the backbone returns a dict of tensors
        features = self.backbone(x)

        heads = [("out", self.classifier)]
        if self.aux_classifier is not None:
            heads.append(("aux", self.aux_classifier))

        result = OrderedDict()
        for key, head in heads:
            scores = head(features[key])
            # The original paper uses ConvTranspose2d here, but with frozen
            # weights, which amounts to plain bilinear interpolation.
            result[key] = F.interpolate(scores, size=target_size, mode='bilinear', align_corners=False)

        return result


class FCNHead(nn.Sequential):
    """Segmentation head: 3x3 conv -> BN -> ReLU -> Dropout(0.1) -> 1x1 conv.

    Args:
        in_channels: channels of the incoming feature map; the hidden width
            is ``in_channels // 4``.
        channels: number of output channels (one per class).
    """

    def __init__(self, in_channels, channels):
        hidden = in_channels // 4
        super().__init__(
            nn.Conv2d(in_channels, hidden, 3, padding=1, bias=False),
            nn.BatchNorm2d(hidden),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Conv2d(hidden, channels, 1),
        )


def fcn_resnet50(aux, num_classes=21, pretrain_backbone=False):
    """Assemble an FCN with a dilated ResNet-50 backbone.

    Args:
        aux: when true, also tap ``layer3`` and attach an auxiliary head.
        num_classes: number of output channels of each head.
        pretrain_backbone: when true, load backbone weights from the local
            file ``resnet50.pth`` before wrapping the backbone.

    Returns:
        The assembled ``FCN`` model.
    """
    # 'resnet50_imagenet': 'https://download.pytorch.org/models/resnet50-0676ba61.pth'
    # 'fcn_resnet50_coco': 'https://download.pytorch.org/models/fcn_resnet50_coco-1167a1af.pth'
    resnet = resnet50(replace_stride_with_dilation=[False, True, True])

    if pretrain_backbone:
        # load the pretrained ResNet-50 backbone weights
        backbone_state = torch.load("resnet50.pth", map_location='cpu')
        resnet.load_state_dict(backbone_state)

    # Channel counts of the tapped layers: layer4 -> 2048, layer3 -> 1024.
    out_inplanes, aux_inplanes = 2048, 1024

    return_layers = {'layer4': 'out'}
    if aux:
        return_layers['layer3'] = 'aux'
    backbone = IntermediateLayerGetter(resnet, return_layers=return_layers)

    # why using aux: https://github.com/pytorch/vision/issues/4292
    aux_classifier = FCNHead(aux_inplanes, num_classes) if aux else None
    classifier = FCNHead(out_inplanes, num_classes)

    return FCN(backbone, classifier, aux_classifier)


def fcn_resnet101(aux, num_classes=21, pretrain_backbone=False):
    """Assemble an FCN with a dilated ResNet-101 backbone.

    Args:
        aux: when true, also tap ``layer3`` and attach an auxiliary head.
        num_classes: number of output channels of each head.
        pretrain_backbone: when true, load backbone weights from the local
            file ``resnet101.pth`` before wrapping the backbone.

    Returns:
        The assembled ``FCN`` model.
    """
    # 'resnet101_imagenet': 'https://download.pytorch.org/models/resnet101-63fe2227.pth'
    # 'fcn_resnet101_coco': 'https://download.pytorch.org/models/fcn_resnet101_coco-7ecb50ca.pth'
    resnet = resnet101(replace_stride_with_dilation=[False, True, True])

    if pretrain_backbone:
        # load the pretrained ResNet-101 backbone weights
        backbone_state = torch.load("resnet101.pth", map_location='cpu')
        resnet.load_state_dict(backbone_state)

    # Channel counts of the tapped layers: layer4 -> 2048, layer3 -> 1024.
    out_inplanes, aux_inplanes = 2048, 1024

    return_layers = {'layer4': 'out'}
    if aux:
        return_layers['layer3'] = 'aux'
    backbone = IntermediateLayerGetter(resnet, return_layers=return_layers)

    # why using aux: https://github.com/pytorch/vision/issues/4292
    aux_classifier = FCNHead(aux_inplanes, num_classes) if aux else None
    classifier = FCNHead(out_inplanes, num_classes)

    return FCN(backbone, classifier, aux_classifier)


================================================
FILE: pytorch_segmentation/fcn/train.py
================================================
import os
import time
import datetime

import torch

from src import fcn_resnet50
from train_utils import train_one_epoch, evaluate, create_lr_scheduler
from my_dataset import VOCSegmentation
import transforms as T


class SegmentationPresetTrain:
    """Training-time joint transform for ``(image, target)`` pairs.

    Pipeline: random resize between ``0.5 * base_size`` and
    ``2.0 * base_size``, optional random horizontal flip, random crop to
    ``crop_size``, tensor conversion and normalisation.

    Args:
        base_size: reference size the resize range is derived from.
        crop_size: size passed to the random crop.
        hflip_prob: flip probability; the flip step is omitted when it is <= 0.
        mean: per-channel mean for normalisation.
        std: per-channel standard deviation for normalisation.
    """

    def __init__(self, base_size, crop_size, hflip_prob=0.5, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        min_size = int(0.5 * base_size)
        max_size = int(2.0 * base_size)

        pipeline = [T.RandomResize(min_size, max_size)]
        pipeline += [T.RandomHorizontalFlip(hflip_prob)] if hflip_prob > 0 else []
        pipeline += [
            T.RandomCrop(crop_size),
            T.ToTensor(),
            T.Normalize(mean=mean, std=std),
        ]
        self.transforms = T.Compose(pipeline)

    def __call__(self, img, target):
        """Apply the composed pipeline to one image/target pair."""
        return self.transforms(img, target)


class SegmentationPresetEval:
    """Evaluation-time joint transform for ``(image, target)`` pairs.

    Pipeline: ``RandomResize(base_size, base_size)`` (min == max, so
    presumably a fixed-size resize -- confirm in the transforms module),
    tensor conversion and normalisation.

    Args:
        base_size: size used as both bounds of the resize.
        mean: per-channel mean for normalisation.
        std: per-channel standard deviation for normalisation.
    """

    def __init__(self, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        resize = T.RandomResize(base_size, base_size)
        to_tensor = T.ToTensor()
        normalize = T.Normalize(mean=mean, std=std)
        self.transforms = T.Compose([resize, to_tensor, normalize])

    def __call__(self, img, target):
        """Apply the composed pipeline to one image/target pair."""
        return self.transforms(img, target)


def get_transform(train):
    """Return the training preset when ``train`` is truthy, else the eval preset.

    Both presets use a base size of 520; the training preset crops to 480.
    """
    base_size = 520
    crop_size = 480

    if train:
        return SegmentationPresetTrain(base_size, crop_size)
    return SegmentationPresetEval(base_size)


def create_model(aux, num_classes, pretrain=True):
    """Build an FCN-ResNet50 and optionally initialise it from COCO weights.

    Args:
        aux: whether the model gets an auxiliary classifier.
        num_classes: number of output channels (background included).
        pretrain: when true, load ``./fcn_resnet50_coco.pth`` non-strictly and
            print any missing or unexpected keys.

    Returns:
        The constructed model.
    """
    model = fcn_resnet50(aux=aux, num_classes=num_classes)
    if not pretrain:
        return model

    weights_dict = torch.load("./fcn_resnet50_coco.pth", map_location='cpu')

    if num_classes != 21:
        # The official pretrained weights cover 21 classes (background included).
        # For a custom dataset, drop the class-dependent weights so mismatched
        # shapes do not raise.  The substring also matches "aux_classifier.4".
        class_specific = [key for key in weights_dict if "classifier.4" in key]
        for key in class_specific:
            del weights_dict[key]

    missing_keys, unexpected_keys = model.load_state_dict(weights_dict, strict=False)
    if len(missing_keys) + len(unexpected_keys) > 0:
        print("missing_keys: ", missing_keys)
        print("unexpected_keys: ", unexpected_keys)

    return model


def main(args):
    """Train FCN-ResNet50 on the VOC2012 segmentation split and checkpoint every epoch.

    Per epoch this trains once over the training loader, evaluates on the
    validation loader, appends the metrics to a timestamped results file and
    writes a checkpoint to ``save_weights/model_{epoch}.pth``.

    Args:
        args: namespace providing ``device``, ``batch_size``, ``num_classes``,
            ``data_path``, ``aux``, ``lr``, ``momentum``, ``weight_decay``,
            ``amp``, ``epochs``, ``resume``, ``start_epoch`` and ``print_freq``.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    batch_size = args.batch_size
    # segmentation num_classes + background
    num_classes = args.num_classes + 1

    # File that collects the training and validation information of every epoch.
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> train.txt
    train_dataset = VOCSegmentation(args.data_path,
                                    year="2012",
                                    transforms=get_transform(train=True),
                                    txt_name="train.txt")

    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> val.txt
    val_dataset = VOCSegmentation(args.data_path,
                                  year="2012",
                                  transforms=get_transform(train=False),
                                  txt_name="val.txt")

    # os.cpu_count() may return None when the count cannot be determined;
    # fall back to 1 so that min() never compares None with an int.
    num_workers = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               num_workers=num_workers,
                                               shuffle=True,
                                               pin_memory=True,
                                               collate_fn=train_dataset.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             num_workers=num_workers,
                                             pin_memory=True,
                                             collate_fn=val_dataset.collate_fn)

    model = create_model(aux=args.aux, num_classes=num_classes)
    model.to(device)

    params_to_optimize = [
        {"params": [p for p in model.backbone.parameters() if p.requires_grad]},
        {"params": [p for p in model.classifier.parameters() if p.requires_grad]}
    ]

    if args.aux:
        # The auxiliary classifier gets its own parameter group with a 10x learning rate.
        params = [p for p in model.aux_classifier.parameters() if p.requires_grad]
        params_to_optimize.append({"params": params, "lr": args.lr * 10})

    optimizer = torch.optim.SGD(
        params_to_optimize,
        lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay
    )

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # Learning-rate schedule that is stepped once per iteration (not once per epoch).
    lr_scheduler = create_lr_scheduler(optimizer, len(train_loader), args.epochs, warmup=True)

    if args.resume:
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp:
            scaler.load_state_dict(checkpoint["scaler"])

    # Make sure the checkpoint folder exists even when main() is called
    # from other code rather than through the script entry point.
    os.makedirs("save_weights", exist_ok=True)

    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        mean_loss, lr = train_one_epoch(model, optimizer, train_loader, device, epoch,
                                        lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler)

        confmat = evaluate(model, val_loader, device=device, num_classes=num_classes)
        val_info = str(confmat)
        print(val_info)
        # write into txt
        with open(results_file, "a") as f:
            # Record train_loss, lr and the validation metrics of this epoch.
            train_info = f"[epoch: {epoch}]\n" \
                         f"train_loss: {mean_loss:.4f}\n" \
                         f"lr: {lr:.6f}\n"
            f.write(train_info + val_info + "\n\n")

        save_file = {"model": model.state_dict(),
                     "optimizer": optimizer.state_dict(),
                     "lr_scheduler": lr_scheduler.state_dict(),
                     "epoch": epoch,
                     "args": args}
        if args.amp:
            save_file["scaler"] = scaler.state_dict()
        torch.save(save_file, "save_weights/model_{}.pth".format(epoch))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print("training time {}".format(total_time_str))


def parse_args():
    """Parse the command-line options of the single-GPU FCN training script.

    Returns:
        argparse.Namespace holding the parsed options.

    Boolean options (``--aux``, ``--amp``) accept true/false style words.
    ``type=bool`` is not used because ``bool("False")`` is ``True``, which
    made it impossible to switch these options off with ``--aux False``.
    """
    import argparse

    def str2bool(value):
        # Convert a command-line word into a real boolean.
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ("true", "t", "yes", "y", "1"):
            return True
        # The empty string is kept as False because bool("") was the only
        # way to obtain False with the previous type=bool converter.
        if text in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got '{}'".format(value))

    parser = argparse.ArgumentParser(description="pytorch fcn training")

    parser.add_argument("--data-path", default="/data/", help="VOCdevkit root")
    parser.add_argument("--num-classes", default=20, type=int)
    parser.add_argument("--aux", default=True, type=str2bool, help="auxilier loss")
    parser.add_argument("--device", default="cuda", help="training device")
    parser.add_argument("-b", "--batch-size", default=4, type=int)
    parser.add_argument("--epochs", default=30, type=int, metavar="N",
                        help="number of total epochs to train")

    parser.add_argument('--lr', default=0.0001, type=float, help='initial learning rate')
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    parser.add_argument('--print-freq', default=10, type=int, help='print frequency')
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                        help='start epoch')
    # Mixed precision training parameters
    parser.add_argument("--amp", default=False, type=str2bool,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()

    return args


if __name__ == '__main__':
    args = parse_args()

    # Create the ./save_weights folder when it is not there yet.
    if os.path.exists("./save_weights") is False:
        os.mkdir("./save_weights")

    main(args)


================================================
FILE: pytorch_segmentation/fcn/train_multi_GPU.py
================================================
import time
import os
import datetime

import torch

from src import fcn_resnet50
from train_utils import train_one_epoch, evaluate, create_lr_scheduler, init_distributed_mode, save_on_master, mkdir
from my_dataset import VOCSegmentation
import transforms as T


class SegmentationPresetTrain:
    """Training-time transform pipeline applied jointly to an image and its target."""

    def __init__(self, base_size, crop_size, hflip_prob=0.5, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        # The resize step receives the bounds int(0.5 * base_size) and int(2.0 * base_size).
        resize = T.RandomResize(int(0.5 * base_size), int(2.0 * base_size))
        # The flip step is left out entirely when its probability is not positive.
        flip = [T.RandomHorizontalFlip(hflip_prob)] if hflip_prob > 0 else []
        tail = [
            T.RandomCrop(crop_size),
            T.ToTensor(),
            T.Normalize(mean=mean, std=std),
        ]
        self.transforms = T.Compose([resize, *flip, *tail])

    def __call__(self, img, target):
        """Run the composed pipeline on ``img`` and ``target`` and return its result."""
        pipeline = self.transforms
        return pipeline(img, target)


class SegmentationPresetEval:
    """Evaluation-time pipeline: resize, tensor conversion and normalization."""

    def __init__(self, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        # Both resize bounds are base_size, so the resize target is not random here.
        steps = [
            T.RandomResize(base_size, base_size),
            T.ToTensor(),
            T.Normalize(mean=mean, std=std),
        ]
        self.transforms = T.Compose(steps)

    def __call__(self, img, target):
        """Run the composed pipeline on ``img`` and ``target`` and return its result."""
        pipeline = self.transforms
        return pipeline(img, target)


def get_transform(train):
    """Return the training preset when ``train`` is truthy, otherwise the evaluation preset."""
    base_size, crop_size = 520, 480

    if train:
        return SegmentationPresetTrain(base_size, crop_size)
    return SegmentationPresetEval(base_size)


def create_model(aux, num_classes, pretrain=True):
    """Build an FCN-ResNet50 and optionally initialise it from ./fcn_resnet50_coco.pth.

    Args:
        aux: forwarded to ``fcn_resnet50`` as ``aux``.
        num_classes: number of output classes (background included).
        pretrain: when truthy (the default, matching the previous behaviour)
            the checkpoint file is loaded non-strictly. Pass ``False`` to get
            a model without touching the checkpoint file, e.g. when the
            weights come from a resumed training checkpoint anyway.

    Returns:
        The constructed model.
    """
    model = fcn_resnet50(aux=aux, num_classes=num_classes)

    if pretrain:
        weights_dict = torch.load("./fcn_resnet50_coco.pth", map_location='cpu')

        if num_classes != 21:
            # The official pretrained weights cover 21 classes (background included).
            # For a custom dataset, drop the class-dependent weights so that the
            # mismatching shapes do not raise while loading.
            for k in list(weights_dict.keys()):
                if "classifier.4" in k:
                    del weights_dict[k]

        missing_keys, unexpected_keys = model.load_state_dict(weights_dict, strict=False)
        if len(missing_keys) != 0 or len(unexpected_keys) != 0:
            print("missing_keys: ", missing_keys)
            print("unexpected_keys: ", unexpected_keys)

    return model


def main(args):
    """Train (or only evaluate) FCN-ResNet50 on VOC2012, optionally in distributed mode.

    Args:
        args: namespace providing ``device``, ``num_classes``, ``data_path``,
            ``batch_size``, ``workers``, ``aux``, ``sync_bn``, ``lr``,
            ``momentum``, ``weight_decay``, ``amp``, ``epochs``, ``resume``,
            ``start_epoch``, ``test_only``, ``print_freq`` and ``output_dir``,
            plus whatever ``init_distributed_mode`` reads and sets on it.

    Raises:
        FileNotFoundError: if ``args.data_path`` has no ``VOCdevkit`` entry.
    """
    init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)
    # segmentation num_classes + background
    num_classes = args.num_classes + 1

    # File that collects the training and validation information of every epoch.
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    VOC_root = args.data_path
    # check voc root
    if os.path.exists(os.path.join(VOC_root, "VOCdevkit")) is False:
        raise FileNotFoundError("VOCdevkit dose not in path:'{}'.".format(VOC_root))

    # load train data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> train.txt
    train_dataset = VOCSegmentation(args.data_path,
                                    year="2012",
                                    transforms=get_transform(train=True),
                                    txt_name="train.txt")
    # load validation data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> val.txt
    val_dataset = VOCSegmentation(args.data_path,
                                  year="2012",
                                  transforms=get_transform(train=False),
                                  txt_name="val.txt")

    print("Creating data loaders")
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        test_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
    else:
        train_sampler = torch.utils.data.RandomSampler(train_dataset)
        test_sampler = torch.utils.data.SequentialSampler(val_dataset)

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size,
        sampler=train_sampler, num_workers=args.workers,
        collate_fn=train_dataset.collate_fn, drop_last=True)

    # The validation loader takes collate_fn from its own dataset, like train.py does.
    val_data_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=1,
        sampler=test_sampler, num_workers=args.workers,
        collate_fn=val_dataset.collate_fn)

    print("Creating model")
    # create model num_classes equal background + 20 classes
    model = create_model(aux=args.aux, num_classes=num_classes)
    model.to(device)

    if args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    params_to_optimize = [
        {"params": [p for p in model_without_ddp.backbone.parameters() if p.requires_grad]},
        {"params": [p for p in model_without_ddp.classifier.parameters() if p.requires_grad]},
    ]
    if args.aux:
        # The auxiliary classifier gets its own parameter group with a 10x learning rate.
        params = [p for p in model_without_ddp.aux_classifier.parameters() if p.requires_grad]
        params_to_optimize.append({"params": params, "lr": args.lr * 10})
    optimizer = torch.optim.SGD(
        params_to_optimize,
        lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # Learning-rate schedule that is stepped once per iteration (not once per epoch).
    lr_scheduler = create_lr_scheduler(optimizer, len(train_data_loader), args.epochs, warmup=True)

    # When a resume path is given, continue training from that checkpoint.
    if args.resume:
        # If map_location is missing, torch.load will first load the module to CPU
        # and then copy each parameter to where it was saved,
        # which would result in all processes on the same machine using the same set of devices.
        # The checkpoint also carries the optimizer and lr-scheduler state.
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp:
            scaler.load_state_dict(checkpoint["scaler"])

    if args.test_only:
        confmat = evaluate(model, val_data_loader, device=device, num_classes=num_classes)
        val_info = str(confmat)
        print(val_info)
        return

    # init_distributed_mode() leaves args.rank unset when it reports
    # "Not using distributed mode"; a missing rank therefore means this is
    # the only (main) process. Reading args.rank directly raised
    # AttributeError in that case.
    is_main = getattr(args, "rank", 0) in [-1, 0]

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        mean_loss, lr = train_one_epoch(model, optimizer, train_data_loader, device, epoch,
                                        lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler)

        confmat = evaluate(model, val_data_loader, device=device, num_classes=num_classes)
        val_info = str(confmat)
        print(val_info)

        # Only the main process writes the results file.
        if is_main:
            # write into txt
            with open(results_file, "a") as f:
                # Record train_loss, lr and the validation metrics of this epoch.
                train_info = f"[epoch: {epoch}]\n" \
                             f"train_loss: {mean_loss:.4f}\n" \
                             f"lr: {lr:.6f}\n"
                f.write(train_info + val_info + "\n\n")

        if args.output_dir:
            # save_on_master only writes the checkpoint on the main process.
            save_file = {'model': model_without_ddp.state_dict(),
                         'optimizer': optimizer.state_dict(),
                         'lr_scheduler': lr_scheduler.state_dict(),
                         'args': args,
                         'epoch': epoch}
            if args.amp:
                save_file["scaler"] = scaler.state_dict()
            save_on_master(save_file,
                           os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))


if __name__ == "__main__":
    import argparse

    def _str2bool(value):
        """Convert a command-line word into a real boolean.

        ``type=bool`` is not used because ``bool("False")`` is ``True``, so
        options such as ``--aux False`` could never be switched off.
        """
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ("true", "t", "yes", "y", "1"):
            return True
        # The empty string is kept as False because bool("") was the only
        # way to obtain False with the previous type=bool converter.
        if text in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got '{}'".format(value))

    parser = argparse.ArgumentParser(
        description=__doc__)

    # Root directory of the training data (the folder that contains VOCdevkit)
    parser.add_argument('--data-path', default='/data/', help='dataset')
    # Type of the training device
    parser.add_argument('--device', default='cuda', help='device')
    # Number of object classes (background not included)
    parser.add_argument('--num-classes', default=20, type=int, help='num_classes')
    # batch_size on every GPU
    parser.add_argument('-b', '--batch-size', default=4, type=int,
                        help='images per gpu, the total batch size is $NGPU x batch_size')
    parser.add_argument("--aux", default=True, type=_str2bool, help="auxilier loss")
    # Epoch index from which training continues
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # Total number of training epochs
    parser.add_argument('--epochs', default=20, type=int, metavar='N',
                        help='number of total epochs to run')
    # Whether to use synchronized BN (synchronized across GPUs); off by default,
    # training becomes slower when it is switched on
    parser.add_argument('--sync_bn', type=_str2bool, default=False, help='whether using SyncBatchNorm')
    # Number of workers used for data loading and preprocessing
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    # Training learning rate, 0.0001 by default; try a larger value if results are poor
    parser.add_argument('--lr', default=0.0001, type=float,
                        help='initial learning rate')
    # momentum parameter of SGD
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # weight_decay parameter of SGD
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # How often training information is printed
    parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
    # Folder in which files are saved
    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')
    # Continue training from an earlier training result
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    # Do not train, only test
    parser.add_argument(
        "--test-only",
        dest="test_only",
        help="Only test the model",
        action="store_true",
    )

    # Number of distributed processes
    parser.add_argument('--world-size', default=1, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
    # Mixed precision training parameters
    parser.add_argument("--amp", default=False, type=_str2bool,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()

    # If an output folder is given, create it when it does not exist yet.
    if args.output_dir:
        mkdir(args.output_dir)

    main(args)


================================================
FILE: pytorch_segmentation/fcn/train_utils/__init__.py
================================================
from .train_and_eval import train_one_epoch, evaluate, create_lr_scheduler
from .distributed_utils import init_distributed_mode, save_on_master, mkdir


================================================
FILE: pytorch_segmentation/fcn/train_utils/distributed_utils.py
================================================
from collections import defaultdict, deque
import datetime
import time
import torch
import torch.distributed as dist

import errno
import os


class SmoothedValue(object):
    """Keep a bounded window of recent values plus running totals.

    Windowed statistics (median, avg, max, value) read the deque; ``global_avg``
    uses the running ``total`` and ``count`` over every update.
    """

    def __init__(self, window_size=20, fmt=None):
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0
        # Default format shows the latest value and the global average.
        self.fmt = "{value:.4f} ({global_avg:.4f})" if fmt is None else fmt

    def update(self, value, n=1):
        """Append ``value`` to the window and add it ``n`` times to the totals."""
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Sum ``count`` and ``total`` over all processes.

        Warning: does not synchronize the deque!
        """
        if is_dist_avail_and_initialized():
            packed = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
            dist.barrier()
            dist.all_reduce(packed)
            reduced_count, reduced_total = packed.tolist()
            self.count = int(reduced_count)
            self.total = reduced_total

    @property
    def median(self):
        window = torch.tensor(list(self.deque))
        return window.median().item()

    @property
    def avg(self):
        window = torch.tensor(list(self.deque), dtype=torch.float32)
        return window.mean().item()

    @property
    def global_avg(self):
        return self.total / self.count

    @property
    def max(self):
        return max(self.deque)

    @property
    def value(self):
        # Most recently appended value.
        return self.deque[-1]

    def __str__(self):
        stats = {
            "median": self.median,
            "avg": self.avg,
            "global_avg": self.global_avg,
            "max": self.max,
            "value": self.value,
        }
        return self.fmt.format(**stats)


class ConfusionMatrix(object):
    """Accumulate an ``num_classes x num_classes`` confusion matrix and derive metrics from it."""

    def __init__(self, num_classes):
        self.num_classes = num_classes
        self.mat = None

    def update(self, a, b):
        """Add the pairs (a[i], b[i]) to the matrix; row index comes from ``a``, column from ``b``."""
        n = self.num_classes
        if self.mat is None:
            # Create the confusion matrix lazily on the device of the first input.
            self.mat = torch.zeros((n, n), dtype=torch.int64, device=a.device)
        with torch.no_grad():
            # Keep only the positions whose value in ``a`` is a valid class index.
            valid = (a >= 0) & (a < n)
            # Encode every (a, b) pair as one flat index so a single bincount
            # yields the counts of all matrix cells at once.
            flat_index = n * a[valid].to(torch.int64) + b[valid]
            cell_counts = torch.bincount(flat_index, minlength=n**2)
            self.mat += cell_counts.reshape(n, n)

    def reset(self):
        """Zero the matrix in place if it has been created."""
        if self.mat is not None:
            self.mat.zero_()

    def compute(self):
        """Return (global accuracy, per-row accuracy, per-class IoU) as tensors."""
        h = self.mat.float()
        diagonal = torch.diag(h)
        # Global accuracy: the diagonal holds the agreeing counts.
        acc_global = diagonal.sum() / h.sum()
        # Accuracy of every class (row-wise).
        acc = diagonal / h.sum(1)
        # IoU of every class: diagonal / (row sum + column sum - diagonal).
        iu = diagonal / (h.sum(1) + h.sum(0) - diagonal)
        return acc_global, acc, iu

    def reduce_from_all_processes(self):
        """Sum the matrix over all processes when torch.distributed is initialised."""
        if not (torch.distributed.is_available() and torch.distributed.is_initialized()):
            return
        torch.distributed.barrier()
        torch.distributed.all_reduce(self.mat)

    def __str__(self):
        acc_global, acc, iu = self.compute()
        template = (
            'global correct: {:.1f}\n'
            'average row correct: {}\n'
            'IoU: {}\n'
            'mean IoU: {:.1f}')
        row_acc = ['{:.1f}'.format(i) for i in (acc * 100).tolist()]
        class_iou = ['{:.1f}'.format(i) for i in (iu * 100).tolist()]
        return template.format(
            acc_global.item() * 100,
            row_acc,
            class_iou,
            iu.mean().item() * 100)


class MetricLogger(object):
    """Collect named SmoothedValue meters and print progress while iterating."""

    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Feed one scalar per keyword into the meter of the same name."""
        for name, raw in kwargs.items():
            scalar = raw.item() if isinstance(raw, torch.Tensor) else raw
            assert isinstance(scalar, (float, int))
            self.meters[name].update(scalar)

    def __getattr__(self, attr):
        # Only reached when normal attribute lookup fails; meters take
        # precedence over entries of the instance dictionary.
        if attr in self.meters:
            return self.meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        parts = ["{}: {}".format(name, str(meter)) for name, meter in self.meters.items()]
        return self.delimiter.join(parts)

    def synchronize_between_processes(self):
        """Synchronise every meter between processes."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register ``meter`` under ``name``, replacing any existing one."""
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield the items of ``iterable`` and print a progress line every ``print_freq`` items.

        ``iterable`` must support ``len()``. A total-time line is printed once
        the iterable is exhausted.
        """
        if not header:
            header = ''
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        # Pad the running index to the width of the total item count.
        space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
        fields = [
            header,
            '[{0' + space_fmt + '}/{1}]',
            'eta: {eta}',
            '{meters}',
            'time: {time}',
            'data: {data}',
        ]
        if torch.cuda.is_available():
            # The memory column only exists when CUDA is available.
            fields.append('max mem: {memory:.0f}')
        log_msg = self.delimiter.join(fields)
        MB = 1024.0 * 1024.0
        for i, obj in enumerate(iterable):
            # Time spent waiting for the item since the previous iteration ended.
            data_time.update(time.time() - end)
            yield obj
            # Waiting time plus the time the consumer spent on the item.
            iter_time.update(time.time() - end)
            if i % print_freq == 0:
                eta_seconds = iter_time.global_avg * (len(iterable) - i)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
                values = dict(
                    eta=eta_string,
                    meters=str(self),
                    time=str(iter_time),
                    data=str(data_time))
                if torch.cuda.is_available():
                    values['memory'] = torch.cuda.max_memory_allocated() / MB
                print(log_msg.format(i, len(iterable), **values))
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print('{} Total time: {}'.format(header, total_time_str))


def mkdir(path):
    """Create ``path`` with all missing parents; an EEXIST error is ignored, others propagate."""
    try:
        os.makedirs(path)
    except OSError as exc:
        if exc.errno == errno.EEXIST:
            return
        raise


def setup_for_distributed(is_master):
    """
    Replace the builtin ``print`` so that it is silent unless ``is_master``
    is truthy or the call passes ``force=True``.
    """
    import builtins as __builtin__
    original_print = __builtin__.print

    def print(*args, **kwargs):
        # ``force`` is always removed so it never reaches the real print.
        force = kwargs.pop('force', False)
        if not (is_master or force):
            return
        original_print(*args, **kwargs)

    __builtin__.print = print


def is_dist_avail_and_initialized():
    """Return True only when torch.distributed is available and initialised."""
    if dist.is_available() and dist.is_initialized():
        return True
    return False


def get_world_size():
    """Return the distributed world size, or 1 when distributed mode is not active."""
    if is_dist_avail_and_initialized():
        return dist.get_world_size()
    return 1


def get_rank():
    """Return the rank of this process, or 0 when distributed mode is not active."""
    if is_dist_avail_and_initialized():
        return dist.get_rank()
    return 0


def is_main_process():
    """Return True when this process has rank 0."""
    rank = get_rank()
    return rank == 0


def save_on_master(*args, **kwargs):
    """Forward all arguments to ``torch.save``, but only on the main process."""
    if not is_main_process():
        return
    torch.save(*args, **kwargs)


def init_distributed_mode(args):
    """Initialise torch.distributed from the environment and record the setup on ``args``.

    Rank information is taken from RANK/WORLD_SIZE/LOCAL_RANK, else from
    SLURM_PROCID, else from an existing ``args.rank``. When none of these is
    present, ``args.distributed`` is set to False and nothing is initialised.
    """
    env = os.environ
    if 'RANK' in env and 'WORLD_SIZE' in env:
        args.rank = int(env["RANK"])
        args.world_size = int(env['WORLD_SIZE'])
        args.gpu = int(env['LOCAL_RANK'])
    elif 'SLURM_PROCID' in env:
        args.rank = int(env['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()
    elif not hasattr(args, "rank"):
        # No launcher information and no preset rank: single-process run.
        print('Not using distributed mode')
        args.distributed = False
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    banner = '| distributed init (rank {}): {}'.format(args.rank, args.dist_url)
    print(banner, flush=True)
    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                         world_size=args.world_size, rank=args.rank)
    # Silence print() on every process except rank 0.
    setup_for_distributed(args.rank == 0)


================================================
FILE: pytorch_segmentation/fcn/train_utils/train_and_eval.py
================================================
import torch
from torch import nn
import train_utils.distributed_utils as utils


def criterion(inputs, target):
    """Cross-entropy of the 'out' head, plus 0.5 times that of the 'aux' head when more heads exist.

    Target entries equal to 255 are ignored by the loss.
    """
    # 255 marks pixels to skip (object borders or padding in the targets).
    losses = {
        name: nn.functional.cross_entropy(logits, target, ignore_index=255)
        for name, logits in inputs.items()
    }

    main_loss = losses['out']
    if len(losses) == 1:
        return main_loss

    return main_loss + 0.5 * losses['aux']


def evaluate(model, data_loader, device, num_classes):
    """Run ``model`` over ``data_loader`` in eval mode and return the filled confusion matrix.

    The argmax over dimension 1 of the model's 'out' entry is compared with the
    target; the matrix is reduced over all processes before it is returned.
    """
    model.eval()
    confmat = utils.ConfusionMatrix(num_classes)
    metric_logger = utils.MetricLogger(delimiter="  ")
    with torch.no_grad():
        batches = metric_logger.log_every(data_loader, 100, 'Test:')
        for image, target in batches:
            image = image.to(device)
            target = target.to(device)
            prediction = model(image)['out'].argmax(1)

            confmat.update(target.flatten(), prediction.flatten())

        confmat.reduce_from_all_processes()

    return confmat


def train_one_epoch(model, optimizer, data_loader, device, epoch, lr_scheduler, print_freq=10, scaler=None):
    """Train ``model`` for one pass over ``data_loader``.

    The learning-rate scheduler is stepped after every batch. When ``scaler``
    is given, the forward pass runs under autocast and the scaler drives the
    backward pass and optimizer step.

    Returns:
        (global average of the logged loss, learning rate of the first
        parameter group after the last step).
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    lr_meter = utils.SmoothedValue(window_size=1, fmt='{value:.6f}')
    metric_logger.add_meter('lr', lr_meter)
    header = 'Epoch: [{}]'.format(epoch)
    use_amp = scaler is not None

    for image, target in metric_logger.log_every(data_loader, print_freq, header):
        image = image.to(device)
        target = target.to(device)
        with torch.cuda.amp.autocast(enabled=use_amp):
            loss = criterion(model(image), target)

        optimizer.zero_grad()
        if use_amp:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()

        lr_scheduler.step()

        lr = optimizer.param_groups[0]["lr"]
        metric_logger.update(loss=loss.item(), lr=lr)

    return metric_logger.meters["loss"].global_avg, lr


def create_lr_scheduler(optimizer,
                        num_step: int,
                        epochs: int,
                        warmup=True,
                        warmup_epochs=1,
                        warmup_factor=1e-3):
    """Build a per-step LambdaLR with optional linear warmup followed by polynomial decay.

    Args:
        optimizer: optimizer whose learning rates are scaled.
        num_step: number of scheduler steps per epoch.
        epochs: total number of epochs.
        warmup: warmup is used only when this is exactly ``True``.
        warmup_epochs: number of warmup epochs (forced to 0 when ``warmup`` is ``False``).
        warmup_factor: multiplier at step 0 of the warmup.
    """
    assert num_step > 0 and epochs > 0
    if warmup is False:
        warmup_epochs = 0

    warmup_steps = warmup_epochs * num_step
    decay_steps = (epochs - warmup_epochs) * num_step

    def f(x):
        """
        Return the learning-rate multiplier for step ``x``.
        Note that PyTorch calls lr_scheduler.step() once before training starts.
        """
        in_warmup = warmup is True and x <= warmup_steps
        if not in_warmup:
            # After warmup the multiplier decays from 1 to 0.
            # Reference: deeplab_v2, learning rate policy.
            return (1 - (x - warmup_steps) / decay_steps) ** 0.9
        # During warmup the multiplier rises from warmup_factor to 1.
        alpha = float(x) / warmup_steps
        return warmup_factor * (1 - alpha) + alpha

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)


================================================
FILE: pytorch_segmentation/fcn/transforms.py
================================================
import numpy as np
import random

import torch
from torchvision import transforms as T
from torchvision.transforms import functional as F


def pad_if_smaller(img, size, fill=0):
    """Pad ``img`` with ``fill`` when its shortest side is below ``size``.

    Padding is added through ``F.pad`` with the tuple ``(0, 0, padw, padh)``.
    An image that is already large enough is returned unchanged.
    """
    if min(img.size) >= size:
        return img

    ow, oh = img.size
    padw = max(size - ow, 0)
    padh = max(size - oh, 0)
    return F.pad(img, (0, 0, padw, padh), fill=fill)


class Compose(object):
    """Chain transforms that each take and return an ``(image, target)`` pair."""

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        pair = (image, target)
        for step in self.transforms:
            pair = step(*pair)
        image, target = pair
        return image, target


class RandomResize(object):
    """Resize image and target to a size drawn uniformly from [min_size, max_size]."""

    def __init__(self, min_size, max_size=None):
        self.min_size = min_size
        # Without an explicit upper bound the size is fixed to min_size.
        self.max_size = min_size if max_size is None else max_size

    def __call__(self, image, target):
        size = random.randint(self.min_size, self.max_size)
        # ``size`` is an int, so the shorter side of the image is scaled to ``size``.
        resized_image = F.resize(image, size)
        # Note on interpolation: InterpolationMode.NEAREST only exists since
        # torchvision 0.9.0; earlier versions need PIL.Image.NEAREST instead.
        resized_target = F.resize(target, size, interpolation=T.InterpolationMode.NEAREST)
        return resized_image, resized_target


class RandomHorizontalFlip(object):
    """Horizontally flip image and target together with probability ``flip_prob``."""

    def __init__(self, flip_prob):
        self.flip_prob = flip_prob

    def __call__(self, image, target):
        # One random draw decides for both inputs so they stay aligned.
        if random.random() >= self.flip_prob:
            return image, target
        return F.hflip(image), F.hflip(target)


class RandomCrop(object):
    """Crop the same random ``size x size`` region from image and target."""

    def __init__(self, size):
        self.size = size

    def __call__(self, image, target):
        side = self.size
        # Pad first so the crop always fits; the target is padded with 255.
        image = pad_if_smaller(image, side)
        target = pad_if_smaller(target, side, fill=255)
        # The crop region is sampled once and applied to both inputs.
        region = T.RandomCrop.get_params(image, (side, side))
        return F.crop(image, *region), F.crop(target, *region)


class CenterCrop(object):
    """Apply ``F.center_crop`` with the same ``size`` to image and target."""

    def __init__(self, size):
        self.size = size

    def __call__(self, image, target):
        size = self.size
        cropped_image = F.center_crop(image, size)
        cropped_target = F.center_crop(target, size)
        return cropped_image, cropped_target


class ToTensor(object):
    """Convert the image with ``F.to_tensor`` and the target to an int64 tensor."""

    def __call__(self, image, target):
        image_tensor = F.to_tensor(image)
        target_array = np.array(target)
        target_tensor = torch.as_tensor(target_array, dtype=torch.int64)
        return image_tensor, target_tensor


class Normalize(object):
    """Normalize the image with ``mean`` and ``std``; the target passes through untouched."""

    def __init__(self, mean, std):
        self.mean, self.std = mean, std

    def __call__(self, image, target):
        normalized = F.normalize(image, mean=self.mean, std=self.std)
        return normalized, target


================================================
FILE: pytorch_segmentation/fcn/validation.py
================================================
import os
import torch

from src import fcn_resnet50
from train_utils import evaluate
from my_dataset import VOCSegmentation
import transforms as T


class SegmentationPresetEval:
    """Evaluation-time preprocessing: resize, tensor conversion, normalization.

    Args:
        base_size: passed as both bounds of ``T.RandomResize``.
        mean: per-channel means for ``T.Normalize``.
        std: per-channel standard deviations for ``T.Normalize``.
    """

    def __init__(self, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        # Both resize bounds are base_size, so (assuming RandomResize draws a size
        # between its two bounds) the resize is effectively deterministic.
        steps = [
            T.RandomResize(base_size, base_size),
            T.ToTensor(),
            T.Normalize(mean=mean, std=std),
        ]
        self.transforms = T.Compose(steps)

    def __call__(self, img, target):
        pipeline = self.transforms
        return pipeline(img, target)


def main(args):
    """Evaluate a trained FCN-ResNet50 checkpoint on the VOC2012 ``val.txt`` split.

    Args:
        args: parsed command-line namespace; reads ``device``, ``weights``,
            ``num_classes``, ``data_path`` and ``aux``.

    The object returned by ``evaluate`` is printed to stdout.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    assert os.path.exists(args.weights), f"weights {args.weights} not found."

    # The model predicts the foreground classes plus one background class.
    num_classes = args.num_classes + 1

    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> val.txt
    eval_transforms = SegmentationPresetEval(520)
    val_dataset = VOCSegmentation(args.data_path,
                                  year="2012",
                                  transforms=eval_transforms,
                                  txt_name="val.txt")

    loader_options = dict(batch_size=1,
                          num_workers=8,
                          pin_memory=True,
                          collate_fn=val_dataset.collate_fn)
    val_loader = torch.utils.data.DataLoader(val_dataset, **loader_options)

    model = fcn_resnet50(aux=args.aux, num_classes=num_classes)
    # The checkpoint stores the network weights under the 'model' key.
    checkpoint = torch.load(args.weights, map_location=device)
    model.load_state_dict(checkpoint['model'])
    model.to(device)

    confmat = evaluate(model, val_loader, device=device, num_classes=num_classes)
    print(confmat)


def parse_args():
    """Parse the command-line options of the validation script.

    Returns:
        argparse.Namespace with ``data_path``, ``weights``, ``num_classes``,
        ``aux``, ``device`` and ``print_freq``.

    Raises:
        SystemExit: raised by argparse on invalid arguments, including a
            ``--aux`` value that is not a recognizable boolean.
    """
    import argparse

    def str2bool(value):
        # argparse's ``type=bool`` calls bool() on the raw string, and every
        # non-empty string (including "False") is truthy. Parse the text instead
        # so that ``--aux False`` really disables the auxiliary branch.
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ("true", "t", "yes", "y", "1"):
            return True
        # The empty string is kept as False, matching bool("").
        if text in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")

    parser = argparse.ArgumentParser(description="pytorch fcn training")

    parser.add_argument("--data-path", default="/data/", help="VOCdevkit root")
    parser.add_argument("--weights", default="./save_weights/model_29.pth")
    parser.add_argument("--num-classes", default=20, type=int)
    parser.add_argument("--aux", default=True, type=str2bool, help="auxilier loss")
    parser.add_argument("--device", default="cuda", help="training device")
    parser.add_argument('--print-freq', default=10, type=int, help='print frequency')

    return parser.parse_args()


if __name__ == '__main__':
    cli_args = parse_args()

    # Create the default weights folder when it is missing.
    weights_dir = "./save_weights"
    if not os.path.exists(weights_dir):
        os.mkdir(weights_dir)

    main(cli_args)


================================================
FILE: pytorch_segmentation/lraspp/README.md
================================================
# LRASPP(Searching for MobileNetV3)

## 该项目主要是来自pytorch官方torchvision模块中的源码
* https://github.com/pytorch/vision/tree/main/torchvision/models/segmentation

## 环境配置：
* Python3.6/3.7/3.8
* Pytorch1.10
* Ubuntu或Centos(Windows暂不支持多GPU训练)
* 最好使用GPU训练
* 详细环境配置见```requirements.txt```

## 文件结构：
```
  ├── src: 模型的backbone以及LRASPP的搭建
  ├── train_utils: 训练、验证以及多GPU训练相关模块
  ├── my_dataset.py: 自定义dataset用于读取VOC数据集
  ├── train.py: 单GPU训练脚本
  ├── train_multi_GPU.py: 针对使用多GPU的用户使用
  ├── predict.py: 简易的预测脚本，使用训练好的权重进行预测测试
  ├── validation.py: 利用训练好的权重验证/测试数据的mIoU等指标，并生成record_mAP.txt文件
  └── pascal_voc_classes.json: pascal_voc标签文件
```

## 预训练权重下载地址：
* 注意：官方提供的预训练权重是在COCO上预训练得到的，训练时只针对和PASCAL VOC相同的类别进行了训练，所以类别数是21(包括背景)
* lraspp_mobilenet_v3_large: https://download.pytorch.org/models/lraspp_mobilenet_v3_large-d234d4ea.pth
* 注意，下载的预训练权重记得要重命名，比如在train.py中读取的是```lraspp_mobilenet_v3_large.pth```文件，
  不是```lraspp_mobilenet_v3_large-d234d4ea.pth```
 
 
## 数据集，本例程使用的是PASCAL VOC2012数据集
* Pascal VOC2012 train/val数据集下载地址：http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
* 如果不了解数据集或者想使用自己的数据集进行训练，请参考我的博文: https://blog.csdn.net/qq_37541097/article/details/115787033

## 训练方法
* 确保提前准备好数据集
* 确保提前下载好对应预训练模型权重
* 若要使用单GPU或者CPU训练，直接使用train.py训练脚本
* 若要使用多GPU训练，使用```torchrun --nproc_per_node=8 train_multi_GPU.py```指令,```nproc_per_node```参数为使用GPU数量
* 如果想指定使用哪些GPU设备可在指令前加上```CUDA_VISIBLE_DEVICES=0,3```(例如我只要使用设备中的第1块和第4块GPU设备)
* ```CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py```

## 注意事项
* 在使用训练脚本时，注意要将'--data-path'(VOC_root)设置为自己存放'VOCdevkit'文件夹所在的**根目录**
* 在使用预测脚本时，要将'weights_path'设置为你自己生成的权重路径。
* 使用validation文件时，注意确保你的验证集或者测试集中必须包含每个类别的目标，并且使用时只需要修改'--num-classes'、'--data-path'和'--weights'即可，其他代码尽量不要改动

## 如果对LRASPP原理不是很理解可参考我的bilibili
LR-ASPP网络讲解: [https://www.bilibili.com/video/BV1LS4y1M76E](https://www.bilibili.com/video/BV1LS4y1M76E)

## 进一步了解该项目，以及对LRASPP代码的分析可参考我的bilibili
LR-ASPP源码解析(Pytorch版): [https://www.bilibili.com/video/bv13D4y1F7ML](https://www.bilibili.com/video/bv13D4y1F7ML)

## Pytorch官方实现的LRASPP网络框架图
![lraspp](lraspp.png)


================================================
FILE: pytorch_segmentation/lraspp/get_palette.py
================================================
import json
import numpy as np
from PIL import Image

# Read the mask label image
target = Image.open("./2007_001288.png")
# Fetch its palette and regroup the flat list into rows of three values
palette = np.reshape(target.getpalette(), (-1, 3)).tolist()
# Map each palette index to its colour triple
index_to_color = dict(enumerate(palette))

json_str = json.dumps(index_to_color)
with open("palette.json", "w") as f:
    f.write(json_str)

# target = np.array(target)
# print(target)


================================================
FILE: pytorch_segmentation/lraspp/my_dataset.py
================================================
import os

import torch.utils.data as data
from PIL import Image


class VOCSegmentation(data.Dataset):
    """Pascal VOC segmentation dataset yielding (image, mask) pairs.

    Args:
        voc_root: directory that contains the ``VOCdevkit`` folder.
        year: dataset year, "2007" or "2012".
        transforms: optional callable taking and returning ``(img, target)``.
        txt_name: split file name under ``ImageSets/Segmentation``.
    """

    def __init__(self, voc_root, year="2012", transforms=None, txt_name: str = "train.txt"):
        super().__init__()
        assert year in ["2007", "2012"], "year must be in ['2007', '2012']"

        root = os.path.join(voc_root, "VOCdevkit", f"VOC{year}")
        assert os.path.exists(root), f"path '{root}' does not exist."

        txt_path = os.path.join(root, "ImageSets", "Segmentation", txt_name)
        assert os.path.exists(txt_path), f"file '{txt_path}' does not exist."

        # One sample id per line; blank lines are ignored.
        file_names = []
        with open(txt_path, "r") as f:
            for line in f:
                sample_id = line.strip()
                if sample_id:
                    file_names.append(sample_id)

        image_dir = os.path.join(root, 'JPEGImages')
        mask_dir = os.path.join(root, 'SegmentationClass')
        self.images = [os.path.join(image_dir, name + ".jpg") for name in file_names]
        self.masks = [os.path.join(mask_dir, name + ".png") for name in file_names]
        assert (len(self.images) == len(self.masks))
        self.transforms = transforms

    def __getitem__(self, index):
        """Load one sample.

        Args:
            index (int): position in the split file.

        Returns:
            tuple: (image, target); the image is converted to RGB, the target
            is the mask image as opened, and both pass through ``transforms``
            when one was given.
        """
        img = Image.open(self.images[index]).convert('RGB')
        target = Image.open(self.masks[index])

        if self.transforms is None:
            return img, target
        img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        return len(self.images)

    @staticmethod
    def collate_fn(batch):
        """Batch samples with ``cat_list``: images padded with 0, targets with 255."""
        images, targets = zip(*batch)
        return cat_list(images, fill_value=0), cat_list(targets, fill_value=255)


def cat_list(images, fill_value=0):
    """Stack tensors of differing sizes into one padded batch tensor.

    Args:
        images: sequence of tensors with the same number of dimensions.
        fill_value: value written into the padding area.

    Returns:
        A tensor shaped ``(len(images), *max_size)`` where ``max_size`` is the
        per-dimension maximum over all inputs, created from ``images[0]``
        (so it shares its dtype and device). Each input is copied into the
        leading corner of the last two dimensions of its slot.
    """
    shapes = [img.shape for img in images]
    largest = tuple(max(dims) for dims in zip(*shapes))
    batch = images[0].new_full((len(images),) + largest, fill_value)
    for slot, img in zip(batch, images):
        slot[..., :img.shape[-2], :img.shape[-1]].copy_(img)
    return batch


# dataset = VOCSegmentation(voc_root="/data/", transforms=get_transform(train=True))
# d1 = dataset[0]
# print(d1)


================================================
FILE: pytorch_segmentation/lraspp/palette.json
================================================
{"0": [0, 0, 0], "1": [128, 0, 0], "2": [0, 128, 0], "3": [128, 128, 0], "4": [0, 0, 128], "5": [128, 0, 128], "6": [0, 128, 128], "7": [128, 128, 128], "8": [64, 0, 0], "9": [192, 0, 0], "10": [64, 128, 0], "11": [192, 128, 0], "12": [64, 0, 128], "13": [192, 0, 128], "14": [64, 128, 128], "15": [192, 128, 128], "16": [0, 64, 0], "17": [128, 64, 0], "18": [0, 192, 0], "19": [128, 192, 0], "20": [0, 64, 128], "21": [128, 64, 128], "22": [0, 192, 128], "23": [128, 192, 128], "24": [64, 64, 0], "25": [192, 64, 0], "26": [64, 192, 0], "27": [192, 192, 0], "28": [64, 64, 128], "29": [192, 64, 128], "30": [64, 192, 128], "31": [192, 192, 128], "32": [0, 0, 64], "33": [128, 0, 64], "34": [0, 128, 64], "35": [128, 128, 64], "36": [0, 0, 192], "37": [128, 0, 192], "38": [0, 128, 192], "39": [128, 128, 192], "40": [64, 0, 64], "41": [192, 0, 64], "42": [64, 128, 64], "43": [192, 128, 64], "44": [64, 0, 192], "45": [192, 0, 192], "46": [64, 128, 192], "47": [192, 128, 192], "48": [0, 64, 64], "49": [128, 64, 64], "50": [0, 192, 64], "51": [128, 192, 64], "52": [0, 64, 192], "53": [128, 64, 192], "54": [0, 192, 192], "55": [128, 192, 192], "56": [64, 64, 64], "57": [192, 64, 64], "58": [64, 192, 64], "59": [192, 192, 64], "60": [64, 64, 192], "61": [192, 64, 192], "62": [64, 192, 192], "63": [192, 192, 192], "64": [32, 0, 0], "65": [160, 0, 0], "66": [32, 128, 0], "67": [160, 128, 0], "68": [32, 0, 128], "69": [160, 0, 128], "70": [32, 128, 128], "71": [160, 128, 128], "72": [96, 0, 0], "73": [224, 0, 0], "74": [96, 128, 0], "75": [224, 128, 0], "76": [96, 0, 128], "77": [224, 0, 128], "78": [96, 128, 128], "79": [224, 128, 128], "80": [32, 64, 0], "81": [160, 64, 0], "82": [32, 192, 0], "83": [160, 192, 0], "84": [32, 64, 128], "85": [160, 64, 128], "86": [32, 192, 128], "87": [160, 192, 128], "88": [96, 64, 0], "89": [224, 64, 0], "90": [96, 192, 0], "91": [224, 192, 0], "92": [96, 64, 128], "93": [224, 64, 128], "94": [96, 192, 128], "95": [224, 192, 128], "96": [32, 0, 
64], "97": [160, 0, 64], "98": [32, 128, 64], "99": [160, 128, 64], "100": [32, 0, 192], "101": [160, 0, 192], "102": [32, 128, 192], "103": [160, 128, 192], "104": [96, 0, 64], "105": [224, 0, 64], "106": [96, 128, 64], "107": [224, 128, 64], "108": [96, 0, 192], "109": [224, 0, 192], "110": [96, 128, 192], "111": [224, 128, 192], "112": [32, 64, 64], "113": [160, 64, 64], "114": [32, 192, 64], "115": [160, 192, 64], "116": [32, 64, 192], "117": [160, 64, 192], "118": [32, 192, 192], "119": [160, 192, 192], "120": [96, 64, 64], "121": [224, 64, 64], "122": [96, 192, 64], "123": [224, 192, 64], "124": [96, 64, 192], "125": [224, 64, 192], "126": [96, 192, 192], "127": [224, 192, 192], "128": [0, 32, 0], "129": [128, 32, 0], "130": [0, 160, 0], "131": [128, 160, 0], "132": [0, 32, 128], "133": [128, 32, 128], "134": [0, 160, 128], "135": [128, 160, 128], "136": [64, 32, 0], "137": [192, 32, 0], "138": [64, 160, 0], "139": [192, 160, 0], "140": [64, 32, 128], "141": [192, 32, 128], "142": [64, 160, 128], "143": [192, 160, 128], "144": [0, 96, 0], "145": [128, 96, 0], "146": [0, 224, 0], "147": [128, 224, 0], "148": [0, 96, 128], "149": [128, 96, 128], "150": [0, 224, 128], "151": [128, 224, 128], "152": [64, 96, 0], "153": [192, 96, 0], "154": [64, 224, 0], "155": [192, 224, 0], "156": [64, 96, 128], "157": [192, 96, 128], "158": [64, 224, 128], "159": [192, 224, 128], "160": [0, 32, 64], "161": [128, 32, 64], "162": [0, 160, 64], "163": [128, 160, 64], "164": [0, 32, 192], "165": [128, 32, 192], "166": [0, 160, 192], "167": [128, 160, 192], "168": [64, 32, 64], "169": [192, 32, 64], "170": [64, 160, 64], "171": [192, 160, 64], "172": [64, 32, 192], "173": [192, 32, 192], "174": [64, 160, 192], "175": [192, 160, 192], "176": [0, 96, 64], "177": [128, 96, 64], "178": [0, 224, 64], "179": [128, 224, 64], "180": [0, 96, 192], "181": [128, 96, 192], "182": [0, 224, 192], "183": [128, 224, 192], "184": [64, 96, 64], "185": [192, 96, 64], "186": [64, 224, 64], "187": [192, 
224, 64], "188": [64, 96, 192], "189": [192, 96, 192], "190": [64, 224, 192], "191": [192, 224, 192], "192": [32, 32, 0], "193": [160, 32, 0], "194": [32, 160, 0], "195": [160, 160, 0], "196": [32, 32, 128], "197": [160, 32, 128], "198": [32, 160, 128], "199": [160, 160, 128], "200": [96, 32, 0], "201": [224, 32, 0], "202": [96, 160, 0], "203": [224, 160, 0], "204": [96, 32, 128], "205": [224, 32, 128], "206": [96, 160, 128], "207": [224, 160, 128], "208": [32, 96, 0], "209": [160, 96, 0], "210": [32, 224, 0], "211": [160, 224, 0], "212": [32, 96, 128], "213": [160, 96, 128], "214": [32, 224, 128], "215": [160, 224, 128], "216": [96, 96, 0], "217": [224, 96, 0], "218": [96, 224, 0], "219": [224, 224, 0], "220": [96, 96, 128], "221": [224, 96, 128], "222": [96, 224, 128], "223": [224, 224, 128], "224": [32, 32, 64], "225": [160, 32, 64], "226": [32, 160, 64], "227": [160, 160, 64], "228": [32, 32, 192], "229": [160, 32, 192], "230": [32, 160, 192], "231": [160, 160, 192], "232": [96, 32, 64], "233": [224, 32, 64], "234": [96, 160, 64], "235": [224, 160, 64], "236": [96, 32, 192], "237": [224, 32, 192], "238": [96, 160, 192], "239": [224, 160, 192], "240": [32, 96, 64], "241": [160, 96, 64], "242": [32, 224, 64], "243": [160, 224, 64], "244": [32, 96, 192], "245": [160, 96, 192], "246": [32, 224, 192], "247": [160, 224, 192], "248": [96, 96, 64], "249": [224, 96, 64], "250": [96, 224, 64], "251": [224, 224, 64], "252": [96, 96, 192], "253": [224, 96, 192], "254": [96, 224, 192], "255": [224, 224, 192]}

================================================
FILE: pytorch_segmentation/lraspp/pascal_voc_classes.json
================================================
{
    "aeroplane": 1,
    "bicycle": 2,
    "bird": 3,
    "boat": 4,
    "bottle": 5,
    "bus": 6,
    "car": 7,
    "cat": 8,
    "chair": 9,
    "cow": 10,
    "diningtable": 11,
    "dog": 12,
    "horse": 13,
    "motorbike": 14,
    "person": 15,
    "pottedplant": 16,
    "sheep": 17,
    "sofa": 18,
    "train": 19,
    "tvmonitor": 20
}

================================================
FILE: pytorch_segmentation/lraspp/predict.py
================================================
import os
import time
import json

import torch
from torchvision import transforms
import numpy as np
from PIL import Image

from src import lraspp_mobilenetv3_large


def time_synchronized():
    """Return ``time.time()``, calling ``torch.cuda.synchronize()`` first when CUDA is available."""
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()


def main():
    """Segment ``./test.jpg`` with a trained LR-ASPP model and save a palettized mask.

    Reads the weights from ``./save_weights/model_29.pth`` and the colour
    palette from ``./palette.json``, prints the device and the inference
    time, and writes the predicted class-index mask to ``test_result.png``.

    Raises:
        AssertionError: if the weights, image or palette file is missing.
    """
    classes = 20
    weights_path = "./save_weights/model_29.pth"
    img_path = "./test.jpg"
    palette_path = "./palette.json"
    assert os.path.exists(weights_path), f"weights {weights_path} not found."
    assert os.path.exists(img_path), f"image {img_path} not found."
    assert os.path.exists(palette_path), f"palette {palette_path} not found."
    with open(palette_path, "rb") as f:
        pallette_dict = json.load(f)
        # Flatten {index: [r, g, b]} into the flat list putpalette() expects.
        pallette = []
        for v in pallette_dict.values():
            pallette += v

    # get devices
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # create model (foreground classes + background)
    model = lraspp_mobilenetv3_large(num_classes=classes+1)

    # load weights; the checkpoint keeps them under the 'model' key
    weights_dict = torch.load(weights_path, map_location='cpu')['model']
    model.load_state_dict(weights_dict)
    model.to(device)

    # load image; force 3-channel RGB so grayscale / RGBA / palette files match
    # the 3-channel normalization below
    original_img = Image.open(img_path).convert('RGB')

    # from pil image to tensor and normalize
    data_transform = transforms.Compose([transforms.Resize(520),
                                         transforms.ToTensor(),
                                         transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                                              std=(0.229, 0.224, 0.225))])
    img = data_transform(original_img)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)

    model.eval()  # switch to evaluation mode
    with torch.no_grad():
        # run one forward pass on a zero tensor first so that it is not part of
        # the timed inference below
        img_height, img_width = img.shape[-2:]
        init_img = torch.zeros((1, 3, img_height, img_width), device=device)
        model(init_img)

        t_start = time_synchronized()
        output = model(img.to(device))
        t_end = time_synchronized()
        print("inference time: {}".format(t_end - t_start))

        # per-pixel argmax over the class dimension, then drop the batch dimension
        prediction = output['out'].argmax(1).squeeze(0)
        prediction = prediction.to("cpu").numpy().astype(np.uint8)
        mask = Image.fromarray(prediction)
        mask.putpalette(pallette)
        mask.save("test_result.png")


if __name__ == '__main__':
    main()


================================================
FILE: pytorch_segmentation/lraspp/requirements.txt
================================================
numpy==1.22.0
torch==1.10.0
torchvision==0.11.1
Pillow


================================================
FILE: pytorch_segmentation/lraspp/results20211028-105233.txt
================================================
[epoch: 0]
train_loss: 0.5343
lr: 0.000100
global correct: 93.1
average row correct: ['96.8', '90.0', '73.9', '87.9', '79.4', '66.2', '92.1', '79.5', '90.9', '45.0', '88.9', '54.7', '85.8', '89.8', '87.5', '91.2', '66.8', '85.0', '68.4', '87.6', '71.6']
IoU: ['92.4', '85.7', '34.8', '84.3', '66.4', '59.9', '89.2', '71.2', '86.0', '34.6', '82.3', '46.1', '78.6', '82.1', '79.8', '82.5', '54.8', '79.4', '50.2', '83.8', '65.5']
mean IoU: 70.9

[epoch: 1]
train_loss: 0.4683
lr: 0.000077
global correct: 93.2
average row correct: ['96.2', '92.6', '75.2', '92.3', '82.6', '70.9', '93.5', '83.9', '93.5', '47.9', '91.0', '61.9', '87.0', '90.5', '89.8', '90.0', '68.1', '86.4', '70.4', '90.4', '75.5']
IoU: ['92.5', '86.1', '34.9', '85.1', '65.3', '63.0', '90.0', '73.1', '86.0', '34.8', '83.2', '50.0', '77.6', '81.2', '79.8', '82.3', '54.3', '78.4', '49.8', '85.5', '67.3']
mean IoU: 71.4

[epoch: 2]
train_loss: 0.4053
lr: 0.000054
global correct: 93.1
average row correct: ['95.9', '93.1', '75.9', '92.6', '83.8', '75.3', '94.4', '85.6', '93.7', '50.2', '91.2', '62.1', '87.1', '90.8', '90.3', '89.8', '71.2', '86.8', '71.8', '91.1', '77.5']
IoU: ['92.5', '86.0', '35.1', '84.7', '65.2', '65.6', '90.4', '73.3', '85.9', '34.8', '83.0', '50.0', '77.7', '81.7', '79.2', '82.3', '53.9', '78.5', '49.9', '85.6', '67.2']
mean IoU: 71.6

[epoch: 3]
train_loss: 0.4358
lr: 0.000029
global correct: 93.1
average row correct: ['95.8', '93.4', '76.0', '92.3', '83.2', '78.1', '94.0', '86.3', '93.0', '50.9', '91.1', '62.9', '88.0', '90.9', '90.4', '89.6', '71.6', '87.0', '72.4', '92.4', '78.5']
IoU: ['92.5', '86.0', '35.3', '85.1', '66.1', '66.9', '89.8', '73.3', '85.9', '34.8', '83.0', '50.4', '78.0', '81.5', '79.0', '82.1', '54.1', '78.6', '50.0', '85.6', '67.1']
mean IoU: 71.7

[epoch: 4]
train_loss: 0.3886
lr: 0.000000
global correct: 93.1
average row correct: ['95.6', '93.8', '76.0', '92.8', '83.6', '77.9', '94.2', '86.1', '93.5', '50.9', '92.0', '63.8', '88.8', '91.4', '90.6', '89.4', '73.2', '87.4', '73.0', '92.4', '78.9']
IoU: ['92.5', '86.0', '35.3', '84.4', '66.2', '66.5', '89.9', '73.2', '85.9', '34.6', '83.2', '50.8', '78.0', '81.4', '78.6', '82.0', '53.6', '78.4', '50.1', '85.7', '66.6']
mean IoU: 71.6


================================================
FILE: pytorch_segmentation/lraspp/src/__init__.py
================================================
from .lraspp_model import lraspp_mobilenetv3_large


================================================
FILE: pytorch_segmentation/lraspp/src/lraspp_model.py
================================================
from collections import OrderedDict

from typing import Dict

import torch
from torch import nn, Tensor
from torch.nn import functional as F
from .mobilenet_backbone import mobilenet_v3_large


class IntermediateLayerGetter(nn.ModuleDict):
    """
    Wrap a model so that a forward pass returns selected intermediate outputs.

    The wrapper runs the model's direct children one after another in their
    registration order, so it assumes that order matches the order of use and
    that no child is reused within the forward pass. Only direct children can
    be selected (``model.feature1`` works, ``model.feature1.layer2`` does not).
    Children registered after the last requested one are dropped.

    Args:
        model (nn.Module): model whose features are extracted.
        return_layers (Dict[name, new_name]): maps the name of each child whose
            output should be returned to the key under which it is returned.

    Raises:
        ValueError: if a key of ``return_layers`` is not a direct child of ``model``.
    """
    _version = 2
    __annotations__ = {
        "return_layers": Dict[str, str],
    }

    def __init__(self, model: nn.Module, return_layers: Dict[str, str]) -> None:
        child_names = [name for name, _ in model.named_children()]
        if not set(return_layers).issubset(child_names):
            raise ValueError("return_layers are not present in model")

        # Names of requested children that have not been reached yet.
        pending = {str(key) for key, _ in return_layers.items()}

        # Rebuild the backbone, keeping children only up to the last requested one.
        kept = OrderedDict()
        for name, module in model.named_children():
            kept[name] = module
            pending.discard(name)
            if not pending:
                break

        super(IntermediateLayerGetter, self).__init__(kept)
        self.return_layers = return_layers

    def forward(self, x: Tensor) -> Dict[str, Tensor]:
        wanted = self.return_layers
        collected = OrderedDict()
        for name, module in self.items():
            x = module(x)
            if name in wanted:
                collected[wanted[name]] = x
        return collected


class LRASPP(nn.Module):
    """
    Lite R-ASPP segmentation network from
    `"Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>`_.

    Args:
        backbone (nn.Module): feature extractor. It should return a dict of
            tensors with key "high" for the high-level feature map and "low"
            for the low-level feature map.
        low_channels (int): channel count of the low-level features.
        high_channels (int): channel count of the high-level features.
        num_classes (int): number of output classes (including the background).
        inter_channels (int, optional): channel count used inside the head.
    """
    __constants__ = ['aux_classifier']

    def __init__(self,
                 backbone: nn.Module,
                 low_channels: int,
                 high_channels: int,
                 num_classes: int,
                 inter_channels: int = 128) -> None:
        super().__init__()
        self.backbone = backbone
        self.classifier = LRASPPHead(low_channels, high_channels, num_classes, inter_channels)

    def forward(self, x: Tensor) -> Dict[str, Tensor]:
        height_width = x.shape[-2:]
        logits = self.classifier(self.backbone(x))
        # Resize the head output back to the input's spatial size.
        logits = F.interpolate(logits, size=height_width, mode="bilinear", align_corners=False)
        return OrderedDict([("out", logits)])


class LRASPPHead(nn.Module):
    """Segmentation head that fuses a gated high-level branch with a low-level branch.

    Args:
        low_channels: channel count of the "low" feature map.
        high_channels: channel count of the "high" feature map.
        num_classes: number of output channels.
        inter_channels: channel count of the gated high-level features.
    """

    def __init__(self,
                 low_channels: int,
                 high_channels: int,
                 num_classes: int,
                 inter_channels: int) -> None:
        super().__init__()
        # High-level branch: 1x1 conv -> BN -> ReLU.
        self.cbr = nn.Sequential(
            nn.Conv2d(high_channels, inter_channels, 1, bias=False),
            nn.BatchNorm2d(inter_channels),
            nn.ReLU(inplace=True),
        )
        # Channel gate: global average pool -> 1x1 conv -> sigmoid.
        self.scale = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(high_channels, inter_channels, 1, bias=False),
            nn.Sigmoid(),
        )
        self.low_classifier = nn.Conv2d(low_channels, num_classes, 1)
        self.high_classifier = nn.Conv2d(inter_channels, num_classes, 1)

    def forward(self, inputs: Dict[str, Tensor]) -> Tensor:
        low, high = inputs["low"], inputs["high"]

        # Gate the high-level features, then bring them to the low-level resolution.
        gated = self.cbr(high) * self.scale(high)
        gated = F.interpolate(gated, size=low.shape[-2:], mode="bilinear", align_corners=False)

        return self.low_classifier(low) + self.high_classifier(gated)


def lraspp_mobilenetv3_large(num_classes=21, pretrain_backbone=False):
    """Build an LR-ASPP model on a dilated MobileNetV3-Large backbone.

    Args:
        num_classes: number of output classes passed to ``LRASPP``.
        pretrain_backbone: when True, load backbone weights from
            ``mobilenet_v3_large.pth`` in the working directory.

    Returns:
        The assembled ``LRASPP`` model.
    """
    # 'mobilenetv3_large_imagenet': 'https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth'
    # 'lraspp_mobilenet_v3_large_coco': 'https://download.pytorch.org/models/lraspp_mobilenet_v3_large-d234d4ea.pth'
    net = mobilenet_v3_large(dilated=True)

    if pretrain_backbone:
        # Load the pretrained MobileNetV3-Large backbone weights.
        net.load_state_dict(torch.load("mobilenet_v3_large.pth", map_location='cpu'))

    features = net.features

    # Indices of the strided blocks mark the C1, ..., Cn-1 stages. The first and
    # last blocks are always included because they are C0 (conv1) and Cn.
    strided = [i for i, b in enumerate(features) if getattr(b, "is_strided", False)]
    stage_indices = [0] + strided + [len(features) - 1]
    low_pos = stage_indices[-4]  # use C2 here which has output_stride = 8
    high_pos = stage_indices[-1]  # use C5 which has output_stride = 16
    low_channels = features[low_pos].out_channels
    high_channels = features[high_pos].out_channels

    backbone = IntermediateLayerGetter(
        features,
        return_layers={str(low_pos): "low", str(high_pos): "high"},
    )
    return LRASPP(backbone, low_channels, high_channels, num_classes)


================================================
FILE: pytorch_segmentation/lraspp/src/mobilenet_backbone.py
================================================
from typing import Callable, List, Optional

import torch
from torch import nn, Tensor
from torch.nn import functional as F
from functools import partial


def _make_divisible(ch, divisor=8, min_ch=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    """
    if min_ch is None:
        min_ch = divisor
    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_ch < 0.9 * ch:
        new_ch += divisor
    return new_ch


class ConvBNActivation(nn.Sequential):
    """Bias-free Conv2d followed by a normalization layer and an activation.

    Args:
        in_planes: input channels of the convolution.
        out_planes: output channels of the convolution.
        kernel_size: convolution kernel size.
        stride: convolution stride.
        groups: convolution groups.
        norm_layer: factory called with ``out_planes``; defaults to ``nn.BatchNorm2d``.
        activation_layer: factory called with ``inplace=True``; defaults to ``nn.ReLU6``.
        dilation: convolution dilation.
    """

    def __init__(self,
                 in_planes: int,
                 out_planes: int,
                 kernel_size: int = 3,
                 stride: int = 1,
                 groups: int = 1,
                 norm_layer: Optional[Callable[..., nn.Module]] = None,
                 activation_layer: Optional[Callable[..., nn.Module]] = None,
                 dilation: int = 1):
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if activation_layer is None:
            activation_layer = nn.ReLU6

        # Padding grows with the dilation so the dilated kernel stays centred.
        conv = nn.Conv2d(in_channels=in_planes,
                         out_channels=out_planes,
                         kernel_size=kernel_size,
                         stride=stride,
                         dilation=dilation,
                         padding=(kernel_size - 1) // 2 * dilation,
                         groups=groups,
                         bias=False)
        super().__init__(conv, norm_layer(out_planes), activation_layer(inplace=True))
        self.out_channels = out_planes


class SqueezeExcitation(nn.Module):
    """Squeeze-and-excitation block that rescales channels by a learned gate.

    Args:
        input_c: channel count of the input (and output).
        squeeze_factor: the hidden width is ``input_c // squeeze_factor``
            adjusted by ``_make_divisible(..., 8)``.
    """

    def __init__(self, input_c: int, squeeze_factor: int = 4):
        super().__init__()
        squeeze_c = _make_divisible(input_c // squeeze_factor, 8)
        self.fc1 = nn.Conv2d(input_c, squeeze_c, 1)
        self.fc2 = nn.Conv2d(squeeze_c, input_c, 1)

    def forward(self, x: Tensor) -> Tensor:
        # Squeeze: one value per channel.
        pooled = F.adaptive_avg_pool2d(x, output_size=(1, 1))
        # Excite: two 1x1 convs with ReLU in between, hard-sigmoid gate at the end.
        hidden = F.relu(self.fc1(pooled), inplace=True)
        gate = F.hardsigmoid(self.fc2(hidden), inplace=True)
        return gate * x


class InvertedResidualConfig:
    """Settings for one inverted-residual block.

    The three channel counts are scaled by ``width_multi`` and passed through
    ``adjust_channels``; the remaining values are stored as given.

    Args:
        input_c: input channels before scaling.
        kernel: depthwise kernel size.
        expanded_c: expansion channels before scaling.
        out_c: output channels before scaling.
        use_se: whether the block contains a squeeze-excitation module.
        activation: ``"HS"`` selects h-swish (stored as ``use_hs``).
        stride: block stride.
        dilation: depthwise dilation.
        width_multi: channel width multiplier.
    """

    def __init__(self,
                 input_c: int,
                 kernel: int,
                 expanded_c: int,
                 out_c: int,
                 use_se: bool,
                 activation: str,
                 stride: int,
                 dilation: int,
                 width_multi: float):
        scale = self.adjust_channels
        self.input_c = scale(input_c, width_multi)
        self.kernel = kernel
        self.expanded_c = scale(expanded_c, width_multi)
        self.out_c = scale(out_c, width_multi)
        self.use_se = use_se
        # whether using h-swish activation
        self.use_hs = (activation == "HS")
        self.stride = stride
        self.dilation = dilation

    @staticmethod
    def adjust_channels(channels: int, width_multi: float):
        """Scale ``channels`` by ``width_multi`` and apply ``_make_divisible(..., 8)``."""
        scaled = channels * width_multi
        return _make_divisible(scaled, 8)


class InvertedResidual(nn.Module):
    """Inverted-residual block: optional expand, depthwise, optional SE, project.

    Args:
        cnf: block settings (channels, kernel, stride, dilation, SE, activation).
        norm_layer: normalization factory handed to every ``ConvBNActivation``.

    Raises:
        ValueError: if ``cnf.stride`` is neither 1 nor 2.
    """

    def __init__(self,
                 cnf: InvertedResidualConfig,
                 norm_layer: Callable[..., nn.Module]):
        super().__init__()

        if cnf.stride not in [1, 2]:
            raise ValueError("illegal stride value.")

        # The shortcut is only used when input and output tensors line up.
        self.use_res_connect = (cnf.stride == 1 and cnf.input_c == cnf.out_c)

        act = nn.Hardswish if cnf.use_hs else nn.ReLU
        stages: List[nn.Module] = []

        # expand: 1x1 conv, skipped when there is no channel expansion
        if cnf.expanded_c != cnf.input_c:
            expand = ConvBNActivation(cnf.input_c,
                                      cnf.expanded_c,
                                      kernel_size=1,
                                      norm_layer=norm_layer,
                                      activation_layer=act)
            stages.append(expand)

        # depthwise: a dilated block uses stride 1 instead of its configured stride
        depthwise_stride = 1 if cnf.dilation > 1 else cnf.stride
        depthwise = ConvBNActivation(cnf.expanded_c,
                                     cnf.expanded_c,
                                     kernel_size=cnf.kernel,
                                     stride=depthwise_stride,
                                     dilation=cnf.dilation,
                                     groups=cnf.expanded_c,
                                     norm_layer=norm_layer,
                                     activation_layer=act)
        stages.append(depthwise)

        if cnf.use_se:
            stages.append(SqueezeExcitation(cnf.expanded_c))

        # project: 1x1 conv without a non-linearity
        project = ConvBNActivation(cnf.expanded_c,
                                   cnf.out_c,
                                   kernel_size=1,
                                   norm_layer=norm_layer,
                                   activation_layer=nn.Identity)
        stages.append(project)

        self.block = nn.Sequential(*stages)
        self.out_channels = cnf.out_c
        self.is_strided = cnf.stride > 1

    def forward(self, x: Tensor) -> Tensor:
        out = self.block(x)
        if self.use_res_connect:
            out += x

        return out


class MobileNetV3(nn.Module):
    """MobileNetV3 classifier assembled from a list of block configurations.

    The network is: a stride-2 3x3 stem convolution, one ``block`` per entry
    of ``inverted_residual_setting``, a 1x1 convolution that widens the last
    block's output by a factor of 6, global average pooling and a two-layer
    classifier head.
    """

    def __init__(self,
                 inverted_residual_setting: List[InvertedResidualConfig],
                 last_channel: int,
                 num_classes: int = 1000,
                 block: Optional[Callable[..., nn.Module]] = None,
                 norm_layer: Optional[Callable[..., nn.Module]] = None):
        """
        Args:
            inverted_residual_setting: non-empty list of block configurations.
            last_channel: width of the hidden layer in the classifier head.
            num_classes: number of output logits.
            block: callable building one block from ``(cnf, norm_layer)``;
                defaults to ``InvertedResidual``.
            norm_layer: normalization layer factory; defaults to
                ``BatchNorm2d`` with ``eps=0.001`` and ``momentum=0.01``.

        Raises:
            ValueError: if ``inverted_residual_setting`` is empty.
            TypeError: if it is not a list of ``InvertedResidualConfig``.
        """
        super().__init__()

        if not inverted_residual_setting:
            raise ValueError("The inverted_residual_setting should not be empty.")
        is_config_list = isinstance(inverted_residual_setting, List) and all(
            isinstance(cfg, InvertedResidualConfig) for cfg in inverted_residual_setting)
        if not is_config_list:
            raise TypeError("The inverted_residual_setting should be List[InvertedResidualConfig]")

        block = InvertedResidual if block is None else block
        norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.01) if norm_layer is None else norm_layer

        # Stem: its output width is the input width of the first block.
        stem_out_c = inverted_residual_setting[0].input_c
        stages: List[nn.Module] = [
            ConvBNActivation(3,
                             stem_out_c,
                             kernel_size=3,
                             stride=2,
                             norm_layer=norm_layer,
                             activation_layer=nn.Hardswish)
        ]

        # One block per configuration entry, in order.
        stages.extend(block(cnf, norm_layer) for cnf in inverted_residual_setting)

        # Final 1x1 convolution widens the last block's output six-fold.
        head_in_c = inverted_residual_setting[-1].out_c
        head_out_c = 6 * head_in_c
        stages.append(ConvBNActivation(head_in_c,
                                       head_out_c,
                                       kernel_size=1,
                                       norm_layer=norm_layer,
                                       activation_layer=nn.Hardswish))

        self.features = nn.Sequential(*stages)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Sequential(
            nn.Linear(head_out_c, last_channel),
            nn.Hardswish(inplace=True),
            nn.Dropout(p=0.2, inplace=True),
            nn.Linear(last_channel, num_classes),
        )

        # Weight initialization, visiting modules in ``self.modules()`` order.
        for module in self.modules():
            if isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.zeros_(module.bias)
            elif isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)
            elif isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode="fan_out")
                if module.bias is not None:
                    nn.init.zeros_(module.bias)

    def _forward_impl(self, x: Tensor) -> Tensor:
        """Features -> global average pool -> flatten from dim 1 -> classifier."""
        pooled = self.avgpool(self.features(x))
        return self.classifier(torch.flatten(pooled, 1))

    def forward(self, x: Tensor) -> Tensor:
        """Return the classifier output for ``x`` (delegates to ``_forward_impl``)."""
        return self._forward_impl(x)


def mobilenet_v3_large(num_classes: int = 1000,
                       reduced_tail: bool = False,
                       dilated: bool = False) -> MobileNetV3:
    """
    Build the large MobileNetV3 variant described in
    "Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>.

    weights_link:
    https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth

    Args:
        num_classes (int): number of classes
        reduced_tail (bool): If True, the channel counts of the layers from C4
            to C5 are halved. This trims channel redundancy in the backbone
            for Detection and Segmentation.
        dilated: If True, the last three blocks are configured with dilation 2
            instead of 1.
    """
    width_multi = 1.0
    make_conf = partial(InvertedResidualConfig, width_multi=width_multi)
    adjust_channels = partial(InvertedResidualConfig.adjust_channels, width_multi=width_multi)

    reduce_divider = 2 if reduced_tail else 1
    dilation = 2 if dilated else 1

    # Widths shared by the three tail (C4) blocks.
    tail_c = 160 // reduce_divider
    tail_expanded_c = 960 // reduce_divider

    # Columns: input_c, kernel, expanded_c, out_c, use_se, activation, stride, dilation
    rows = [
        (16, 3, 16, 16, False, "RE", 1, 1),
        (16, 3, 64, 24, False, "RE", 2, 1),  # C1
        (24, 3, 72, 24, False, "RE", 1, 1),
        (24, 5, 72, 40, True, "RE", 2, 1),  # C2
        (40, 5, 120, 40, True, "RE", 1, 1),
        (40, 5, 120, 40, True, "RE", 1, 1),
        (40, 3, 240, 80, False, "HS", 2, 1),  # C3
        (80, 3, 200, 80, False, "HS", 1, 1),
        (80, 3, 184, 80, False, "HS", 1, 1),
        (80, 3, 184, 80, False, "HS", 1, 1),
        (80, 3, 480, 112, True, "HS", 1, 1),
        (112, 3, 672, 112, True, "HS", 1, 1),
        (112, 5, 672, tail_c, True, "HS", 2, dilation),  # C4
        (tail_c, 5, tail_expanded_c, tail_c, True, "HS", 1, dilation),
        (tail_c, 5, tail_expanded_c, tail_c, True, "HS", 1, dilation),
    ]
    inverted_residual_setting = [make_conf(*row) for row in rows]
    last_channel = adjust_channels(1280 // reduce_divider)  # C5

    return MobileNetV3(inverted_residual_setting=inverted_residual_setting,
                       last_channel=last_channel,
                       num_classes=num_classes)


def mobilenet_v3_small(num_classes: int = 1000,
                       reduced_tail: bool = False,
                       dilated: bool = False) -> MobileNetV3:
    """
    Build the small MobileNetV3 variant described in
    "Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>.

    weights_link:
    https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth

    Args:
        num_classes (int): number of classes
        reduced_tail (bool): If True, the channel counts of the layers from C4
            to C5 are halved. This trims channel redundancy in the backbone
            for Detection and Segmentation.
        dilated: If True, the last three blocks are configured with dilation 2
            instead of 1.
    """
    width_multi = 1.0
    make_conf = partial(InvertedResidualConfig, width_multi=width_multi)
    adjust_channels = partial(InvertedResidualConfig.adjust_channels, width_multi=width_multi)

    reduce_divider = 2 if reduced_tail else 1
    dilation = 2 if dilated else 1

    # Widths shared by the three tail (C4) blocks.
    tail_c = 96 // reduce_divider
    tail_expanded_c = 576 // reduce_divider

    # Columns: input_c, kernel, expanded_c, out_c, use_se, activation, stride, dilation
    rows = [
        (16, 3, 16, 16, True, "RE", 2, 1),  # C1
        (16, 3, 72, 24, False, "RE", 2, 1),  # C2
        (24, 3, 88, 24, False, "RE", 1, 1),
        (24, 5, 96, 40, True, "HS", 2, 1),  # C3
        (40, 5, 240, 40, True, "HS", 1, 1),
        (40, 5, 240, 40, True, "HS", 1, 1),
        (40, 5, 120, 48, True, "HS", 1, 1),
        (48, 5, 144, 48, True, "HS", 1, 1),
        (48, 5, 288, tail_c, True, "HS", 2, dilation),  # C4
        (tail_c, 5, tail_expanded_c, tail_c, True, "HS", 1, dilation),
        (tail_c, 5, tail_expanded_c, tail_c, True, "HS", 1, dilation),
    ]
    inverted_residual_setting = [make_conf(*row) for row in rows]
    last_channel = adjust_channels(1024 // reduce_divider)  # C5

    return MobileNetV3(inverted_residual_setting=inverted_residual_setting,
                       last_channel=last_channel,
                       num_classes=num_classes)


================================================
FILE: pytorch_segmentation/lraspp/train.py
================================================
import os
import time
import datetime

import torch

from src import lraspp_mobilenetv3_large
from train_utils import train_one_epoch, evaluate, create_lr_scheduler
from my_dataset import VOCSegmentation
import transforms as T


class SegmentationPresetTrain:
    """Training-time transform pipeline applied jointly to (image, target).

    The pipeline is: random resize, optional horizontal flip, random crop,
    tensor conversion and normalization, all taken from the ``T`` module.
    """

    def __init__(self, base_size, crop_size, hflip_prob=0.5, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        # Resize bounds are 0.5x and 2.0x of the base size.
        pipeline = [T.RandomResize(int(0.5 * base_size), int(2.0 * base_size))]
        # The flip step is omitted entirely when its probability is not positive.
        if hflip_prob > 0:
            pipeline.append(T.RandomHorizontalFlip(hflip_prob))
        pipeline += [
            T.RandomCrop(crop_size),
            T.ToTensor(),
            T.Normalize(mean=mean, std=std),
        ]
        self.transforms = T.Compose(pipeline)

    def __call__(self, img, target):
        """Apply the composed transforms and return their result."""
        return self.transforms(img, target)


class SegmentationPresetEval:
    """Evaluation-time transform pipeline applied jointly to (image, target).

    Uses ``T.RandomResize`` with identical lower and upper bounds, followed by
    tensor conversion and normalization.
    """

    def __init__(self, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        steps = [
            T.RandomResize(base_size, base_size),
            T.ToTensor(),
            T.Normalize(mean=mean, std=std),
        ]
        self.transforms = T.Compose(steps)

    def __call__(self, img, target):
        """Apply the composed transforms and return their result."""
        return self.transforms(img, target)


def get_transform(train):
    """Return the training preset when ``train`` is truthy, else the eval preset.

    Both presets use a base size of 520; the training preset crops to 480.
    """
    base_size, crop_size = 520, 480

    if train:
        return SegmentationPresetTrain(base_size, crop_size)
    return SegmentationPresetEval(base_size)


def create_model(num_classes, pretrain=True):
    """Build the LRASPP MobileNetV3-Large model, optionally loading local weights.

    Args:
        num_classes: number of output classes passed to the model factory.
        pretrain: when true, load "./lraspp_mobilenet_v3_large.pth" non-strictly.

    Returns:
        The constructed model.
    """
    model = lraspp_mobilenetv3_large(num_classes=num_classes)
    if not pretrain:
        return model

    weights_dict = torch.load("./lraspp_mobilenet_v3_large.pth", map_location='cpu')

    if num_classes != 21:
        # The official pretrained weights are for 21 classes (background included).
        # For a custom dataset, drop the class-dependent weights so the shape
        # mismatch does not raise. Keys are removed in place.
        head_keys = [k for k in weights_dict.keys()
                     if "low_classifier" in k or "high_classifier" in k]
        for key in head_keys:
            del weights_dict[key]

    missing_keys, unexpected_keys = model.load_state_dict(weights_dict, strict=False)
    if missing_keys or unexpected_keys:
        print("missing_keys: ", missing_keys)
        print("unexpected_keys: ", unexpected_keys)

    return model


def main(args):
    """Train and evaluate LRASPP on Pascal VOC 2012 segmentation on one device.

    After every epoch the validation metrics are printed and appended to a
    timestamped results file, and a checkpoint is written to
    ``save_weights/model_{epoch}.pth``.

    Args:
        args: parsed command-line namespace (see ``parse_args``). The function
            reads ``device``, ``batch_size``, ``num_classes``, ``data_path``,
            ``lr``, ``momentum``, ``weight_decay``, ``amp``, ``epochs``,
            ``resume``, ``start_epoch`` and ``print_freq``; ``start_epoch`` is
            overwritten when resuming from a checkpoint.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    batch_size = args.batch_size
    # number of segmentation classes + background
    num_classes = args.num_classes + 1

    # File that records training and validation information.
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> train.txt
    train_dataset = VOCSegmentation(args.data_path,
                                    year="2012",
                                    transforms=get_transform(train=True),
                                    txt_name="train.txt")

    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> val.txt
    val_dataset = VOCSegmentation(args.data_path,
                                  year="2012",
                                  transforms=get_transform(train=False),
                                  txt_name="val.txt")

    # os.cpu_count() may return None when the count cannot be determined;
    # fall back to 1 so min() never compares None with an int.
    cpu_count = os.cpu_count() or 1
    num_workers = min([cpu_count, batch_size if batch_size > 1 else 0, 8])
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               num_workers=num_workers,
                                               shuffle=True,
                                               pin_memory=True,
                                               collate_fn=train_dataset.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             num_workers=num_workers,
                                             pin_memory=True,
                                             collate_fn=val_dataset.collate_fn)

    model = create_model(num_classes=num_classes)
    model.to(device)

    # Only parameters that require gradients are handed to the optimizer.
    params_to_optimize = [
        {"params": [p for p in model.backbone.parameters() if p.requires_grad]},
        {"params": [p for p in model.classifier.parameters() if p.requires_grad]}
    ]

    optimizer = torch.optim.SGD(
        params_to_optimize,
        lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay
    )

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # Learning-rate schedule; it is updated once per step (not once per epoch).
    lr_scheduler = create_lr_scheduler(optimizer, len(train_loader), args.epochs, warmup=True)

    if args.resume:
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp:
            scaler.load_state_dict(checkpoint["scaler"])

    # Make sure the checkpoint directory exists even when main() is called
    # without going through the script entry point.
    os.makedirs("save_weights", exist_ok=True)

    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        mean_loss, lr = train_one_epoch(model, optimizer, train_loader, device, epoch,
                                        lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler)

        confmat = evaluate(model, val_loader, device=device, num_classes=num_classes)
        val_info = str(confmat)
        print(val_info)
        # write into txt
        with open(results_file, "a") as f:
            # Record train_loss, lr and the validation metrics of this epoch.
            train_info = f"[epoch: {epoch}]\n" \
                         f"train_loss: {mean_loss:.4f}\n" \
                         f"lr: {lr:.6f}\n"
            f.write(train_info + val_info + "\n\n")

        save_file = {"model": model.state_dict(),
                     "optimizer": optimizer.state_dict(),
                     "lr_scheduler": lr_scheduler.state_dict(),
                     "epoch": epoch,
                     "args": args}
        if args.amp:
            save_file["scaler"] = scaler.state_dict()
        torch.save(save_file, "save_weights/model_{}.pth".format(epoch))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print("training time {}".format(total_time_str))


def parse_args():
    """Parse the command-line options of the single-device training script.

    Returns:
        argparse.Namespace with ``data_path``, ``num_classes``, ``device``,
        ``batch_size``, ``epochs``, ``lr``, ``momentum``, ``weight_decay``,
        ``print_freq``, ``resume``, ``start_epoch`` and ``amp``.
    """
    import argparse

    def str2bool(value):
        """Convert a command-line string to bool.

        ``type=bool`` would treat every non-empty string (including "False")
        as True, so the accepted spellings are listed explicitly.
        """
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ("true", "t", "yes", "y", "1", "on"):
            return True
        if text in ("false", "f", "no", "n", "0", "off", ""):
            return False
        raise argparse.ArgumentTypeError("boolean value expected, got '{}'".format(value))

    parser = argparse.ArgumentParser(description="pytorch lraspp training")

    parser.add_argument("--data-path", default="/data/", help="VOCdevkit root")
    parser.add_argument("--num-classes", default=20, type=int)
    parser.add_argument("--device", default="cuda", help="training device")
    parser.add_argument("-b", "--batch-size", default=4, type=int)
    parser.add_argument("--epochs", default=30, type=int, metavar="N",
                        help="number of total epochs to train")

    parser.add_argument('--lr', default=0.0001, type=float, help='initial learning rate')
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    parser.add_argument('--print-freq', default=10, type=int, help='print frequency')
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                        help='start epoch')
    # Mixed precision training parameters.
    # "--amp" alone enables it; "--amp True" / "--amp False" are also accepted.
    parser.add_argument("--amp", default=False, type=str2bool, nargs="?", const=True,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()

    return args


if __name__ == '__main__':
    args = parse_args()

    # Create the checkpoint directory; exist_ok avoids the race between an
    # existence check and the creation call.
    os.makedirs("./save_weights", exist_ok=True)

    main(args)


================================================
FILE: pytorch_segmentation/lraspp/train_multi_GPU.py
================================================
import time
import os
import datetime

import torch

from src import lraspp_mobilenetv3_large
from train_utils import train_one_epoch, evaluate, create_lr_scheduler, init_distributed_mode, save_on_master, mkdir
from my_dataset import VOCSegmentation
import transforms as T


class SegmentationPresetTrain:
    """Training-time transform pipeline applied jointly to (image, target).

    The pipeline is: random resize, optional horizontal flip, random crop,
    tensor conversion and normalization, all taken from the ``T`` module.
    """

    def __init__(self, base_size, crop_size, hflip_prob=0.5, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        # Resize bounds are 0.5x and 2.0x of the base size.
        pipeline = [T.RandomResize(int(0.5 * base_size), int(2.0 * base_size))]
        # The flip step is omitted entirely when its probability is not positive.
        if hflip_prob > 0:
            pipeline.append(T.RandomHorizontalFlip(hflip_prob))
        pipeline += [
            T.RandomCrop(crop_size),
            T.ToTensor(),
            T.Normalize(mean=mean, std=std),
        ]
        self.transforms = T.Compose(pipeline)

    def __call__(self, img, target):
        """Apply the composed transforms and return their result."""
        return self.transforms(img, target)


class SegmentationPresetEval:
    """Evaluation-time transform pipeline applied jointly to (image, target).

    Uses ``T.RandomResize`` with identical lower and upper bounds, followed by
    tensor conversion and normalization.
    """

    def __init__(self, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        steps = [
            T.RandomResize(base_size, base_size),
            T.ToTensor(),
            T.Normalize(mean=mean, std=std),
        ]
        self.transforms = T.Compose(steps)

    def __call__(self, img, target):
        """Apply the composed transforms and return their result."""
        return self.transforms(img, target)


def get_transform(train):
    """Return the training preset when ``train`` is truthy, else the eval preset.

    Both presets use a base size of 520; the training preset crops to 480.
    """
    base_size, crop_size = 520, 480

    if train:
        return SegmentationPresetTrain(base_size, crop_size)
    return SegmentationPresetEval(base_size)


def create_model(num_classes):
    """Build the LRASPP MobileNetV3-Large model and load local pretrained weights.

    The weights are read from "./lraspp_mobilenet_v3_large.pth", the same
    checkpoint the single-device training script uses. Loading is non-strict,
    so a checkpoint from a different architecture would match no keys and
    leave the model at its initial weights; the printed key lists make such a
    mismatch visible.

    Args:
        num_classes: number of output classes passed to the model factory.

    Returns:
        The constructed model with the matching pretrained weights loaded.
    """
    model = lraspp_mobilenetv3_large(num_classes=num_classes)
    weights_dict = torch.load("./lraspp_mobilenet_v3_large.pth", map_location='cpu')

    if num_classes != 21:
        # The official pretrained weights are for 21 classes (background included).
        # For a custom dataset, drop the class-dependent weights so the shape
        # mismatch does not raise.
        for k in list(weights_dict.keys()):
            if "low_classifier" in k or "high_classifier" in k:
                del weights_dict[k]

    missing_keys, unexpected_keys = model.load_state_dict(weights_dict, strict=False)
    if len(missing_keys) != 0 or len(unexpected_keys) != 0:
        print("missing_keys: ", missing_keys)
        print("unexpected_keys: ", unexpected_keys)

    return model


def main(args):
    """Train and evaluate LRASPP on Pascal VOC 2012, optionally distributed.

    ``init_distributed_mode`` decides whether the run is distributed and sets
    ``args.distributed``. After every epoch the validation metrics are
    printed, appended to a timestamped results file by the main process, and
    a checkpoint is saved under ``args.output_dir`` when that is set.

    Args:
        args: parsed command-line namespace. ``start_epoch`` is overwritten
            when resuming from a checkpoint.

    Raises:
        FileNotFoundError: if ``args.data_path`` does not contain "VOCdevkit".
    """
    init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)
    # number of segmentation classes + background
    num_classes = args.num_classes + 1

    # File that records per-epoch training and validation information.
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    VOC_root = args.data_path
    # check voc root
    if os.path.exists(os.path.join(VOC_root, "VOCdevkit")) is False:
        raise FileNotFoundError("VOCdevkit dose not in path:'{}'.".format(VOC_root))

    # load train data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> train.txt
    train_dataset = VOCSegmentation(args.data_path,
                                    year="2012",
                                    transforms=get_transform(train=True),
                                    txt_name="train.txt")
    # load validation data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> val.txt
    val_dataset = VOCSegmentation(args.data_path,
                                  year="2012",
                                  transforms=get_transform(train=False),
                                  txt_name="val.txt")

    print("Creating data loaders")
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        test_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
    else:
        train_sampler = torch.utils.data.RandomSampler(train_dataset)
        test_sampler = torch.utils.data.SequentialSampler(val_dataset)

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size,
        sampler=train_sampler, num_workers=args.workers,
        collate_fn=train_dataset.collate_fn, drop_last=True)

    # The validation loader uses the validation dataset's own collate function.
    val_data_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=1,
        sampler=test_sampler, num_workers=args.workers,
        collate_fn=val_dataset.collate_fn)

    print("Creating model")
    # create model num_classes equal background + 20 classes
    model = create_model(num_classes=num_classes)
    model.to(device)

    if args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    # Only parameters that require gradients are handed to the optimizer.
    params_to_optimize = [
        {"params": [p for p in model_without_ddp.backbone.parameters() if p.requires_grad]},
        {"params": [p for p in model_without_ddp.classifier.parameters() if p.requires_grad]},
    ]

    optimizer = torch.optim.SGD(
        params_to_optimize,
        lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # Learning-rate schedule; it is updated once per step (not once per epoch).
    lr_scheduler = create_lr_scheduler(optimizer, len(train_data_loader), args.epochs, warmup=True)

    # If a resume path is given (weights from a previous run), continue from it.
    if args.resume:
        # If map_location is missing, torch.load will first load the module to CPU
        # and then copy each parameter to where it was saved,
        # which would result in all processes on the same machine using the same set of devices.
        # The checkpoint also holds the optimizer and LR-scheduler state.
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp:
            scaler.load_state_dict(checkpoint["scaler"])

    if args.test_only:
        confmat = evaluate(model, val_data_loader, device=device, num_classes=num_classes)
        val_info = str(confmat)
        print(val_info)
        return

    # init_distributed_mode leaves args.rank unset when the run is not
    # distributed; treat that single process as rank 0 so it still writes
    # the results file instead of raising AttributeError.
    rank = getattr(args, "rank", 0)

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        mean_loss, lr = train_one_epoch(model, optimizer, train_data_loader, device, epoch,
                                        lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler)

        confmat = evaluate(model, val_data_loader, device=device, num_classes=num_classes)
        val_info = str(confmat)
        print(val_info)

        # Only the main process writes to the results file.
        if rank in [-1, 0]:
            # write into txt
            with open(results_file, "a") as f:
                # Record train_loss, lr and the validation metrics of this epoch.
                train_info = f"[epoch: {epoch}]\n" \
                             f"train_loss: {mean_loss:.4f}\n" \
                             f"lr: {lr:.6f}\n"
                f.write(train_info + val_info + "\n\n")

        if args.output_dir:
            # save_on_master performs the actual write on the main process only.
            save_file = {'model': model_without_ddp.state_dict(),
                         'optimizer': optimizer.state_dict(),
                         'lr_scheduler': lr_scheduler.state_dict(),
                         'args': args,
                         'epoch': epoch}
            if args.amp:
                save_file["scaler"] = scaler.state_dict()
            save_on_master(save_file,
                           os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))


if __name__ == "__main__":
    import argparse

    def _str2bool(value):
        """Convert a command-line string to bool.

        ``type=bool`` would treat every non-empty string (including "False")
        as True, so the accepted spellings are listed explicitly.
        """
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ("true", "t", "yes", "y", "1", "on"):
            return True
        if text in ("false", "f", "no", "n", "0", "off", ""):
            return False
        raise argparse.ArgumentTypeError("boolean value expected, got '{}'".format(value))

    parser = argparse.ArgumentParser(
        description=__doc__)

    # Root directory of the training data (the folder containing VOCdevkit)
    parser.add_argument('--data-path', default='/data/', help='dataset')
    # Training device type
    parser.add_argument('--device', default='cuda', help='device')
    # Number of target classes (background not included)
    parser.add_argument('--num-classes', default=20, type=int, help='num_classes')
    # batch_size on each GPU
    parser.add_argument('-b', '--batch-size', default=4, type=int,
                        help='images per gpu, the total batch size is $NGPU x batch_size')
    # Epoch index to continue training from
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # Total number of training epochs
    parser.add_argument('--epochs', default=20, type=int, metavar='N',
                        help='number of total epochs to run')
    # Whether to use synchronized BN across GPUs; off by default, enabling it slows training.
    # "--sync_bn" alone enables it; "--sync_bn True" / "--sync_bn False" are also accepted.
    parser.add_argument('--sync_bn', type=_str2bool, nargs='?', const=True, default=False,
                        help='whether using SyncBatchNorm')
    # Number of workers for data loading and preprocessing
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    # Learning rate, 0.0001 by default; try a larger value if results are poor
    parser.add_argument('--lr', default=0.0001, type=float,
                        help='initial learning rate')
    # momentum parameter of SGD
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # weight_decay parameter of SGD
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # How often training information is printed
    parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
    # Directory where files are saved
    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')
    # Continue training from a previous result
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    # Evaluate only, no training
    parser.add_argument(
        "--test-only",
        dest="test_only",
        help="Only test the model",
        action="store_true",
    )

    # Number of distributed processes
    parser.add_argument('--world-size', default=1, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
    # Mixed precision training parameters.
    # "--amp" alone enables it; "--amp True" / "--amp False" are also accepted.
    parser.add_argument("--amp", default=False, type=_str2bool, nargs="?", const=True,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()

    # If an output directory is given, create it when it does not exist yet.
    if args.output_dir:
        mkdir(args.output_dir)

    main(args)


================================================
FILE: pytorch_segmentation/lraspp/train_utils/__init__.py
================================================
from .train_and_eval import train_one_epoch, evaluate, create_lr_scheduler
from .distributed_utils import init_distributed_mode, save_on_master, mkdir


================================================
FILE: pytorch_segmentation/lraspp/train_utils/distributed_utils.py
================================================
from collections import defaultdict, deque
import datetime
import time
import torch
import torch.distributed as dist

import errno
import os


class SmoothedValue(object):
    """Keep a bounded window of recent values plus running totals.

    Window statistics (median, avg, max, value) are computed over the last
    ``window_size`` values; ``global_avg`` uses the running total and count.
    """

    def __init__(self, window_size=20, fmt=None):
        self.fmt = "{value:.4f} ({global_avg:.4f})" if fmt is None else fmt
        # Only the most recent `window_size` values are retained here.
        self.deque = deque(maxlen=window_size)
        self.count = 0
        self.total = 0.0

    def update(self, value, n=1):
        """Record ``value``; the running total and count are weighted by ``n``."""
        self.deque.append(value)
        self.total += value * n
        self.count += n

    def synchronize_between_processes(self):
        """
        Sum ``count`` and ``total`` across processes when distributed is active.

        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        packed = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
        dist.barrier()
        dist.all_reduce(packed)
        reduced_count, reduced_total = packed.tolist()
        self.count = int(reduced_count)
        self.total = reduced_total

    @property
    def median(self):
        """Median of the values currently in the window."""
        return torch.tensor(list(self.deque)).median().item()

    @property
    def avg(self):
        """Mean of the values currently in the window (computed in float32)."""
        window = torch.tensor(list(self.deque), dtype=torch.float32)
        return window.mean().item()

    @property
    def global_avg(self):
        """Running total divided by running count."""
        return self.total / self.count

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    @property
    def value(self):
        """Most recently recorded value."""
        return self.deque[-1]

    def __str__(self):
        stats = dict(
            median=self.median,
            avg=self.avg,
            global_avg=self.global_avg,
            max=self.max,
            value=self.value,
        )
        return self.fmt.format(**stats)


class ConfusionMatrix(object):
    """Accumulate an ``n x n`` confusion matrix and derive accuracy / IoU from it.

    Entry ``[i, j]`` counts elements whose value in ``a`` is ``i`` and whose
    value in ``b`` is ``j``.
    """

    def __init__(self, num_classes):
        self.num_classes = num_classes
        self.mat = None  # allocated lazily on the first update

    def update(self, a, b):
        """Add the pairs ``(a, b)`` to the matrix.

        Elements of ``a`` outside ``[0, num_classes)`` are ignored. Per the
        original comments, ``a`` holds the true classes and ``b`` the
        predicted ones.
        """
        n = self.num_classes
        if self.mat is None:
            # Create the confusion matrix on the same device as `a`.
            self.mat = torch.zeros((n, n), dtype=torch.int64, device=a.device)
        with torch.no_grad():
            # Keep only positions whose label is a valid class index.
            valid = (a >= 0) & (a < n)
            # Encode each (true, predicted) pair as one flat index and count them.
            flat_index = n * a[valid].to(torch.int64) + b[valid]
            counts = torch.bincount(flat_index, minlength=n**2)
            self.mat += counts.reshape(n, n)

    def reset(self):
        """Zero the matrix in place if it has been allocated."""
        if self.mat is None:
            return
        self.mat.zero_()

    def compute(self):
        """Return ``(global accuracy, per-class accuracy, per-class IoU)``."""
        h = self.mat.float()
        # The diagonal holds the correctly predicted counts.
        correct = torch.diag(h)
        row_totals = h.sum(1)
        col_totals = h.sum(0)
        acc_global = correct.sum() / h.sum()
        acc = correct / row_totals
        iu = correct / (row_totals + col_totals - correct)
        return acc_global, acc, iu

    def reduce_from_all_processes(self):
        """Sum the matrix across processes; no-op when distributed is inactive."""
        dist_ready = torch.distributed.is_available() and torch.distributed.is_initialized()
        if not dist_ready:
            return
        torch.distributed.barrier()
        torch.distributed.all_reduce(self.mat)

    def __str__(self):
        acc_global, acc, iu = self.compute()
        per_class_acc = ['{:.1f}'.format(v) for v in (acc * 100).tolist()]
        per_class_iu = ['{:.1f}'.format(v) for v in (iu * 100).tolist()]
        report = [
            'global correct: {:.1f}'.format(acc_global.item() * 100),
            'average row correct: {}'.format(per_class_acc),
            'IoU: {}'.format(per_class_iu),
            'mean IoU: {:.1f}'.format(iu.mean().item() * 100),
        ]
        return '\n'.join(report)


class MetricLogger(object):
    """Collect named ``SmoothedValue`` meters and print periodic progress lines."""

    def __init__(self, delimiter="\t"):
        # Unknown meter names get a fresh SmoothedValue on first access.
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Feed each keyword value into the meter of the same name.

        Tensors are converted with ``.item()``; the value must then be a
        float or an int.
        """
        for name, val in kwargs.items():
            if isinstance(val, torch.Tensor):
                val = val.item()
            assert isinstance(val, (float, int))
            self.meters[name].update(val)

    def __getattr__(self, attr):
        # Meters take precedence, then the instance dictionary.
        for table in (self.meters, self.__dict__):
            if attr in table:
                return table[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        return self.delimiter.join(
            "{}: {}".format(name, str(meter)) for name, meter in self.meters.items()
        )

    def synchronize_between_processes(self):
        """Synchronize every meter across processes."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register ``meter`` under ``name``, replacing any existing one."""
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield the items of ``iterable``, printing progress every ``print_freq`` items.

        ``iterable`` must support ``len()``. Each progress line shows the
        index, an ETA, all meters, the iteration and data-wait times and,
        when CUDA is available, the peak allocated memory in MB. A total-time
        line is printed once the iterable is exhausted.
        """
        header = header if header else ''
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        num_items = len(iterable)
        # Pad the running index to the width of the total count.
        space_fmt = ':' + str(len(str(num_items))) + 'd'
        fields = [
            header,
            '[{0' + space_fmt + '}/{1}]',
            'eta: {eta}',
            '{meters}',
            'time: {time}',
            'data: {data}',
        ]
        if torch.cuda.is_available():
            fields.append('max mem: {memory:.0f}')
        log_msg = self.delimiter.join(fields)
        MB = 1024.0 * 1024.0
        for i, obj in enumerate(iterable):
            # Time spent waiting for the item.
            data_time.update(time.time() - end)
            yield obj
            # Time for the whole iteration, including the consumer's work.
            iter_time.update(time.time() - end)
            if i % print_freq == 0:
                eta_seconds = iter_time.global_avg * (num_items - i)
                values = dict(
                    eta=str(datetime.timedelta(seconds=int(eta_seconds))),
                    meters=str(self),
                    time=str(iter_time),
                    data=str(data_time),
                )
                if torch.cuda.is_available():
                    values['memory'] = torch.cuda.max_memory_allocated() / MB
                print(log_msg.format(i, num_items, **values))
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print('{} Total time: {}'.format(header, total_time_str))


def mkdir(path):
    """Create ``path`` (including parents), tolerating a pre-existing path.

    Any ``OSError`` other than "already exists" (``errno.EEXIST``) is re-raised.
    """
    try:
        os.makedirs(path)
    except OSError as err:
        already_exists = err.errno == errno.EEXIST
        if not already_exists:
            raise


def setup_for_distributed(is_master):
    """Replace the builtin ``print`` with a version that is silent off-master.

    After this call, ``print(...)`` only produces output when ``is_master`` is
    truthy or when the caller passes ``force=True``. The ``force`` keyword is
    always stripped before delegating to the print that was installed when
    this function ran.
    """
    import builtins as __builtin__
    wrapped_print = __builtin__.print

    def print(*args, **kwargs):
        forced = kwargs.pop('force', False)
        if not (is_master or forced):
            return
        wrapped_print(*args, **kwargs)

    __builtin__.print = print


def is_dist_avail_and_initialized():
    """Return True only if torch.distributed is both available and initialized."""
    return dist.is_available() and dist.is_initialized()


def get_world_size():
    """Return the distributed world size, or 1 when not running distributed."""
    if is_dist_avail_and_initialized():
        return dist.get_world_size()
    return 1


def get_rank():
    """Return this process's distributed rank, or 0 when not running distributed."""
    if is_dist_avail_and_initialized():
        return dist.get_rank()
    return 0


def is_main_process():
    """Return True when the current process has rank 0."""
    rank = get_rank()
    return rank == 0


def save_on_master(*args, **kwargs):
    """Forward the arguments to ``torch.save`` on the rank-0 process only."""
    if not is_main_process():
        return
    torch.save(*args, **kwargs)


def init_distributed_mode(args):
    """Initialise torch.distributed from the environment and record it on ``args``.

    Rank/GPU discovery, in order:
      1. ``RANK`` + ``WORLD_SIZE`` env vars (also reads ``LOCAL_RANK``);
      2. ``SLURM_PROCID`` env var (GPU index is rank modulo visible GPU count);
      3. a pre-existing ``args.rank`` attribute (used as is).
    If none applies, ``args.distributed`` is set to False and the function
    returns without touching torch.distributed.

    Otherwise ``args.distributed`` is set to True, the CUDA device is selected,
    the process group is created with the NCCL backend using ``args.dist_url``,
    ``args.world_size`` and ``args.rank``, and printing is disabled on every
    rank except 0.
    """
    env = os.environ
    if 'RANK' in env and 'WORLD_SIZE' in env:
        args.rank = int(env["RANK"])
        args.world_size = int(env['WORLD_SIZE'])
        args.gpu = int(env['LOCAL_RANK'])
    elif 'SLURM_PROCID' in env:
        args.rank = int(env['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()
    elif not hasattr(args, "rank"):
        print('Not using distributed mode')
        args.distributed = False
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    banner = '| distributed init (rank {}): {}'.format(args.rank, args.dist_url)
    print(banner, flush=True)
    torch.distributed.init_process_group(
        backend=args.dist_backend,
        init_method=args.dist_url,
        world_size=args.world_size,
        rank=args.rank,
    )
    setup_for_distributed(args.rank == 0)


================================================
FILE: pytorch_segmentation/lraspp/train_utils/train_and_eval.py
================================================
import torch
from torch import nn
import train_utils.distributed_utils as utils


def criterion(inputs, target):
    """Cross-entropy loss over every head in ``inputs``.

    ``inputs`` maps head names to logits. Target pixels equal to 255 (object
    borders or padding) are ignored. With a single head the ``'out'`` loss is
    returned; otherwise the result is ``out + 0.5 * aux``.
    """
    per_head = {
        head: nn.functional.cross_entropy(logits, target, ignore_index=255)
        for head, logits in inputs.items()
    }

    main_loss = per_head['out']
    if len(per_head) == 1:
        return main_loss

    return main_loss + 0.5 * per_head['aux']


def evaluate(model, data_loader, device, num_classes):
    """Run ``model`` over ``data_loader`` and return the accumulated confusion matrix.

    The model is put in eval mode and gradients are disabled. For each batch
    the ``'out'`` head is arg-maxed over dim 1 and compared with the target;
    both are flattened before updating the matrix. The matrix is reduced
    across processes before being returned.
    """
    model.eval()
    confmat = utils.ConfusionMatrix(num_classes)
    metric_logger = utils.MetricLogger(delimiter="  ")
    with torch.no_grad():
        for image, target in metric_logger.log_every(data_loader, 100, 'Test:'):
            image = image.to(device)
            target = target.to(device)
            prediction = model(image)['out'].argmax(1)

            confmat.update(target.flatten(), prediction.flatten())

        confmat.reduce_from_all_processes()

    return confmat


def train_one_epoch(model, optimizer, data_loader, device, epoch, lr_scheduler, print_freq=10, scaler=None):
    """Train ``model`` for one pass over ``data_loader``.

    Mixed precision is enabled when ``scaler`` is not None. The learning-rate
    scheduler is stepped after every iteration (not once per epoch).

    Returns:
        (global average of the logged loss, learning rate of the first
        parameter group after the last iteration).
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)
    use_amp = scaler is not None

    for image, target in metric_logger.log_every(data_loader, print_freq, header):
        image = image.to(device)
        target = target.to(device)
        with torch.cuda.amp.autocast(enabled=use_amp):
            loss = criterion(model(image), target)

        optimizer.zero_grad()
        if scaler is None:
            loss.backward()
            optimizer.step()
        else:
            # Scaled backward pass; the scaler drives the optimizer step.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

        lr_scheduler.step()

        lr = optimizer.param_groups[0]["lr"]
        metric_logger.update(loss=loss.item(), lr=lr)

    return metric_logger.meters["loss"].global_avg, lr


def create_lr_scheduler(optimizer,
                        num_step: int,
                        epochs: int,
                        warmup=True,
                        warmup_epochs=1,
                        warmup_factor=1e-3):
    """Build a per-iteration LambdaLR with linear warmup and poly (power 0.9) decay.

    Args:
        optimizer: optimizer whose learning rate is scheduled.
        num_step: number of iterations per epoch (must be > 0).
        epochs: total number of training epochs (must be > 0).
        warmup: whether to run a warmup phase; a falsy value disables it.
        warmup_epochs: warmup length in epochs; 0 also disables warmup
            (previously ``warmup=True, warmup_epochs=0`` raised
            ZeroDivisionError when the scheduler was constructed).
        warmup_factor: LR multiplier at step 0 of the warmup.

    Returns:
        A ``torch.optim.lr_scheduler.LambdaLR`` meant to be stepped once per
        iteration.
    """
    assert num_step > 0 and epochs > 0
    if not warmup:
        warmup_epochs = 0

    warmup_steps = warmup_epochs * num_step
    decay_steps = (epochs - warmup_epochs) * num_step

    def f(x):
        """
        Return the LR multiplier for step ``x``.
        Note that PyTorch calls lr_scheduler.step() once before training starts.
        """
        if warmup_steps > 0 and x <= warmup_steps:
            alpha = float(x) / warmup_steps
            # During warmup the multiplier goes from warmup_factor -> 1
            return warmup_factor * (1 - alpha) + alpha
        # After warmup the multiplier decays from 1 -> 0
        # Reference: deeplab_v2 "Learning rate policy"
        remaining = 1 - (x - warmup_steps) / decay_steps
        # Clamp so that stepping past the planned schedule yields 0 instead of
        # a complex number (negative base raised to a fractional power).
        return max(remaining, 0.0) ** 0.9

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)


================================================
FILE: pytorch_segmentation/lraspp/transforms.py
================================================
import numpy as np
import random

import torch
from torchvision import transforms as T
from torchvision.transforms import functional as F


def pad_if_smaller(img, size, fill=0):
    """Pad ``img`` on the right/bottom with ``fill`` so both sides are >= ``size``.

    The image is returned unchanged when its shorter side already reaches
    ``size``. ``img.size`` is read as ``(width, height)``.
    """
    if min(img.size) >= size:
        return img

    width, height = img.size
    pad_right = size - width if width < size else 0
    pad_bottom = size - height if height < size else 0
    return F.pad(img, (0, 0, pad_right, pad_bottom), fill=fill)


class Compose(object):
    """Chain transforms that each take and return an ``(image, target)`` pair."""

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        sample = (image, target)
        for step in self.transforms:
            new_image, new_target = step(*sample)
            sample = (new_image, new_target)
        return sample


class RandomResize(object):
    """Resize image and target to a random size drawn from [min_size, max_size]."""

    def __init__(self, min_size, max_size=None):
        self.min_size = min_size
        # Without an explicit upper bound the size is fixed at min_size.
        self.max_size = min_size if max_size is None else max_size

    def __call__(self, image, target):
        short_side = random.randint(self.min_size, self.max_size)
        # An int size makes F.resize scale the shorter edge to that length.
        resized_image = F.resize(image, short_side)
        # InterpolationMode.NEAREST only exists from torchvision 0.9.0 on;
        # older versions need PIL.Image.NEAREST instead.
        resized_target = F.resize(target, short_side, interpolation=T.InterpolationMode.NEAREST)
        return resized_image, resized_target


class RandomHorizontalFlip(object):
    """Horizontally flip image and target together with probability ``flip_prob``."""

    def __init__(self, flip_prob):
        self.flip_prob = flip_prob

    def __call__(self, image, target):
        do_flip = random.random() < self.flip_prob
        if not do_flip:
            return image, target
        return F.hflip(image), F.hflip(target)


class RandomCrop(object):
    """Crop the same random ``size`` x ``size`` window from image and target.

    Inputs smaller than ``size`` are padded first: the image with 0 and the
    target with 255.
    """

    def __init__(self, size):
        self.size = size

    def __call__(self, image, target):
        side = self.size
        image = pad_if_smaller(image, side)
        target = pad_if_smaller(target, side, fill=255)
        # One set of crop parameters is shared so image and target stay aligned.
        window = T.RandomCrop.get_params(image, (side, side))
        return F.crop(image, *window), F.crop(target, *window)


class CenterCrop(object):
    """Center-crop image and target to ``size``."""

    def __init__(self, size):
        self.size = size

    def __call__(self, image, target):
        cropped_image = F.center_crop(image, self.size)
        cropped_target = F.center_crop(target, self.size)
        return cropped_image, cropped_target


class ToTensor(object):
    """Convert the image with ``F.to_tensor`` and the target to an int64 tensor."""

    def __call__(self, image, target):
        image_tensor = F.to_tensor(image)
        target_array = np.array(target)
        target_tensor = torch.as_tensor(target_array, dtype=torch.int64)
        return image_tensor, target_tensor


class Normalize(object):
    """Normalize the image with ``mean``/``std``; the target passes through unchanged."""

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, image, target):
        normalized = F.normalize(image, mean=self.mean, std=self.std)
        return normalized, target


================================================
FILE: pytorch_segmentation/lraspp/validation.py
================================================
import os
import torch

from src import lraspp_mobilenetv3_large
from train_utils import evaluate
from my_dataset import VOCSegmentation
import transforms as T


class SegmentationPresetEval:
    """Evaluation preprocessing: fixed-size resize, tensor conversion, normalization."""

    def __init__(self, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        # min_size == max_size, so the "random" resize is deterministic.
        pipeline = [
            T.RandomResize(base_size, base_size),
            T.ToTensor(),
            T.Normalize(mean=mean, std=std),
        ]
        self.transforms = T.Compose(pipeline)

    def __call__(self, img, target):
        return self.transforms(img, target)


def main(args):
    """Evaluate an LR-ASPP checkpoint on the VOC2012 ``val.txt`` split and print the confusion matrix."""
    device_name = args.device if torch.cuda.is_available() else "cpu"
    device = torch.device(device_name)
    assert os.path.exists(args.weights), f"weights {args.weights} not found."

    # segmentation num_classes + background
    num_classes = args.num_classes + 1

    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> val.txt
    val_dataset = VOCSegmentation(
        args.data_path,
        year="2012",
        transforms=SegmentationPresetEval(520),
        txt_name="val.txt",
    )

    num_workers = 8
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=1,
        num_workers=num_workers,
        pin_memory=True,
        collate_fn=val_dataset.collate_fn,
    )

    model = lraspp_mobilenetv3_large(num_classes=num_classes)
    # The checkpoint stores the weights under the 'model' key.
    checkpoint = torch.load(args.weights, map_location=device)
    model.load_state_dict(checkpoint['model'])
    model.to(device)

    confmat = evaluate(model, val_loader, device=device, num_classes=num_classes)
    print(confmat)


def parse_args():
    """Parse the command-line options of the validation script."""
    import argparse

    parser = argparse.ArgumentParser(description="pytorch lraspp validation")
    options = (
        ("--data-path", dict(default="/data/", help="VOCdevkit root")),
        ("--weights", dict(default="./save_weights/model_29.pth")),
        ("--num-classes", dict(default=20, type=int)),
        ("--device", dict(default="cuda", help="training device")),
        ('--print-freq', dict(default=10, type=int, help='print frequency')),
    )
    for flag, settings in options:
        parser.add_argument(flag, **settings)

    return parser.parse_args()


# Script entry point: parse the CLI options and run the validation.
if __name__ == '__main__':
    args = parse_args()
    main(args)


================================================
FILE: pytorch_segmentation/u2net/README.md
================================================
# U2-Net(Going Deeper with Nested U-Structure for Salient Object Detection)

## 该项目主要是来自官方的源码
- https://github.com/xuebinqin/U-2-Net
- 注意，该项目是针对显著性目标检测领域（Salient Object Detection / SOD）

## 环境配置：
- Python3.6/3.7/3.8
- Pytorch1.10
- Ubuntu或Centos(Windows暂不支持多GPU训练)
- 建议使用GPU训练
- 详细环境配置见`requirements.txt`


## 文件结构
```
├── src: 搭建网络相关代码
├── train_utils: 训练以及验证相关代码
├── my_dataset.py: 自定义数据集读取相关代码
├── predict.py: 简易的预测代码
├── train.py: 单GPU或CPU训练代码
├── train_multi_GPU.py: 多GPU并行训练代码
├── validation.py: 单独验证模型相关代码
├── transforms.py: 数据预处理相关代码
└── requirements.txt: 项目依赖
```

## DUTS数据集准备
- DUTS数据集官方下载地址：[http://saliencydetection.net/duts/](http://saliencydetection.net/duts/)
- 如果下载不了，可以通过我提供的百度云下载，链接: https://pan.baidu.com/s/1nBI6GTN0ZilqH4Tvu18dow  密码: r7k6
- 其中DUTS-TR为训练集，DUTS-TE是测试（验证）集，数据集解压后目录结构如下：
```
├── DUTS-TR
│      ├── DUTS-TR-Image: 该文件夹存放所有训练集的图片
│      └── DUTS-TR-Mask: 该文件夹存放对应训练图片的GT标签（Mask蒙板形式）
│
└── DUTS-TE
       ├── DUTS-TE-Image: 该文件夹存放所有测试（验证）集的图片
       └── DUTS-TE-Mask: 该文件夹存放对应测试（验证）图片的GT标签（Mask蒙板形式）
```
- 注意训练或者验证过程中，将`--data-path`指向`DUTS-TR`所在根目录

## 官方权重
从官方转换得到的权重：
- `u2net_full.pth`下载链接: https://pan.baidu.com/s/1ojJZS8v3F_eFKkF3DEdEXA  密码: fh1v
- `u2net_lite.pth`下载链接: https://pan.baidu.com/s/1TIWoiuEz9qRvTX9quDqQHg  密码: 5stj

`u2net_full`在DUTS-TE上的验证结果(使用`validation.py`进行验证)：
```
MAE: 0.044
maxF1: 0.868
```
**注：**
- 这里的maxF1和原论文中的结果有些差异，经过对比发现差异主要来自post_norm，原仓库中会对预测结果进行post_norm，但在本仓库中将post_norm给移除了。
如果加上post_norm这里的maxF1为`0.872`，如果需要做该后处理可自行添加，post_norm流程如下，其中output为验证时网络预测的输出：
```python
ma = torch.max(output)
mi = torch.min(output)
output = (output - mi) / (ma - mi)
```
- 如果要载入官方提供的权重，需要将`src/model.py`中`ConvBNReLU`类里卷积的bias设置成True，因为官方代码里没有进行设置（Conv2d的bias默认为True）。
因为卷积后跟了BN，所以bias是起不到作用的，所以在本仓库中默认将bias设置为False。

## 训练记录(`u2net_full`)
训练指令：
```
torchrun --nproc_per_node=4 train_multi_GPU.py --lr 0.004 --amp
```
训练最终在DUTS-TE上的验证结果：
```
MAE: 0.047
maxF1: 0.859
```
训练过程详情可见results.txt文件，训练权重下载链接: https://pan.baidu.com/s/1df2jMkrjbgEv-r1NMaZCZg  密码: n4l6

## 训练方法
* 确保提前准备好数据集
* 若要使用单GPU或者CPU训练，直接使用train.py训练脚本
* 若要使用多GPU训练，使用`torchrun --nproc_per_node=8 train_multi_GPU.py`指令，其中`nproc_per_node`参数为使用的GPU数量
* 如果想指定使用哪些GPU设备可在指令前加上`CUDA_VISIBLE_DEVICES=0,3`(例如我只要使用设备中的第1块和第4块GPU设备)
* `CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py`

## 如果对U2Net网络不了解的可参考我的bilibili
- [https://www.bilibili.com/video/BV1yB4y1z7m](https://www.bilibili.com/video/BV1yB4y1z7m)

## 进一步了解该项目，以及对U2Net代码的分析可参考我的bilibili
- [https://www.bilibili.com/video/BV1Kt4y137iS](https://www.bilibili.com/video/BV1Kt4y137iS)

## U2NET网络结构
![u2net](./u2net.png)

================================================
FILE: pytorch_segmentation/u2net/convert_weight.py
================================================
import re
import torch
from src import u2net_full, u2net_lite

# Per-stage constants used by convert() to turn the numeric suffix of an
# official "rebnconv<N>d" key into an index of the new `decode_modules` list
# (index = layers[...][stage] - N - 1).
# NOTE(review): the values appear to match the RSU `height` of each
# encoder/decoder stage in the U2Net cfg — confirm.
layers = {"encode": [7, 6, 5, 4, 4, 4],
          "decode": [4, 4, 5, 6, 7]}


def convert_conv_bn(new_weight, prefix, ks, v):
    """Store one conv/bn tensor ``v`` in ``new_weight`` under its new key.

    ``ks`` is the tail of the old key: ``ks[0]`` names the layer (must contain
    "conv" or "bn", otherwise nothing happens) and ``ks[1]`` names the tensor.
    ``num_batches_tracked`` of BN layers is dropped silently; any other
    unknown tensor name is reported with a printed message.
    """
    layer_name = ks[0]

    if "conv" in layer_name:
        field = ks[1]
        if field in ("weight", "bias"):
            new_weight[prefix + ".conv." + field] = v
        else:
            print(f"unrecognized weight {prefix + ks[1]}")
        return

    if "bn" in layer_name:
        field = ks[1]
        if field in ("running_mean", "running_var", "weight", "bias"):
            new_weight[prefix + ".bn." + field] = v
        elif field != "num_batches_tracked":
            print(f"unrecognized weight {prefix + ks[1]}")
        return


def convert(old_weight: dict):
    """Rename the keys of an official U2-Net state dict to this repo's layout.

    Key families handled (decided by the first dotted component):
      * ``stage<N>``  -> ``encode_modules.<N-1>...``
      * ``stage<N>d`` -> ``decode_modules.<5-N>...``
      * ``side<N>``   -> ``side_modules.<6-N>.{weight,bias}``
      * ``outconv``   -> ``out_conv.{weight,bias}``
    Anything else is reported with a printed message and skipped.

    Returns:
        A new dict holding the renamed entries.
    """
    new_weight = {}
    for k, v in old_weight.items():
        ks = k.split(".")
        top = ks[0]

        if "stage" in top:
            stage_id = int(re.findall(r'\d', top)[0])
            if "d" not in top:
                # encode stage
                num = stage_id - 1
                prefix = f"encode_modules.{num}"
                heights = layers["encode"]
            else:
                # decode stage
                num = 5 - stage_id
                prefix = f"decode_modules.{num}"
                heights = layers["decode"]

            inner = ks[1]
            if "rebnconvin" == inner:
                # ConvBNReLU module
                prefix += ".conv_in"
                convert_conv_bn(new_weight, prefix, ks[2:], v)
            elif "rebnconv" in inner:
                inner_id = int(re.findall(r'\d', inner)[0])
                if "d" not in inner:
                    num_ = inner_id - 1
                    prefix += f".encode_modules.{num_}"
                else:
                    num_ = heights[num] - inner_id - 1
                    prefix += f".decode_modules.{num_}"
                convert_conv_bn(new_weight, prefix, ks[2:], v)
            else:
                print(f"unrecognized key: {k}")
            continue

        if "side" in top:
            # side
            num = 6 - int(re.findall(r'\d', top)[0])
            prefix = f"side_modules.{num}"
        elif "outconv" in top:
            prefix = "out_conv"
        else:
            print(f"unrecognized key: {k}")
            continue

        # side / out conv: plain Conv2d with weight and bias only
        if "weight" == ks[1]:
            new_weight[prefix + ".weight"] = v
        elif "bias" == ks[1]:
            new_weight[prefix + ".bias"] = v
        else:
            print(f"unrecognized weight {prefix + ks[1]}")

    return new_weight


def main_1():
    """Convert official weights by instantiating the official model and verifying outputs.

    Loads ``u2net.pth`` into the official ``U2NET``, converts its state dict,
    loads it strictly into ``u2net_full()``, asserts both models give an
    identical first output on a seeded random input, then saves
    ``u2net_full.pth``.
    """
    from u2net import U2NET, U2NETP

    official = U2NET()
    official.load_state_dict(torch.load("u2net.pth", map_location='cpu'))
    ours = u2net_full()

    # Lite variant:
    # official = U2NETP()
    # official.load_state_dict(torch.load("u2netp.pth", map_location='cpu'))
    # ours = u2net_lite()

    converted = convert(official.state_dict())
    ours.load_state_dict(converted, strict=True)

    torch.random.manual_seed(0)
    x = torch.randn(1, 3, 288, 288)
    official.eval()
    ours.eval()
    with torch.no_grad():
        # The official model returns several outputs; only the first is compared.
        reference = official(x)[0]
        candidate = ours(x)
        assert torch.equal(reference, candidate)
        torch.save(ours.state_dict(), "u2net_full.pth")


def main():
    """Convert ``u2net.pth`` (official state dict) and save it as ``u2net_full.pth``."""
    official_weights = torch.load("u2net.pth", map_location='cpu')
    model = u2net_full()

    # Lite variant:
    # official_weights = torch.load("u2netp.pth", map_location='cpu')
    # model = u2net_lite()

    converted = convert(official_weights)
    # strict=True makes a missing or unexpected key fail loudly.
    model.load_state_dict(converted, strict=True)
    torch.save(model.state_dict(), "u2net_full.pth")


# Script entry point: run the state-dict-only conversion.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_segmentation/u2net/my_dataset.py
================================================
import os

import cv2
import torch.utils.data as data


class DUTSDataset(data.Dataset):
    """DUTS saliency dataset yielding ``(image, mask)`` pairs.

    Expected layout under ``root``:
        DUTS-TR/DUTS-TR-Image, DUTS-TR/DUTS-TR-Mask   (train=True)
        DUTS-TE/DUTS-TE-Image, DUTS-TE/DUTS-TE-Mask   (train=False)

    Every ``*.jpg`` image must have a ``*.png`` mask with the same stem.

    Args:
        root: directory that contains ``DUTS-TR`` / ``DUTS-TE``.
        train: select the training split (True) or the test split (False).
        transforms: optional callable applied as ``transforms(image, target)``.

    Raises:
        AssertionError: if a directory is missing, no image is found, or an
            image has no corresponding mask.
    """

    def __init__(self, root: str, train: bool = True, transforms=None):
        assert os.path.exists(root), f"path '{root}' does not exist."
        if train:
            self.image_root = os.path.join(root, "DUTS-TR", "DUTS-TR-Image")
            self.mask_root = os.path.join(root, "DUTS-TR", "DUTS-TR-Mask")
        else:
            self.image_root = os.path.join(root, "DUTS-TE", "DUTS-TE-Image")
            self.mask_root = os.path.join(root, "DUTS-TE", "DUTS-TE-Mask")
        assert os.path.exists(self.image_root), f"path '{self.image_root}' does not exist."
        assert os.path.exists(self.mask_root), f"path '{self.mask_root}' does not exist."

        image_names = [p for p in os.listdir(self.image_root) if p.endswith(".jpg")]
        # A set keeps the per-image membership test O(1); with a list the
        # check below is quadratic in the dataset size.
        available_masks = {p for p in os.listdir(self.mask_root) if p.endswith(".png")}
        assert len(image_names) > 0, f"not find any images in {self.image_root}."

        # check images and mask; masks are stored in the same order as images
        mask_names = []
        for p in image_names:
            mask_name = p.replace(".jpg", ".png")
            assert mask_name in available_masks, f"{p} has no corresponding mask."
            mask_names.append(mask_name)

        self.images_path = [os.path.join(self.image_root, n) for n in image_names]
        self.masks_path = [os.path.join(self.mask_root, n) for n in mask_names]

        self.transforms = transforms

    def __getitem__(self, idx):
        """Load the RGB image and grayscale mask at ``idx`` and apply the transforms."""
        image_path = self.images_path[idx]
        mask_path = self.masks_path[idx]
        image = cv2.imread(image_path, flags=cv2.IMREAD_COLOR)
        assert image is not None, f"failed to read image: {image_path}"
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # BGR -> RGB

        target = cv2.imread(mask_path, flags=cv2.IMREAD_GRAYSCALE)
        assert target is not None, f"failed to read mask: {mask_path}"

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        return image, target

    def __len__(self):
        return len(self.images_path)

    @staticmethod
    def collate_fn(batch):
        """Batch samples of different sizes by zero-padding them to a common shape."""
        images, targets = list(zip(*batch))
        batched_imgs = cat_list(images, fill_value=0)
        batched_targets = cat_list(targets, fill_value=0)

        return batched_imgs, batched_targets


def cat_list(images, fill_value=0):
    """Stack tensors of differing sizes into one batch tensor.

    The batch has the per-dimension maximum shape of all inputs; each input is
    copied into the top-left corner of its slot (last two dims) and the
    remainder keeps ``fill_value``.
    """
    shapes = [img.shape for img in images]
    max_size = tuple(max(dims) for dims in zip(*shapes))
    batch_shape = (len(images), *max_size)
    batched_imgs = images[0].new(*batch_shape).fill_(fill_value)
    for idx, img in enumerate(images):
        slot = batched_imgs[idx]
        slot[..., :img.shape[-2], :img.shape[-1]].copy_(img)
    return batched_imgs


# Manual smoke test: expects DUTS-TR and DUTS-TE under the current directory.
if __name__ == '__main__':
    train_dataset = DUTSDataset("./", train=True)
    print(len(train_dataset))

    val_dataset = DUTSDataset("./", train=False)
    print(len(val_dataset))

    i, t = train_dataset[0]


================================================
FILE: pytorch_segmentation/u2net/predict.py
================================================
import os
import time

import cv2
import numpy as np
import matplotlib.pyplot as plt
import torch
from torchvision.transforms import transforms

from src import u2net_full


def time_synchronized():
    """Return ``time.time()`` after waiting for pending CUDA work (when CUDA is available)."""
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()


def main():
    """Run U2-Net on ``./test.png`` and save the masked image to ``pred_result.png``.

    The prediction is resized back to the original resolution, thresholded at
    0.5 and used to blank out the non-salient pixels of the input image.

    Raises:
        AssertionError: if the image or weights file is missing, or the image
            cannot be decoded.
    """
    weights_path = "./u2net_full.pth"
    img_path = "./test.png"
    threshold = 0.5

    # Fail fast with a clear message instead of a late error inside cv2/torch.
    assert os.path.exists(img_path), f"image file {img_path} does not exist."
    assert os.path.exists(weights_path), f"weights file {weights_path} does not exist."

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(320),
        transforms.Normalize(mean=(0.485, 0.456, 0.406),
                             std=(0.229, 0.224, 0.225))
    ])

    # cv2.imread returns None (no exception) when the file cannot be decoded.
    bgr_img = cv2.imread(img_path, flags=cv2.IMREAD_COLOR)
    assert bgr_img is not None, f"failed to read image: {img_path}"
    origin_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)

    h, w = origin_img.shape[:2]
    img = data_transform(origin_img)
    img = torch.unsqueeze(img, 0).to(device)  # [C, H, W] -> [1, C, H, W]

    model = u2net_full()
    weights = torch.load(weights_path, map_location='cpu')
    # Accept both a training checkpoint ({"model": ...}) and a bare state dict.
    if "model" in weights:
        model.load_state_dict(weights["model"])
    else:
        model.load_state_dict(weights)
    model.to(device)
    model.eval()

    with torch.no_grad():
        # Warm-up pass so the timed inference excludes one-off initialisation.
        img_height, img_width = img.shape[-2:]
        init_img = torch.zeros((1, 3, img_height, img_width), device=device)
        model(init_img)

        t_start = time_synchronized()
        pred = model(img)
        t_end = time_synchronized()
        print("inference time: {}".format(t_end - t_start))
        pred = torch.squeeze(pred).to("cpu").numpy()  # [1, 1, H, W] -> [H, W]

        # cv2.resize takes dsize as (width, height).
        pred = cv2.resize(pred, dsize=(w, h), interpolation=cv2.INTER_LINEAR)
        pred_mask = np.where(pred > threshold, 1, 0)
        origin_img = np.array(origin_img, dtype=np.uint8)
        seg_img = origin_img * pred_mask[..., None]
        plt.imshow(seg_img)
        plt.show()
        cv2.imwrite("pred_result.png", cv2.cvtColor(seg_img.astype(np.uint8), cv2.COLOR_RGB2BGR))


# Script entry point: run single-image prediction.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_segmentation/u2net/requirements.txt
================================================
numpy==1.22.0
torch==1.13.1
torchvision==0.11.1
opencv_python==4.5.4.60


================================================
FILE: pytorch_segmentation/u2net/results20220723-123632.txt
================================================
[epoch: 0] train_loss: 2.7385 lr: 0.002002 MAE: 0.465 maxF1: 0.464 
[epoch: 10] train_loss: 1.0385 lr: 0.003994 MAE: 0.124 maxF1: 0.719 
[epoch: 20] train_loss: 0.7629 lr: 0.003972 MAE: 0.077 maxF1: 0.787 
[epoch: 30] train_loss: 0.6758 lr: 0.003936 MAE: 0.083 maxF1: 0.791 
[epoch: 40] train_loss: 0.4905 lr: 0.003884 MAE: 0.073 maxF1: 0.805 
[epoch: 50] train_loss: 0.4337 lr: 0.003818 MAE: 0.063 maxF1: 0.821 
[epoch: 60] train_loss: 0.4157 lr: 0.003738 MAE: 0.067 maxF1: 0.818 
[epoch: 70] train_loss: 0.3424 lr: 0.003644 MAE: 0.058 maxF1: 0.840 
[epoch: 80] train_loss: 0.2909 lr: 0.003538 MAE: 0.057 maxF1: 0.842 
[epoch: 90] train_loss: 0.3220 lr: 0.003420 MAE: 0.064 maxF1: 0.837 
[epoch: 100] train_loss: 0.2653 lr: 0.003292 MAE: 0.055 maxF1: 0.847 
[epoch: 110] train_loss: 0.2627 lr: 0.003153 MAE: 0.055 maxF1: 0.846 
[epoch: 120] train_loss: 0.3230 lr: 0.003005 MAE: 0.058 maxF1: 0.837 
[epoch: 130] train_loss: 0.2177 lr: 0.002850 MAE: 0.053 maxF1: 0.852 
[epoch: 140] train_loss: 0.2807 lr: 0.002688 MAE: 0.061 maxF1: 0.824 
[epoch: 150] train_loss: 0.2091 lr: 0.002520 MAE: 0.057 maxF1: 0.846 
[epoch: 160] train_loss: 0.1971 lr: 0.002349 MAE: 0.049 maxF1: 0.857 
[epoch: 170] train_loss: 0.2157 lr: 0.002175 MAE: 0.050 maxF1: 0.851 
[epoch: 180] train_loss: 0.1881 lr: 0.002000 MAE: 0.048 maxF1: 0.857 
[epoch: 190] train_loss: 0.1855 lr: 0.001825 MAE: 0.047 maxF1: 0.860 
[epoch: 200] train_loss: 0.1817 lr: 0.001651 MAE: 0.047 maxF1: 0.863 
[epoch: 210] train_loss: 0.1740 lr: 0.001480 MAE: 0.048 maxF1: 0.858 
[epoch: 220] train_loss: 0.1707 lr: 0.001312 MAE: 0.048 maxF1: 0.860 
[epoch: 230] train_loss: 0.1653 lr: 0.001150 MAE: 0.048 maxF1: 0.859 
[epoch: 240] train_loss: 0.1652 lr: 0.000995 MAE: 0.046 maxF1: 0.860 
[epoch: 250] train_loss: 0.1631 lr: 0.000847 MAE: 0.048 maxF1: 0.857 
[epoch: 260] train_loss: 0.1584 lr: 0.000708 MAE: 0.047 maxF1: 0.862 
[epoch: 270] train_loss: 0.1590 lr: 0.000580 MAE: 0.047 maxF1: 0.860 
[epoch: 280] train_loss: 0.1521 lr: 0.000462 MAE: 0.047 maxF1: 0.861 
[epoch: 290] train_loss: 0.1535 lr: 0.000356 MAE: 0.047 maxF1: 0.861 
[epoch: 300] train_loss: 0.1520 lr: 0.000262 MAE: 0.047 maxF1: 0.860 
[epoch: 310] train_loss: 0.1488 lr: 0.000182 MAE: 0.047 maxF1: 0.860 
[epoch: 320] train_loss: 0.1493 lr: 0.000116 MAE: 0.047 maxF1: 0.859 
[epoch: 330] train_loss: 0.1470 lr: 0.000064 MAE: 0.047 maxF1: 0.860 
[epoch: 340] train_loss: 0.1493 lr: 0.000028 MAE: 0.047 maxF1: 0.859 
[epoch: 350] train_loss: 0.1482 lr: 0.000006 MAE: 0.047 maxF1: 0.858 
[epoch: 359] train_loss: 0.1518 lr: 0.000000 MAE: 0.047 maxF1: 0.859 


================================================
FILE: pytorch_segmentation/u2net/src/__init__.py
================================================
from .model import u2net_full, u2net_lite


================================================
FILE: pytorch_segmentation/u2net/src/model.py
================================================
from typing import Union, List
import torch
import torch.nn as nn
import torch.nn.functional as F


class ConvBNReLU(nn.Module):
    """Bias-free Conv2d followed by BatchNorm2d and in-place ReLU.

    Padding is ``kernel_size // 2`` for ``dilation == 1`` and equal to the
    dilation otherwise.
    """

    def __init__(self, in_ch: int, out_ch: int, kernel_size: int = 3, dilation: int = 1):
        super().__init__()

        padding = dilation if dilation != 1 else kernel_size // 2
        self.conv = nn.Conv2d(in_ch, out_ch, kernel_size, padding=padding, dilation=dilation, bias=False)
        self.bn = nn.BatchNorm2d(out_ch)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        features = self.conv(x)
        features = self.bn(features)
        return self.relu(features)


class DownConvBNReLU(ConvBNReLU):
    """ConvBNReLU optionally preceded by a 2x2 stride-2 max-pool (ceil mode).

    ``flag`` controls whether the pooling is applied.
    """

    def __init__(self, in_ch: int, out_ch: int, kernel_size: int = 3, dilation: int = 1, flag: bool = True):
        super().__init__(in_ch, out_ch, kernel_size, dilation)
        self.down_flag = flag

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        pooled = F.max_pool2d(x, kernel_size=2, stride=2, ceil_mode=True) if self.down_flag else x
        return self.relu(self.bn(self.conv(pooled)))


class UpConvBNReLU(ConvBNReLU):
    """ConvBNReLU applied to the channel-wise concatenation of ``x1`` and ``x2``.

    When ``flag`` is set, ``x1`` is first bilinearly resized to the spatial
    size of ``x2``.
    """

    def __init__(self, in_ch: int, out_ch: int, kernel_size: int = 3, dilation: int = 1, flag: bool = True):
        super().__init__(in_ch, out_ch, kernel_size, dilation)
        self.up_flag = flag

    def forward(self, x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor:
        if self.up_flag:
            x1 = F.interpolate(x1, size=x2.shape[2:], mode='bilinear', align_corners=False)
        merged = torch.cat([x1, x2], dim=1)
        return self.relu(self.bn(self.conv(merged)))


class RSU(nn.Module):
    """Residual U-block with ``height`` encoder layers and ``height - 1`` decoder layers.

    The encoder is one non-pooling layer, ``height - 2`` pooling layers and a
    final dilated (dilation=2) layer. The decoder fuses each encoder output
    with the running decoder output; the last decoder layer emits ``out_ch``
    channels. The block output is added to the output of ``conv_in``.
    """

    def __init__(self, height: int, in_ch: int, mid_ch: int, out_ch: int):
        super().__init__()

        assert height >= 2
        self.conv_in = ConvBNReLU(in_ch, out_ch)

        encoders = [DownConvBNReLU(out_ch, mid_ch, flag=False)]
        decoders = [UpConvBNReLU(mid_ch * 2, mid_ch, flag=False)]
        last_level = height - 3
        for level in range(height - 2):
            encoders.append(DownConvBNReLU(mid_ch, mid_ch))
            # Only the final decoder layer maps back to out_ch.
            decoder_out = out_ch if level == last_level else mid_ch
            decoders.append(UpConvBNReLU(mid_ch * 2, decoder_out))

        encoders.append(ConvBNReLU(mid_ch, mid_ch, dilation=2))
        self.encode_modules = nn.ModuleList(encoders)
        self.decode_modules = nn.ModuleList(decoders)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x_in = self.conv_in(x)

        skips = []
        out = x_in
        for layer in self.encode_modules:
            out = layer(out)
            skips.append(out)

        # Start from the deepest feature and fuse the skips in reverse order.
        out = skips.pop()
        for layer in self.decode_modules:
            out = layer(out, skips.pop())

        return out + x_in


class RSU4F(nn.Module):
    """Residual U-block without pooling: dilations 1, 2, 4, 8 on the way down
    and 4, 2, 1 on the way up, so the spatial size never changes.
    """

    def __init__(self, in_ch: int, mid_ch: int, out_ch: int):
        super().__init__()
        self.conv_in = ConvBNReLU(in_ch, out_ch)

        encoders = [
            ConvBNReLU(out_ch, mid_ch),
            ConvBNReLU(mid_ch, mid_ch, dilation=2),
            ConvBNReLU(mid_ch, mid_ch, dilation=4),
            ConvBNReLU(mid_ch, mid_ch, dilation=8),
        ]
        self.encode_modules = nn.ModuleList(encoders)

        decoders = [
            ConvBNReLU(mid_ch * 2, mid_ch, dilation=4),
            ConvBNReLU(mid_ch * 2, mid_ch, dilation=2),
            ConvBNReLU(mid_ch * 2, out_ch),
        ]
        self.decode_modules = nn.ModuleList(decoders)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x_in = self.conv_in(x)

        skips = []
        out = x_in
        for layer in self.encode_modules:
            out = layer(out)
            skips.append(out)

        out = skips.pop()
        for layer in self.decode_modules:
            skip = skips.pop()
            out = layer(torch.cat([out, skip], dim=1))

        return out + x_in


class U2Net(nn.Module):
    """U2-Net assembled from a config of RSU / RSU4F stages.

    ``cfg`` must contain ``"encode"`` and ``"decode"`` lists whose entries are
    ``[height, in_ch, mid_ch, out_ch, RSU4F, side]``: ``RSU4F`` selects the
    block type and ``side`` adds a 3x3 side-output conv for that stage.

    In training mode ``forward`` returns a list: the fused map followed by the
    side maps (all raw logits). In eval mode it returns the sigmoid of the
    fused map.
    """

    def __init__(self, cfg: dict, out_ch: int = 1):
        super().__init__()
        assert "encode" in cfg
        assert "decode" in cfg
        self.encode_num = len(cfg["encode"])

        side_list = []

        def build_stages(stage_cfgs):
            # Stage and side conv are created in config order, one entry at a time.
            stages = []
            for c in stage_cfgs:
                # c: [height, in_ch, mid_ch, out_ch, RSU4F, side]
                assert len(c) == 6
                stages.append(RSU(*c[:4]) if c[4] is False else RSU4F(*c[1:4]))

                if c[5] is True:
                    side_list.append(nn.Conv2d(c[3], out_ch, kernel_size=3, padding=1))
            return stages

        self.encode_modules = nn.ModuleList(build_stages(cfg["encode"]))
        self.decode_modules = nn.ModuleList(build_stages(cfg["decode"]))
        self.side_modules = nn.ModuleList(side_list)
        self.out_conv = nn.Conv2d(self.encode_num * out_ch, out_ch, kernel_size=1)

    def forward(self, x: torch.Tensor) -> Union[torch.Tensor, List[torch.Tensor]]:
        _, _, h, w = x.shape

        # collect encode outputs; every stage but the last is followed by a 2x pool
        last_encode = self.encode_num - 1
        encode_outputs = []
        for i, m in enumerate(self.encode_modules):
            x = m(x)
            encode_outputs.append(x)
            if i != last_encode:
                x = F.max_pool2d(x, kernel_size=2, stride=2, ceil_mode=True)

        # collect decode outputs, deepest first
        x = encode_outputs.pop()
        stage_outputs = [x]
        for m in self.decode_modules:
            skip = encode_outputs.pop()
            x = F.interpolate(x, size=skip.shape[2:], mode='bilinear', align_corners=False)
            x = m(torch.cat([x, skip], dim=1))
            stage_outputs.append(x)

        # collect side outputs: the i-th side conv reads the i-th deepest stage
        side_outputs = []
        for i, m in enumerate(self.side_modules):
            side = F.interpolate(m(stage_outputs[i]), size=[h, w], mode='bilinear', align_corners=False)
            side_outputs.append(side)
        # shallowest stage first, as expected by out_conv
        side_outputs.reverse()

        x = self.out_conv(torch.cat(side_outputs, dim=1))

        if self.training:
            # do not use torch.sigmoid for amp safe
            return [x] + side_outputs
        return torch.sigmoid(x)


def u2net_full(out_ch: int = 1):
    """Build a ``U2Net`` from the full-size stage configuration.

    Every row is ``[height, in_ch, mid_ch, out_ch, RSU4F, side]``.

    Args:
        out_ch: forwarded to ``U2Net`` as its second argument.
    """
    encoder_stages = [
        [7, 3, 32, 64, False, False],      # En1
        [6, 64, 32, 128, False, False],    # En2
        [5, 128, 64, 256, False, False],   # En3
        [4, 256, 128, 512, False, False],  # En4
        [4, 512, 256, 512, True, False],   # En5
        [4, 512, 256, 512, True, True],    # En6
    ]
    decoder_stages = [
        [4, 1024, 256, 512, True, True],   # De5
        [4, 1024, 128, 256, False, True],  # De4
        [5, 512, 64, 128, False, True],    # De3
        [6, 256, 32, 64, False, True],     # De2
        [7, 128, 16, 64, False, True],     # De1
    ]
    cfg = {"encode": encoder_stages, "decode": decoder_stages}

    return U2Net(cfg, out_ch)


def u2net_lite(out_ch: int = 1):
    """Build a ``U2Net`` from the lightweight stage configuration.

    Every row is ``[height, in_ch, mid_ch, out_ch, RSU4F, side]``; all stages
    use 16 middle channels and 64 output channels.

    Args:
        out_ch: forwarded to ``U2Net`` as its second argument.
    """
    encoder_stages = [
        [7, 3, 16, 64, False, False],   # En1
        [6, 64, 16, 64, False, False],  # En2
        [5, 64, 16, 64, False, False],  # En3
        [4, 64, 16, 64, False, False],  # En4
        [4, 64, 16, 64, True, False],   # En5
        [4, 64, 16, 64, True, True],    # En6
    ]
    decoder_stages = [
        [4, 128, 16, 64, True, True],   # De5
        [4, 128, 16, 64, False, True],  # De4
        [5, 128, 16, 64, False, True],  # De3
        [6, 128, 16, 64, False, True],  # De2
        [7, 128, 16, 64, False, True],  # De1
    ]
    cfg = {"encode": encoder_stages, "decode": decoder_stages}

    return U2Net(cfg, out_ch)


def convert_onnx(m, save_path):
    """Export module ``m`` to ONNX at ``save_path``.

    The module is switched to eval mode and traced with a random
    ``1 x 3 x 288 x 288`` input using opset 11, with parameters embedded.

    Args:
        m: the model to export.
        save_path: where to save the model (can be a file or file-like object).
    """
    m.eval()
    dummy_input = torch.rand(1, 3, 288, 288, requires_grad=True)

    export_options = {
        "export_params": True,
        "opset_version": 11,
    }
    torch.onnx.export(m, dummy_input, save_path, **export_options)


if __name__ == '__main__':
    # Single blocks can be exported the same way, e.g.:
    #   convert_onnx(RSU(height=7, in_ch=3, mid_ch=12, out_ch=3), "RSU7.onnx")
    #   convert_onnx(RSU4F(in_ch=3, mid_ch=12, out_ch=3), "RSU4F.onnx")

    # Export the full-size network.
    convert_onnx(u2net_full(), "u2net_full.onnx")


================================================
FILE: pytorch_segmentation/u2net/train.py
================================================
import os
import time
import datetime
from typing import Union, List

import torch
from torch.utils import data

from src import u2net_full
from train_utils import train_one_epoch, evaluate, get_params_groups, create_lr_scheduler
from my_dataset import DUTSDataset
import transforms as T


class SODPresetTrain:
    """Training-time transform preset.

    Chains, in order: ``ToTensor``, ``Resize(base_size, resize_mask=True)``,
    ``RandomCrop(crop_size)``, ``RandomHorizontalFlip(hflip_prob)`` and
    ``Normalize(mean, std)`` from the project ``transforms`` module.
    """

    def __init__(self, base_size: Union[int, List[int]], crop_size: int,
                 hflip_prob=0.5, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        pipeline = [
            T.ToTensor(),
            T.Resize(base_size, resize_mask=True),
            T.RandomCrop(crop_size),
            T.RandomHorizontalFlip(hflip_prob),
            T.Normalize(mean=mean, std=std),
        ]
        self.transforms = T.Compose(pipeline)

    def __call__(self, img, target):
        """Apply the composed transforms to ``img`` and ``target`` together."""
        transformed = self.transforms(img, target)
        return transformed


class SODPresetEval:
    """Evaluation-time transform preset.

    Chains, in order: ``ToTensor``, ``Resize(base_size, resize_mask=False)``
    and ``Normalize(mean, std)`` from the project ``transforms`` module.
    """

    def __init__(self, base_size: Union[int, List[int]], mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        pipeline = [
            T.ToTensor(),
            T.Resize(base_size, resize_mask=False),
            T.Normalize(mean=mean, std=std),
        ]
        self.transforms = T.Compose(pipeline)

    def __call__(self, img, target):
        """Apply the composed transforms to ``img`` and ``target`` together."""
        transformed = self.transforms(img, target)
        return transformed


def main(args):
    """Train ``u2net_full`` on DUTS on a single device.

    Builds the train/val datasets and loaders, the model, an AdamW optimizer
    with a warmup + cosine LR schedule, optionally resumes from
    ``args.resume``, then trains for ``args.epochs`` epochs. Validation runs
    every ``args.eval_interval`` epochs and on the last epoch. Checkpoints are
    written to ``save_weights/`` (the directory must already exist).

    Args:
        args: namespace produced by ``parse_args``.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    batch_size = args.batch_size

    # File used to record training and validation information.
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    train_dataset = DUTSDataset(args.data_path, train=True, transforms=SODPresetTrain([320, 320], crop_size=288))
    val_dataset = DUTSDataset(args.data_path, train=False, transforms=SODPresetEval([320, 320]))

    # os.cpu_count() may return None; fall back to 1 so min() stays well-defined.
    num_workers = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])
    train_data_loader = data.DataLoader(train_dataset,
                                        batch_size=batch_size,
                                        num_workers=num_workers,
                                        shuffle=True,
                                        pin_memory=True,
                                        collate_fn=train_dataset.collate_fn)

    val_data_loader = data.DataLoader(val_dataset,
                                      batch_size=1,  # must be 1
                                      num_workers=num_workers,
                                      pin_memory=True,
                                      collate_fn=val_dataset.collate_fn)

    model = u2net_full()
    model.to(device)

    params_group = get_params_groups(model, weight_decay=args.weight_decay)
    optimizer = torch.optim.AdamW(params_group, lr=args.lr, weight_decay=args.weight_decay)
    lr_scheduler = create_lr_scheduler(optimizer, len(train_data_loader), args.epochs,
                                       warmup=True, warmup_epochs=2)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    if args.resume:
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp:
            scaler.load_state_dict(checkpoint["scaler"])

    # Best validation metrics seen so far in this run (lower MAE / higher F1 is better).
    current_mae, current_f1 = 1.0, 0.0
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        mean_loss, lr = train_one_epoch(model, optimizer, train_data_loader, device, epoch,
                                        lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler)

        save_file = {"model": model.state_dict(),
                     "optimizer": optimizer.state_dict(),
                     "lr_scheduler": lr_scheduler.state_dict(),
                     "epoch": epoch,
                     "args": args}
        if args.amp:
            save_file["scaler"] = scaler.state_dict()

        if epoch % args.eval_interval == 0 or epoch == args.epochs - 1:
            # Validate only every eval_interval epochs; validating less often saves training time.
            mae_metric, f1_metric = evaluate(model, val_data_loader, device=device)
            mae_info, f1_info = mae_metric.compute(), f1_metric.compute()
            print(f"[epoch: {epoch}] val_MAE: {mae_info:.3f} val_maxF1: {f1_info:.3f}")
            # write into txt
            with open(results_file, "a") as f:
                # Record train_loss, lr and the validation metrics for this epoch.
                write_info = f"[epoch: {epoch}] train_loss: {mean_loss:.4f} lr: {lr:.6f} " \
                             f"MAE: {mae_info:.3f} maxF1: {f1_info:.3f} \n"
                f.write(write_info)

            # save_best: only when neither metric got worse than the best so far.
            if current_mae >= mae_info and current_f1 <= f1_info:
                # Remember the new best; without this every evaluation would
                # overwrite model_best.pth regardless of quality.
                current_mae, current_f1 = mae_info, f1_info
                torch.save(save_file, "save_weights/model_best.pth")

        # only save latest 10 epoch weights
        stale_path = f"save_weights/model_{epoch-10}.pth"
        if os.path.exists(stale_path):
            os.remove(stale_path)

        torch.save(save_file, f"save_weights/model_{epoch}.pth")

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print("training time {}".format(total_time_str))


def parse_args():
    """Parse the command-line options of the single-device training script.

    Returns:
        The ``argparse.Namespace`` holding all options.
    """
    import argparse

    parser = argparse.ArgumentParser(description="pytorch u2net training")
    add = parser.add_argument

    # Data / device / batch options
    add("--data-path", default="./", help="DUTS root")
    add("--device", default="cuda", help="training device")
    add("-b", "--batch-size", default=16, type=int)

    # Optimisation options
    add('--wd', '--weight-decay', default=1e-4, type=float,
        metavar='W', help='weight decay (default: 1e-4)',
        dest='weight_decay')
    add("--epochs", default=360, type=int, metavar="N",
        help="number of total epochs to train")
    add("--eval-interval", default=10, type=int, help="validation interval default 10 Epochs")
    add('--lr', default=0.001, type=float, help='initial learning rate')

    # Logging / resuming options
    add('--print-freq', default=50, type=int, help='print frequency')
    add('--resume', default='', help='resume from checkpoint')
    add('--start-epoch', default=0, type=int, metavar='N',
        help='start epoch')

    # Mixed precision training parameters
    add("--amp", action='store_true',
        help="Use torch.cuda.amp for mixed precision training")

    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()

    # Checkpoints are written into ./save_weights, so create it on first use.
    weights_dir = "./save_weights"
    if not os.path.exists(weights_dir):
        os.mkdir(weights_dir)

    main(args)


================================================
FILE: pytorch_segmentation/u2net/train_multi_GPU.py
================================================
import time
import os
import datetime
from typing import Union, List

import torch
from torch.utils import data

from src import u2net_full
from train_utils import (train_one_epoch, evaluate, init_distributed_mode, save_on_master, mkdir,
                         create_lr_scheduler, get_params_groups)
from my_dataset import DUTSDataset
import transforms as T


class SODPresetTrain:
    """Training-time transform preset.

    Chains, in order: ``ToTensor``, ``Resize(base_size, resize_mask=True)``,
    ``RandomCrop(crop_size)``, ``RandomHorizontalFlip(hflip_prob)`` and
    ``Normalize(mean, std)`` from the project ``transforms`` module.
    """

    def __init__(self, base_size: Union[int, List[int]], crop_size: int,
                 hflip_prob=0.5, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        pipeline = [
            T.ToTensor(),
            T.Resize(base_size, resize_mask=True),
            T.RandomCrop(crop_size),
            T.RandomHorizontalFlip(hflip_prob),
            T.Normalize(mean=mean, std=std),
        ]
        self.transforms = T.Compose(pipeline)

    def __call__(self, img, target):
        """Apply the composed transforms to ``img`` and ``target`` together."""
        transformed = self.transforms(img, target)
        return transformed


class SODPresetEval:
    """Evaluation-time transform preset.

    Chains, in order: ``ToTensor``, ``Resize(base_size, resize_mask=False)``
    and ``Normalize(mean, std)`` from the project ``transforms`` module.
    """

    def __init__(self, base_size: Union[int, List[int]], mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        pipeline = [
            T.ToTensor(),
            T.Resize(base_size, resize_mask=False),
            T.Normalize(mean=mean, std=std),
        ]
        self.transforms = T.Compose(pipeline)

    def __call__(self, img, target):
        """Apply the composed transforms to ``img`` and ``target`` together."""
        transformed = self.transforms(img, target)
        return transformed


def main(args):
    """Train ``u2net_full`` on DUTS, optionally with DistributedDataParallel.

    Initialises distributed mode from the environment, builds datasets,
    samplers and loaders, the (optionally SyncBN / DDP wrapped) model, an
    AdamW optimizer with a warmup + cosine LR schedule, optionally resumes
    from ``args.resume``, and then either evaluates once (``args.test_only``)
    or trains for ``args.epochs`` epochs. Validation runs every
    ``args.eval_interval`` epochs and on the last epoch.

    Args:
        args: namespace produced by the script's argument parser.
    """
    init_distributed_mode(args)
    print(args)

    # init_distributed_mode does not set args.rank when distributed mode is
    # off, so fall back to -1 instead of raising AttributeError later on.
    rank = getattr(args, "rank", -1)

    device = torch.device(args.device)

    # File used to record training and validation information.
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    train_dataset = DUTSDataset(args.data_path, train=True, transforms=SODPresetTrain([320, 320], crop_size=288))
    val_dataset = DUTSDataset(args.data_path, train=False, transforms=SODPresetEval([320, 320]))

    print("Creating data loaders")
    if args.distributed:
        train_sampler = data.distributed.DistributedSampler(train_dataset)
        test_sampler = data.distributed.DistributedSampler(val_dataset)
    else:
        train_sampler = data.RandomSampler(train_dataset)
        test_sampler = data.SequentialSampler(val_dataset)

    train_data_loader = data.DataLoader(
        train_dataset, batch_size=args.batch_size,
        sampler=train_sampler, num_workers=args.workers,
        pin_memory=True, collate_fn=train_dataset.collate_fn, drop_last=True)

    val_data_loader = data.DataLoader(
        val_dataset, batch_size=1,  # batch_size must be 1
        sampler=test_sampler, num_workers=args.workers,
        pin_memory=True, collate_fn=val_dataset.collate_fn)

    # create model
    model = u2net_full()
    model.to(device)

    if args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    params_group = get_params_groups(model, weight_decay=args.weight_decay)
    optimizer = torch.optim.AdamW(params_group, lr=args.lr, weight_decay=args.weight_decay)
    lr_scheduler = create_lr_scheduler(optimizer, len(train_data_loader), args.epochs,
                                       warmup=True, warmup_epochs=2)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # If a resume path is given (weights of a previous run), continue training from it.
    if args.resume:
        # If map_location is missing, torch.load will first load the module to CPU
        # and then copy each parameter to where it was saved,
        # which would result in all processes on the same machine using the same set of devices.
        # The checkpoint also carries the optimizer and LR-scheduler state.
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp:
            scaler.load_state_dict(checkpoint["scaler"])

    if args.test_only:
        mae_metric, f1_metric = evaluate(model, val_data_loader, device=device)
        print(mae_metric, f1_metric)
        return

    print("Start training")
    # Best validation metrics seen so far in this run (lower MAE / higher F1 is better).
    current_mae, current_f1 = 1.0, 0.0
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        mean_loss, lr = train_one_epoch(model, optimizer, train_data_loader, device, epoch,
                                        lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler)

        save_file = {'model': model_without_ddp.state_dict(),
                     'optimizer': optimizer.state_dict(),
                     "lr_scheduler": lr_scheduler.state_dict(),
                     'args': args,
                     'epoch': epoch}
        if args.amp:
            save_file["scaler"] = scaler.state_dict()

        if epoch % args.eval_interval == 0 or epoch == args.epochs - 1:
            # Validate only every eval_interval epochs; validating less often saves training time.
            mae_metric, f1_metric = evaluate(model, val_data_loader, device=device)
            mae_info, f1_info = mae_metric.compute(), f1_metric.compute()
            print(f"[epoch: {epoch}] val_MAE: {mae_info:.3f} val_maxF1: {f1_info:.3f}")

            # Only the main process writes to disk.
            if rank in [-1, 0]:
                # write into txt
                with open(results_file, "a") as f:
                    # Record train_loss, lr and the validation metrics for this epoch.
                    write_info = f"[epoch: {epoch}] train_loss: {mean_loss:.4f} lr: {lr:.6f} " \
                                 f"MAE: {mae_info:.3f} maxF1: {f1_info:.3f} \n"
                    f.write(write_info)

                # save_best: only when neither metric got worse than the best so far.
                if current_mae >= mae_info and current_f1 <= f1_info:
                    # Remember the new best; without this every evaluation would
                    # overwrite model_best.pth regardless of quality.
                    current_mae, current_f1 = mae_info, f1_info
                    if args.output_dir:
                        # Weights are saved on the master process only.
                        save_on_master(save_file,
                                       os.path.join(args.output_dir, 'model_best.pth'))

        if args.output_dir:
            if rank in [-1, 0]:
                # only save latest 10 epoch weights
                stale_path = os.path.join(args.output_dir, f'model_{epoch - 10}.pth')
                if os.path.exists(stale_path):
                    os.remove(stale_path)

            # Weights are saved on the master process only.
            save_on_master(save_file,
                           os.path.join(args.output_dir, f'model_{epoch}.pth'))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description=__doc__)
    add = parser.add_argument

    # Root directory of the training data (DUTS)
    add('--data-path', default='./', help='DUTS root')
    # Training device type
    add('--device', default='cuda', help='device')
    # batch_size on each GPU
    add('-b', '--batch-size', default=16, type=int,
        help='images per gpu, the total batch size is $NGPU x batch_size')
    # Epoch index from which training continues
    add('--start-epoch', default=0, type=int, help='start epoch')
    # Total number of training epochs
    add('--epochs', default=360, type=int, metavar='N',
        help='number of total epochs to run')
    add('--wd', '--weight-decay', default=1e-4, type=float,
        metavar='W', help='weight decay (default: 1e-4)',
        dest='weight_decay')
    # Whether to use BN synchronised across GPUs; off by default, training is slower when on
    add('--sync-bn', action='store_true', help='whether using SyncBatchNorm')
    # Number of data loading / preprocessing workers
    add('-j', '--workers', default=4, type=int, metavar='N',
        help='number of data loading workers (default: 4)')
    # Training learning rate
    add('--lr', default=0.001, type=float,
        help='initial learning rate')
    # Validation frequency
    add("--eval-interval", default=10, type=int, help="validation interval default 10 Epochs")
    # How often training information is printed
    add('--print-freq', default=20, type=int, help='print frequency')
    # Where output files are saved
    add('--output-dir', default='./multi_train', help='path where to save')
    # Continue training from a previous result
    add('--resume', default='', help='resume from checkpoint')
    # Do not train, only test
    add(
        "--test-only",
        dest="test_only",
        help="Only test the model",
        action="store_true",
    )

    # Number of distributed processes
    add('--world-size', default=1, type=int,
        help='number of distributed processes')
    add('--dist-url', default='env://', help='url used to set up distributed training')
    # Mixed precision training parameters
    add("--amp", action='store_true',
        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()

    # If an output directory is given, create it when it does not exist yet.
    if args.output_dir:
        mkdir(args.output_dir)

    main(args)


================================================
FILE: pytorch_segmentation/u2net/train_utils/__init__.py
================================================
from .train_and_eval import train_one_epoch, evaluate, create_lr_scheduler, get_params_groups
from .distributed_utils import init_distributed_mode, save_on_master, mkdir


================================================
FILE: pytorch_segmentation/u2net/train_utils/distributed_utils.py
================================================
from collections import defaultdict, deque
import datetime
import time
import torch
import torch.distributed as dist
import torch.nn.functional as F

import errno
import os


class SmoothedValue(object):
    """Running statistics over a bounded window plus global totals.

    The most recent ``window_size`` values are kept in ``self.deque``;
    ``self.total`` and ``self.count`` accumulate over the whole series.
    """

    def __init__(self, window_size=20, fmt=None):
        self.fmt = "{value:.4f} ({global_avg:.4f})" if fmt is None else fmt
        self.deque = deque(maxlen=window_size)
        self.count = 0
        self.total = 0.0

    def update(self, value, n=1):
        """Record ``value`` once in the window and ``n`` times in the totals."""
        self.deque.append(value)
        self.total += value * n
        self.count += n

    def synchronize_between_processes(self):
        """
        Sum ``count`` and ``total`` over all processes.

        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        stats = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
        dist.barrier()
        dist.all_reduce(stats)
        count, total = stats.tolist()
        self.count = int(count)
        self.total = total

    @property
    def median(self):
        """Median of the windowed values, as computed by ``torch.Tensor.median``."""
        window = torch.tensor(list(self.deque))
        return window.median().item()

    @property
    def avg(self):
        """Mean of the windowed values (float32)."""
        window = torch.tensor(list(self.deque), dtype=torch.float32)
        return window.mean().item()

    @property
    def global_avg(self):
        """``total / count`` over the whole series."""
        return self.total / self.count

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    @property
    def value(self):
        """Most recently recorded value."""
        return self.deque[-1]

    def __str__(self):
        stats = {
            "median": self.median,
            "avg": self.avg,
            "global_avg": self.global_avg,
            "max": self.max,
            "value": self.value,
        }
        return self.fmt.format(**stats)


def all_gather(data):
    """
    Collect data from every process.
    Run all_gather on arbitrary picklable data (not necessarily tensors)
    Args:
        data: any picklable object
    Returns:
        list[data]: list of data gathered from each rank
    """
    num_processes = get_world_size()
    if num_processes != 1:
        gathered = [None] * num_processes
        dist.all_gather_object(gathered, data)
        return gathered

    # Single process: nothing to exchange.
    return [data]


class MeanAbsoluteError(object):
    """Accumulates one MAE value per validation sample."""

    def __init__(self):
        self.mae_list = []

    def update(self, pred: torch.Tensor, gt: torch.Tensor):
        """Resize ``pred`` to the size of ``gt`` and record the absolute error.

        The summed absolute difference is divided by ``h * w`` of ``gt``.
        ``gt`` must have a batch size of 1.
        """
        batch_size, c, h, w = gt.shape
        assert batch_size == 1, f"validation mode batch_size must be 1, but got batch_size: {batch_size}."
        resize_pred = F.interpolate(pred, (h, w), mode="bilinear", align_corners=False)
        abs_diff = torch.abs(resize_pred - gt)
        per_sample = torch.sum(abs_diff, dim=(1, 2, 3)) / (h * w)
        self.mae_list.extend(per_sample.tolist())

    def compute(self):
        """Return the mean of all recorded values."""
        return sum(self.mae_list) / len(self.mae_list)

    def gather_from_all_processes(self):
        """Replace ``mae_list`` with the concatenation of every process's list."""
        if not torch.distributed.is_available():
            return
        if not torch.distributed.is_initialized():
            return
        torch.distributed.barrier()
        merged = []
        for part in all_gather(self.mae_list):
            merged.extend(part)
        self.mae_list = merged

    def __str__(self):
        return f'MAE: {self.compute():.3f}'


class F1Score(object):
    """
    Accumulates precision/recall curves over 255 score bins and reports the
    maximum F-measure (computed with the weight 0.3 used in ``compute``).

    refer: https://github.com/xuebinqin/DIS/blob/main/IS-Net/basics.py
    """

    def __init__(self, threshold: float = 0.5):
        self.threshold = threshold
        self.precision_cum = None
        self.recall_cum = None
        self.num_cum = None

    def update(self, pred: torch.Tensor, gt: torch.Tensor):
        """Add the precision/recall curves of one sample (batch size must be 1)."""
        batch_size, c, h, w = gt.shape
        assert batch_size == 1, f"validation mode batch_size must be 1, but got batch_size: {batch_size}."
        resize_pred = F.interpolate(pred, (h, w), mode="bilinear", align_corners=False)

        fg_mask = torch.gt(gt, self.threshold)
        bg_mask = torch.le(gt, self.threshold)
        gt_num = torch.sum(fg_mask.float())

        def descending_cumulative_hist(scores):
            # Histogram over [0, 1], flipped so bins run from high to low
            # prediction score, then accumulated.
            hist = torch.histc(scores, bins=255, min=0.0, max=1.0)
            return torch.cumsum(torch.flipud(hist), dim=0)

        # Predictions where GT is foreground / background respectively.
        fg_cum = descending_cumulative_hist(resize_pred[fg_mask])
        bg_cum = descending_cumulative_hist(resize_pred[bg_mask])

        precision = fg_cum / (fg_cum + bg_cum + 1e-4)
        recall = fg_cum / (gt_num + 1e-4)

        # Lazily create the accumulators on the first update.
        if self.precision_cum is None:
            self.precision_cum = torch.zeros_like(precision)
        if self.recall_cum is None:
            self.recall_cum = torch.zeros_like(recall)
        if self.num_cum is None:
            self.num_cum = torch.zeros([1], dtype=gt.dtype, device=gt.device)

        self.precision_cum += precision
        self.recall_cum += recall
        self.num_cum += batch_size

    def compute(self):
        """Return the maximum F-measure over all bins of the averaged curves."""
        mean_precision = self.precision_cum / self.num_cum
        mean_recall = self.recall_cum / self.num_cum
        numerator = (1 + 0.3) * mean_precision * mean_recall
        denominator = 0.3 * mean_precision + mean_recall + 1e-8
        return torch.amax(numerator / denominator).item()

    def reduce_from_all_processes(self):
        """All-reduce the accumulators when torch.distributed is initialised."""
        if not torch.distributed.is_available():
            return
        if not torch.distributed.is_initialized():
            return
        torch.distributed.barrier()
        for accumulator in (self.precision_cum, self.recall_cum, self.num_cum):
            torch.distributed.all_reduce(accumulator)

    def __str__(self):
        return f'maxF1: {self.compute():.3f}'


class MetricLogger(object):
    """Named collection of ``SmoothedValue`` meters with periodic progress printing."""

    def __init__(self, delimiter="\t"):
        self.delimiter = delimiter
        self.meters = defaultdict(SmoothedValue)

    def update(self, **kwargs):
        """Feed one value per keyword into the meter of the same name."""
        for name, raw in kwargs.items():
            scalar = raw.item() if isinstance(raw, torch.Tensor) else raw
            assert isinstance(scalar, (float, int))
            self.meters[name].update(scalar)

    def __getattr__(self, attr):
        # Only called when normal lookup fails: expose meters as attributes.
        for table in (self.meters, self.__dict__):
            if attr in table:
                return table[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        return self.delimiter.join(
            "{}: {}".format(name, str(meter)) for name, meter in self.meters.items()
        )

    def synchronize_between_processes(self):
        """Synchronise every meter across processes."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register ``meter`` under ``name``."""
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield the items of ``iterable`` and print progress every ``print_freq`` items.

        ``iterable`` must support ``len()``. The elapsed total time is printed
        once the iterable is exhausted.
        """
        header = header or ''
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        space_fmt = ':' + str(len(str(len(iterable)))) + 'd'

        parts = [
            header,
            '[{0' + space_fmt + '}/{1}]',
            'eta: {eta}',
            '{meters}',
            'time: {time}',
            'data: {data}',
        ]
        if torch.cuda.is_available():
            # Peak GPU memory is only reported when CUDA is usable.
            parts.append('max mem: {memory:.0f}')
        log_msg = self.delimiter.join(parts)

        MB = 1024.0 * 1024.0
        for i, obj in enumerate(iterable):
            data_time.update(time.time() - end)
            yield obj
            iter_time.update(time.time() - end)
            if i % print_freq == 0:
                eta_seconds = iter_time.global_avg * (len(iterable) - i)
                fields = {"eta": str(datetime.timedelta(seconds=int(eta_seconds)))}
                if torch.cuda.is_available():
                    fields.update(meters=str(self), time=str(iter_time), data=str(data_time),
                                  memory=torch.cuda.max_memory_allocated() / MB)
                else:
                    fields.update(meters=str(self), time=str(iter_time), data=str(data_time))
                print(log_msg.format(i, len(iterable), **fields))
            end = time.time()

        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print('{} Total time: {}'.format(header, total_time_str))


def mkdir(path):
    """Create ``path`` (including missing parents); do nothing if the directory exists.

    Raises:
        FileExistsError: if ``path`` exists but is not a directory.
        OSError: for any other failure reported by ``os.makedirs``.
    """
    # exist_ok replaces the manual EEXIST check and no longer silently accepts
    # an existing regular file at ``path``.
    os.makedirs(path, exist_ok=True)


def setup_for_distributed(is_master):
    """
    Replace the builtin ``print`` with a version that is silent on
    non-master processes unless called with ``force=True``.
    """
    import builtins as __builtin__

    original_print = __builtin__.print

    def print(*args, **kwargs):
        # ``force`` is consumed here and never forwarded to the real print.
        force = kwargs.pop('force', False)
        if not (is_master or force):
            return
        original_print(*args, **kwargs)

    __builtin__.print = print


def is_dist_avail_and_initialized():
    """Return True only when torch.distributed is available and initialised."""
    if dist.is_available() and dist.is_initialized():
        return True
    return False


def get_world_size():
    """Return the distributed world size, or 1 when not running distributed."""
    if is_dist_avail_and_initialized():
        return dist.get_world_size()
    return 1


def get_rank():
    """Return this process's distributed rank, or 0 when not running distributed."""
    if is_dist_avail_and_initialized():
        return dist.get_rank()
    return 0


def is_main_process():
    """Return True when ``get_rank()`` reports rank 0."""
    rank = get_rank()
    return rank == 0


def save_on_master(*args, **kwargs):
    """Forward all arguments to ``torch.save`` on the main process; no-op elsewhere."""
    if not is_main_process():
        return
    torch.save(*args, **kwargs)


def init_distributed_mode(args):
    """Initialise torch.distributed from the environment and record it on ``args``.

    Rank information is read from ``RANK``/``WORLD_SIZE``/``LOCAL_RANK``, or
    from ``SLURM_PROCID``, or taken from a pre-set ``args.rank``. When none of
    these is present, ``args.distributed`` is set to False and nothing else
    happens. Otherwise the NCCL process group is created and printing is
    restricted to rank 0.
    """
    env = os.environ
    if 'RANK' in env and 'WORLD_SIZE' in env:
        args.rank = int(env["RANK"])
        args.world_size = int(env['WORLD_SIZE'])
        args.gpu = int(env['LOCAL_RANK'])
    elif 'SLURM_PROCID' in env:
        args.rank = int(env['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()
    elif not hasattr(args, "rank"):
        print('Not using distributed mode')
        args.distributed = False
        return
    # else: args.rank was supplied by the caller; use it as-is.

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                         world_size=args.world_size, rank=args.rank)
    setup_for_distributed(args.rank == 0)


================================================
FILE: pytorch_segmentation/u2net/train_utils/train_and_eval.py
================================================
import math
import torch
from torch.nn import functional as F
import train_utils.distributed_utils as utils


def criterion(inputs, target):
    """Sum the binary cross-entropy-with-logits loss of every prediction in ``inputs``.

    Args:
        inputs: sequence of logit tensors, each compared against ``target``.
        target: the ground-truth tensor shared by all predictions.

    Returns:
        The summed loss (the integer 0 when ``inputs`` is empty).
    """
    return sum(F.binary_cross_entropy_with_logits(pred, target) for pred in inputs)


def evaluate(model, data_loader, device):
    """Run ``model`` over ``data_loader`` in eval mode and collect MAE / F1 metrics.

    Returns:
        ``(mae_metric, f1_metric)`` after gathering / reducing them across
        processes.
    """
    model.eval()
    mae_metric = utils.MeanAbsoluteError()
    f1_metric = utils.F1Score()
    progress = utils.MetricLogger(delimiter="  ")

    with torch.no_grad():
        for images, targets in progress.log_every(data_loader, 100, 'Test:'):
            images = images.to(device)
            targets = targets.to(device)
            prediction = model(images)

            # Note: no min-max post-normalisation is applied to the prediction.
            for metric in (mae_metric, f1_metric):
                metric.update(prediction, targets)

        mae_metric.gather_from_all_processes()
        f1_metric.reduce_from_all_processes()

    return mae_metric, f1_metric


def train_one_epoch(model, optimizer, data_loader, device, epoch, lr_scheduler, print_freq=10, scaler=None):
    """Train ``model`` for one pass over ``data_loader``.

    Mixed precision is enabled exactly when ``scaler`` is not None. The LR
    scheduler is stepped once per batch.

    Returns:
        ``(global average loss, learning rate of the first param group after
        the last batch)``.
    """
    model.train()
    progress = utils.MetricLogger(delimiter="  ")
    progress.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)
    use_amp = scaler is not None

    for image, target in progress.log_every(data_loader, print_freq, header):
        image = image.to(device)
        target = target.to(device)
        with torch.cuda.amp.autocast(enabled=use_amp):
            loss = criterion(model(image), target)

        optimizer.zero_grad()
        if use_amp:
            # Scaled backward pass and optimizer step for mixed precision.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()

        lr_scheduler.step()

        lr = optimizer.param_groups[0]["lr"]
        progress.update(loss=loss.item(), lr=lr)

    return progress.meters["loss"].global_avg, lr


def create_lr_scheduler(optimizer,
                        num_step: int,
                        epochs: int,
                        warmup=True,
                        warmup_epochs=1,
                        warmup_factor=1e-3,
                        end_factor=1e-6):
    """Build a per-step ``LambdaLR``: linear warmup followed by cosine decay.

    Args:
        optimizer: optimizer whose learning rate is scheduled.
        num_step: number of scheduler steps per epoch (must be > 0).
        epochs: total number of epochs (must be > 0).
        warmup: when ``False`` the warmup phase is disabled.
        warmup_epochs: length of the warmup phase in epochs.
        warmup_factor: lr multiplier at the first warmup step.
        end_factor: lr multiplier reached at the end of the cosine phase.

    Returns:
        A ``torch.optim.lr_scheduler.LambdaLR`` meant to be stepped once per
        training iteration.
    """
    assert num_step > 0 and epochs > 0
    if warmup is False:
        warmup_epochs = 0

    warmup_steps = warmup_epochs * num_step
    cosine_steps = (epochs - warmup_epochs) * num_step

    def f(x):
        """
        Return the lr multiplier for step ``x``.
        Note that PyTorch calls lr_scheduler.step() once before training
        starts, so this function is evaluated at x == 0 on construction.
        """
        # Skip the warmup branch when it spans zero steps (warmup=True with
        # warmup_epochs=0); otherwise alpha below would divide by zero.
        if warmup is True and warmup_steps > 0 and x <= warmup_steps:
            alpha = float(x) / warmup_steps
            # During warmup the multiplier goes from warmup_factor -> 1
            return warmup_factor * (1 - alpha) + alpha

        current_step = x - warmup_steps
        # After warmup the multiplier follows a cosine from 1 -> end_factor
        return ((1 + math.cos(current_step * math.pi / cosine_steps)) / 2) * (1 - end_factor) + end_factor

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)


def get_params_groups(model: torch.nn.Module, weight_decay: float = 1e-4):
    """Split the trainable parameters of ``model`` into two optimizer groups.

    Returns a two-element list: the first group has ``weight_decay`` 0 and
    holds every 1-D parameter plus anything whose name ends in ``.bias``;
    the second group uses the given ``weight_decay`` and holds the rest.
    Parameters with ``requires_grad == False`` are left out.
    """
    no_decay = []
    with_decay = []

    for name, param in model.named_parameters():
        if not param.requires_grad:
            # frozen weights are not handed to the optimizer
            continue

        # Covers e.g. bn (weight, bias), conv2d (bias) and linear (bias).
        skip_decay = len(param.shape) == 1 or name.endswith(".bias")
        bucket = no_decay if skip_decay else with_decay
        bucket.append(param)

    return [{"params": no_decay, "weight_decay": 0.},
            {"params": with_decay, "weight_decay": weight_decay}]


================================================
FILE: pytorch_segmentation/u2net/transforms.py
================================================
import random
from typing import List, Union
from torchvision.transforms import functional as F
from torchvision.transforms import transforms as T


class Compose(object):
    """Apply a sequence of paired ``(image, target)`` transforms in order."""

    def __init__(self, transforms):
        # Callables taking (image, target) and returning the transformed pair.
        self.transforms = transforms

    def __call__(self, image, target=None):
        """Feed the pair through every transform and return the final pair."""
        for transform in self.transforms:
            image, target = transform(image, target)

        return image, target


class ToTensor(object):
    """Convert both the image and its target with ``F.to_tensor``."""

    def __call__(self, image, target):
        return F.to_tensor(image), F.to_tensor(target)


class RandomHorizontalFlip(object):
    """Horizontally flip image and target together with probability ``prob``."""

    def __init__(self, prob):
        self.flip_prob = prob

    def __call__(self, image, target):
        # One random draw decides for both, so image and target stay aligned.
        should_flip = random.random() < self.flip_prob
        if should_flip:
            image, target = F.hflip(image), F.hflip(target)
        return image, target


class Normalize(object):
    """Normalize the image with ``mean``/``std``; the target passes through unchanged."""

    def __init__(self, mean, std):
        self.mean, self.std = mean, std

    def __call__(self, image, target):
        normalized = F.normalize(image, mean=self.mean, std=self.std)
        return normalized, target


class Resize(object):
    """Resize the image to ``size`` and, unless disabled, the target as well."""

    def __init__(self, size: Union[int, List[int]], resize_mask: bool = True):
        self.size = size  # [h, w]
        self.resize_mask = resize_mask

    def __call__(self, image, target=None):
        resized_image = F.resize(image, self.size)
        # The target is only touched when resize_mask is exactly True.
        if self.resize_mask is True:
            return resized_image, F.resize(target, self.size)

        return resized_image, target


class RandomCrop(object):
    """Crop the same random ``size`` x ``size`` window from image and target.

    Inputs smaller than ``size`` are padded first.

    NOTE(review): ``pad_if_smaller`` reads ``img.shape``, so inputs are
    presumably tensors with height and width as the last two dimensions
    (i.e. this runs after ``ToTensor``) - confirm against the caller.
    """

    def __init__(self, size: int):
        self.size = size

    def pad_if_smaller(self, img, fill=0):
        """Pad ``img`` with ``fill`` when its shorter spatial side is below ``size``."""
        min_size = min(img.shape[-2:])
        if min_size < self.size:
            # Take height/width from the shape: the original unpacked
            # `img.size`, which is a bound method on tensors and cannot be
            # unpacked, so padding failed whenever it was actually needed.
            oh, ow = img.shape[-2:]
            padh = self.size - oh if oh < self.size else 0
            padw = self.size - ow if ow < self.size else 0
            # Padding order is [left, top, right, bottom].
            img = F.pad(img, [0, 0, padw, padh], fill=fill)
        return img

    def __call__(self, image, target):
        image = self.pad_if_smaller(image)
        target = self.pad_if_smaller(target)
        # One set of crop parameters is shared so image and target stay aligned.
        crop_params = T.RandomCrop.get_params(image, (self.size, self.size))
        image = F.crop(image, *crop_params)
        target = F.crop(target, *crop_params)
        return image, target


================================================
FILE: pytorch_segmentation/u2net/validation.py
================================================
import os
from typing import Union, List

import torch
from torch.utils import data

from src import u2net_full
from train_utils import evaluate
from my_dataset import DUTSDataset
import transforms as T


class SODPresetEval:
    """Evaluation preset: to-tensor, resize the image only, then normalize."""

    def __init__(self, base_size: Union[int, List[int]], mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        pipeline = [
            T.ToTensor(),
            # resize_mask=False: the target keeps its original resolution
            T.Resize(base_size, resize_mask=False),
            T.Normalize(mean=mean, std=std),
        ]
        self.transforms = T.Compose(pipeline)

    def __call__(self, img, target):
        """Apply the preset pipeline to an ``(img, target)`` pair."""
        return self.transforms(img, target)


def main(args):
    """Evaluate a U2-Net checkpoint and print the resulting MAE and F1 metrics.

    Reads ``args.device``, ``args.weights`` and ``args.data_path``. The dataset
    is built with ``train=False`` and the checkpoint may be either a bare
    state dict or a dict holding it under the ``"model"`` key.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    assert os.path.exists(args.weights), f"weights {args.weights} not found."

    eval_transforms = SODPresetEval([320, 320])
    val_dataset = DUTSDataset(args.data_path, train=False, transforms=eval_transforms)

    val_data_loader = data.DataLoader(val_dataset,
                                      batch_size=1,  # must be 1
                                      num_workers=4,
                                      pin_memory=True,
                                      shuffle=False,
                                      collate_fn=val_dataset.collate_fn)

    model = u2net_full()
    checkpoint = torch.load(args.weights, map_location='cpu')
    # Accept both a training checkpoint ({"model": state_dict, ...}) and a bare state dict.
    state_dict = checkpoint["model"] if "model" in checkpoint else checkpoint
    model.load_state_dict(state_dict)
    model.to(device)

    mae_metric, f1_metric = evaluate(model, val_data_loader, device=device)
    print(mae_metric, f1_metric)


def parse_args():
    """Parse the command-line options of the validation script.

    Returns the ``argparse.Namespace`` with ``data_path``, ``weights``,
    ``device`` and ``print_freq``.
    """
    import argparse

    parser = argparse.ArgumentParser(description="pytorch u2net validation")

    option_specs = (
        ("--data-path", dict(default="./", help="DUTS root")),
        ("--weights", dict(default="./u2net_full.pth")),
        ("--device", dict(default="cuda:0", help="training device")),
        ('--print-freq', dict(default=10, type=int, help='print frequency')),
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)

    return parser.parse_args()


# Script entry point: parse the command-line options and run the validation.
if __name__ == '__main__':
    args = parse_args()
    main(args)


================================================
FILE: pytorch_segmentation/unet/README.md
================================================
# U-Net(Convolutional Networks for Biomedical Image Segmentation)

## 该项目主要参考以下开源仓库
* [https://github.com/milesial/Pytorch-UNet](https://github.com/milesial/Pytorch-UNet)
* [https://github.com/pytorch/vision](https://github.com/pytorch/vision)

## 环境配置：
* Python3.6/3.7/3.8
* Pytorch1.10
* Ubuntu或Centos(Windows暂不支持多GPU训练)
* 最好使用GPU训练
* 详细环境配置见`requirements.txt`

## 文件结构：
```
  ├── src: 搭建U-Net模型代码
  ├── train_utils: 训练、验证以及多GPU训练相关模块
  ├── my_dataset.py: 自定义dataset用于读取DRIVE数据集(视网膜血管分割)
  ├── train.py: 以单GPU为例进行训练
  ├── train_multi_GPU.py: 针对使用多GPU的用户使用
  ├── predict.py: 简易的预测脚本，使用训练好的权重进行预测测试
  └── compute_mean_std.py: 统计数据集各通道的均值和标准差
```

## DRIVE数据集下载地址：
* 官网地址： [https://drive.grand-challenge.org/](https://drive.grand-challenge.org/)
* 百度云链接： [https://pan.baidu.com/s/1Tjkrx2B9FgoJk0KviA-rDw](https://pan.baidu.com/s/1Tjkrx2B9FgoJk0KviA-rDw)  密码: 8no8


## 训练方法
* 确保提前准备好数据集
* 若要使用单GPU或者CPU训练，直接使用train.py训练脚本
* 若要使用多GPU训练，使用`torchrun --nproc_per_node=8 train_multi_GPU.py`指令,`nproc_per_node`参数为使用GPU数量
* 如果想指定使用哪些GPU设备可在指令前加上`CUDA_VISIBLE_DEVICES=0,3`(例如我只要使用设备中的第1块和第4块GPU设备)
* `CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py`

## 注意事项
* 在使用训练脚本时，注意要将`--data-path`设置为自己存放`DRIVE`文件夹所在的**根目录**
* 在使用预测脚本时，要将`weights_path`设置为你自己生成的权重路径。
* 使用validation文件时，注意确保你的验证集或者测试集中必须包含每个类别的目标，并且使用时只需要修改`--num-classes`、`--data-path`和`--weights`即可，其他代码尽量不要改动

## 使用U-Net在DRIVE数据集上训练得到的权重(仅供测试使用)
- 链接: https://pan.baidu.com/s/1BOqkEpgt1XRqziyc941Hcw  密码: p50a

## 如果对U-Net网络不了解的可参考我的bilibili
* [https://www.bilibili.com/video/BV1Vq4y127fB/](https://www.bilibili.com/video/BV1Vq4y127fB/)


## 进一步了解该项目，以及对U-Net代码的分析可参考我的bilibili
* [https://b23.tv/PCJJmqN](https://b23.tv/PCJJmqN)

## 本项目U-Net默认使用双线性插值作为上采样方式，结构图如下
![u-net](unet.png)


================================================
FILE: pytorch_segmentation/unet/compute_mean_std.py
================================================
import os
from PIL import Image
import numpy as np


def main():
    """Print per-channel mean and std of the DRIVE training images.

    For every ``.tif`` image only the pixels where its ROI mask equals 255 are
    used. The statistics are computed per image and then averaged over all
    images.
    """
    img_channels = 3
    img_dir = "./DRIVE/training/images"
    roi_dir = "./DRIVE/training/mask"
    assert os.path.exists(img_dir), f"image dir: '{img_dir}' does not exist."
    assert os.path.exists(roi_dir), f"roi dir: '{roi_dir}' does not exist."

    img_name_list = [name for name in os.listdir(img_dir) if name.endswith(".tif")]
    cumulative_mean = np.zeros(img_channels)
    cumulative_std = np.zeros(img_channels)
    for img_name in img_name_list:
        img_path = os.path.join(img_dir, img_name)
        roi_path = os.path.join(roi_dir, img_name.replace(".tif", "_mask.gif"))

        # Scale the image to [0, 1]; the mask stays as 8-bit grey values.
        pixels = np.array(Image.open(img_path)) / 255.
        roi_img = np.array(Image.open(roi_path).convert('L'))

        # Boolean indexing keeps only the in-ROI pixels, one row per pixel.
        pixels = pixels[roi_img == 255]
        cumulative_mean += pixels.mean(axis=0)
        cumulative_std += pixels.std(axis=0)

    num_images = len(img_name_list)
    mean = cumulative_mean / num_images
    std = cumulative_std / num_images
    print(f"mean: {mean}")
    print(f"std: {std}")


# Script entry point: compute and print the dataset statistics.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_segmentation/unet/my_dataset.py
================================================
import os
from PIL import Image
import numpy as np
from torch.utils.data import Dataset


class DriveDataset(Dataset):
    """DRIVE dataset yielding ``(image, mask)`` pairs.

    Files are read from ``<root>/DRIVE/training`` or ``<root>/DRIVE/test``
    depending on ``train``. Each ``.tif`` image is paired with its
    ``1st_manual`` annotation and its ROI mask, matched by the file-name
    prefix before the first underscore.
    """

    def __init__(self, root: str, train: bool, transforms=None):
        super().__init__()
        self.flag = "training" if train else "test"
        data_root = os.path.join(root, "DRIVE", self.flag)
        assert os.path.exists(data_root), f"path '{data_root}' does not exists."
        self.transforms = transforms

        image_dir = os.path.join(data_root, "images")
        img_names = [name for name in os.listdir(image_dir) if name.endswith(".tif")]
        prefixes = [name.split("_")[0] for name in img_names]
        self.img_list = [os.path.join(data_root, "images", name) for name in img_names]

        self.manual = [os.path.join(data_root, "1st_manual", prefix + "_manual1.gif")
                       for prefix in prefixes]
        self._require_files(self.manual)

        self.roi_mask = [os.path.join(data_root, "mask", prefix + f"_{self.flag}_mask.gif")
                         for prefix in prefixes]
        self._require_files(self.roi_mask)

    @staticmethod
    def _require_files(paths):
        # Fail early on the first path that does not exist on disk.
        for path in paths:
            if not os.path.exists(path):
                raise FileNotFoundError(f"file {path} does not exists.")

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(img, mask)`` after optional transforms.

        The mask is ``manual / 255 + (255 - roi)`` clipped to ``[0, 255]``.
        Assuming both GIFs are binary 0/255 images, that gives 0 for
        background, 1 for annotated pixels and 255 outside the ROI.
        """
        img = Image.open(self.img_list[idx]).convert('RGB')
        manual = np.array(Image.open(self.manual[idx]).convert('L')) / 255
        outside_roi = 255 - np.array(Image.open(self.roi_mask[idx]).convert('L'))
        mask = np.clip(manual + outside_roi, a_min=0, a_max=255)

        # Convert back to PIL because the transforms operate on PIL data.
        mask = Image.fromarray(mask)

        if self.transforms is None:
            return img, mask

        img, mask = self.transforms(img, mask)
        return img, mask

    def __len__(self):
        return len(self.img_list)

    @staticmethod
    def collate_fn(batch):
        """Batch samples, padding images with 0 and targets with 255."""
        images, targets = zip(*batch)
        return cat_list(images, fill_value=0), cat_list(targets, fill_value=255)


def cat_list(images, fill_value=0):
    """Stack tensors of differing sizes into one batch padded with ``fill_value``.

    The batch takes the per-dimension maximum shape over ``images``; each
    tensor is copied into the top-left corner of its slot along the last two
    dimensions.
    """
    shapes = [img.shape for img in images]
    max_size = tuple(max(dims) for dims in zip(*shapes))
    batch_shape = (len(images),) + max_size
    # Same dtype and device as the first tensor, pre-filled with the padding value.
    batched_imgs = images[0].new_full(batch_shape, fill_value)
    for index, img in enumerate(images):
        height, width = img.shape[-2], img.shape[-1]
        batched_imgs[index][..., :height, :width].copy_(img)
    return batched_imgs


================================================
FILE: pytorch_segmentation/unet/predict.py
================================================
import os
import time

import torch
from torchvision import transforms
import numpy as np
from PIL import Image

from src import UNet


def time_synchronized():
    """Return ``time.time()`` after synchronizing CUDA when it is available."""
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()


def main():
    """Run U-Net on one DRIVE test image and save the mask to ``test_result.png``.

    Weights, image and ROI-mask paths are hard-coded below; each must exist.
    The inference time of the single forward pass is printed.
    """
    classes = 1  # exclude background
    weights_path = "./save_weights/best_model.pth"
    img_path = "./DRIVE/test/images/01_test.tif"
    roi_mask_path = "./DRIVE/test/mask/01_test_mask.gif"
    assert os.path.exists(weights_path), f"weights {weights_path} not found."
    assert os.path.exists(img_path), f"image {img_path} not found."
    assert os.path.exists(roi_mask_path), f"image {roi_mask_path} not found."

    mean = (0.709, 0.381, 0.224)
    std = (0.127, 0.079, 0.043)

    # pick the device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # build the model and load the trained weights
    model = UNet(in_channels=3, num_classes=classes+1, base_c=32)
    checkpoint = torch.load(weights_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])
    model.to(device)

    # ROI mask as a grey-scale array
    roi_img = np.array(Image.open(roi_mask_path).convert('L'))

    # input image
    original_img = Image.open(img_path).convert('RGB')

    # PIL image -> normalized tensor with a leading batch dimension
    preprocess = transforms.Compose([transforms.ToTensor(),
                                     transforms.Normalize(mean=mean, std=std)])
    img = preprocess(original_img).unsqueeze(0)

    model.eval()  # switch to evaluation mode
    with torch.no_grad():
        # untimed forward pass on zeros of the same size, run before the timed one
        img_height, img_width = img.shape[-2:]
        warmup_input = torch.zeros((1, 3, img_height, img_width), device=device)
        model(warmup_input)

        t_start = time_synchronized()
        output = model(img.to(device))
        t_end = time_synchronized()
        print("inference time: {}".format(t_end - t_start))

        prediction = output['out'].argmax(1).squeeze(0)
        prediction = prediction.to("cpu").numpy().astype(np.uint8)
        # paint foreground pixels white (255)
        prediction[prediction == 1] = 255
        # paint pixels outside the region of interest black (0)
        prediction[roi_img == 0] = 0
        Image.fromarray(prediction).save("test_result.png")


# Script entry point: run the single-image prediction.
if __name__ == '__main__':
    main()


================================================
FILE: pytorch_segmentation/unet/requirements.txt
================================================
numpy==1.22.0
torch==1.13.1
torchvision==0.11.1
Pillow


================================================
FILE: pytorch_segmentation/unet/results20220109-165837.txt
================================================
[epoch: 0]
train_loss: 1.3541
lr: 0.010000
dice coefficient: 0.111
global correct: 71.1
average row correct: ['78.2', '22.4']
IoU: ['70.2', '9.0']
mean IoU: 39.6

[epoch: 1]
train_loss: 1.0442
lr: 0.009955
dice coefficient: 0.014
global correct: 85.7
average row correct: ['98.0', '1.0']
IoU: ['85.7', '0.9']
mean IoU: 43.3

[epoch: 2]
train_loss: 0.9315
lr: 0.009910
dice coefficient: 0.000
global correct: 87.2
average row correct: ['99.9', '0.0']
IoU: ['87.2', '0.0']
mean IoU: 43.6

[epoch: 3]
train_loss: 0.7929
lr: 0.009864
dice coefficient: 0.021
global correct: 87.4
average row correct: ['100.0', '1.1']
IoU: ['87.4', '1.1']
mean IoU: 44.3

[epoch: 4]
train_loss: 0.7329
lr: 0.009819
dice coefficient: 0.210
global correct: 88.8
average row correct: ['99.9', '12.5']
IoU: ['88.6', '12.4']
mean IoU: 50.5

[epoch: 5]
train_loss: 0.6343
lr: 0.009774
dice coefficient: 0.440
global correct: 90.5
average row correct: ['99.3', '30.8']
IoU: ['90.2', '29.3']
mean IoU: 59.7

[epoch: 6]
train_loss: 0.6105
lr: 0.009728
dice coefficient: 0.575
global correct: 91.8
average row correct: ['98.6', '45.5']
IoU: ['91.3', '41.5']
mean IoU: 66.4

[epoch: 7]
train_loss: 0.5982
lr: 0.009683
dice coefficient: 0.651
global correct: 91.3
average row correct: ['95.1', '64.8']
IoU: ['90.5', '48.5']
mean IoU: 69.5

[epoch: 8]
train_loss: 0.6641
lr: 0.009637
dice coefficient: 0.651
global correct: 90.7
average row correct: ['94.0', '67.7']
IoU: ['89.8', '48.0']
mean IoU: 68.9

[epoch: 9]
train_loss: 0.5530
lr: 0.009592
dice coefficient: 0.550
global correct: 80.7
average row correct: ['81.0', '78.3']
IoU: ['78.5', '34.0']
mean IoU: 56.3

[epoch: 10]
train_loss: 0.5676
lr: 0.009547
dice coefficient: 0.659
global correct: 89.2
average row correct: ['91.0', '77.0']
IoU: ['88.1', '47.6']
mean IoU: 67.8

[epoch: 11]
train_loss: 0.5494
lr: 0.009501
dice coefficient: 0.654
global correct: 90.0
average row correct: ['92.8', '70.5']
IoU: ['89.0', '47.3']
mean IoU: 68.1

[epoch: 12]
train_loss: 0.5293
lr: 0.009456
dice coefficient: 0.713
global correct: 91.9
average row correct: ['94.6', '73.2']
IoU: ['91.1', '53.5']
mean IoU: 72.3

[epoch: 13]
train_loss: 0.5291
lr: 0.009410
dice coefficient: 0.689
global correct: 91.2
average row correct: ['93.6', '75.1']
IoU: ['90.3', '52.2']
mean IoU: 71.3

[epoch: 14]
train_loss: 0.5163
lr: 0.009365
dice coefficient: 0.691
global correct: 92.6
average row correct: ['96.3', '67.0']
IoU: ['91.9', '53.4']
mean IoU: 72.6

[epoch: 15]
train_loss: 0.5168
lr: 0.009319
dice coefficient: 0.722
global correct: 93.3
average row correct: ['96.6', '70.3']
IoU: ['92.6', '57.1']
mean IoU: 74.9

[epoch: 16]
train_loss: 0.5153
lr: 0.009273
dice coefficient: 0.740
global correct: 94.0
average row correct: ['97.7', '68.3']
IoU: ['93.4', '59.1']
mean IoU: 76.3

[epoch: 17]
train_loss: 0.4923
lr: 0.009228
dice coefficient: 0.734
global correct: 93.6
average row correct: ['96.9', '70.7']
IoU: ['92.9', '58.4']
mean IoU: 75.7

[epoch: 18]
train_loss: 0.4692
lr: 0.009182
dice coefficient: 0.740
global correct: 93.7
average row correct: ['97.0', '71.2']
IoU: ['93.1', '59.2']
mean IoU: 76.1

[epoch: 19]
train_loss: 0.4701
lr: 0.009136
dice coefficient: 0.754
global correct: 94.0
average row correct: ['97.1', '72.9']
IoU: ['93.4', '60.8']
mean IoU: 77.1

[epoch: 20]
train_loss: 0.4710
lr: 0.009091
dice coefficient: 0.761
global correct: 94.0
average row correct: ['96.8', '75.0']
IoU: ['93.4', '61.5']
mean IoU: 77.5

[epoch: 21]
train_loss: 0.4624
lr: 0.009045
dice coefficient: 0.756
global correct: 94.1
average row correct: ['97.3', '72.3']
IoU: ['93.6', '61.1']
mean IoU: 77.3

[epoch: 22]
train_loss: 0.4480
lr: 0.008999
dice coefficient: 0.759
global correct: 94.3
average row correct: ['97.5', '71.9']
IoU: ['93.7', '61.5']
mean IoU: 77.6

[epoch: 23]
train_loss: 0.4342
lr: 0.008954
dice coefficient: 0.748
global correct: 94.3
average row correct: ['98.1', '68.2']
IoU: ['93.7', '60.2']
mean IoU: 77.0

[epoch: 24]
train_loss: 0.4465
lr: 0.008908
dice coefficient: 0.771
global correct: 94.5
average row correct: ['97.6', '73.4']
IoU: ['93.9', '63.0']
mean IoU: 78.5

[epoch: 25]
train_loss: 0.4295
lr: 0.008862
dice coefficient: 0.770
global correct: 94.5
average row correct: ['97.6', '73.1']
IoU: ['94.0', '62.9']
mean IoU: 78.4

[epoch: 26]
train_loss: 0.4246
lr: 0.008816
dice coefficient: 0.768
global correct: 94.6
average row correct: ['98.0', '71.2']
IoU: ['94.1', '62.6']
mean IoU: 78.3

[epoch: 27]
train_loss: 0.4180
lr: 0.008770
dice coefficient: 0.771
global correct: 94.6
average row correct: ['97.9', '72.1']
IoU: ['94.1', '63.0']
mean IoU: 78.5

[epoch: 28]
train_loss: 0.4408
lr: 0.008724
dice coefficient: 0.775
global correct: 94.5
average row correct: ['97.3', '75.3']
IoU: ['93.9', '63.5']
mean IoU: 78.7

[epoch: 29]
train_loss: 0.4323
lr: 0.008678
dice coefficient: 0.763
global correct: 94.5
average row correct: ['97.9', '70.9']
IoU: ['93.9', '62.0']
mean IoU: 78.0

[epoch: 30]
train_loss: 0.4144
lr: 0.008632
dice coefficient: 0.772
global correct: 94.3
average row correct: ['96.8', '76.9']
IoU: ['93.7', '63.1']
mean IoU: 78.4

[epoch: 31]
train_loss: 0.4130
lr: 0.008586
dice coefficient: 0.776
global correct: 94.6
average row correct: ['97.6', '74.0']
IoU: ['94.0', '63.6']
mean IoU: 78.8

[epoch: 32]
train_loss: 0.4109
lr: 0.008540
dice coefficient: 0.776
global correct: 94.6
average row correct: ['97.5', '74.8']
IoU: ['94.0', '63.6']
mean IoU: 78.8

[epoch: 33]
train_loss: 0.4190
lr: 0.008494
dice coefficient: 0.779
global correct: 94.7
average row correct: ['97.6', '74.3']
IoU: ['94.1', '64.0']
mean IoU: 79.1

[epoch: 34]
train_loss: 0.4163
lr: 0.008448
dice coefficient: 0.773
global correct: 94.6
average row correct: ['97.8', '72.6']
IoU: ['94.1', '63.2']
mean IoU: 78.6

[epoch: 35]
train_loss: 0.4064
lr: 0.008402
dice coefficient: 0.775
global correct: 94.7
average row correct: ['98.0', '72.1']
IoU: ['94.2', '63.5']
mean IoU: 78.8

[epoch: 36]
train_loss: 0.3986
lr: 0.008356
dice coefficient: 0.785
global correct: 94.7
average row correct: ['97.2', '77.1']
IoU: ['94.1', '64.8']
mean IoU: 79.4

[epoch: 37]
train_loss: 0.3959
lr: 0.008310
dice coefficient: 0.784
global correct: 94.8
average row correct: ['97.7', '74.9']
IoU: ['94.3', '64.7']
mean IoU: 79.5

[epoch: 38]
train_loss: 0.4058
lr: 0.008264
dice coefficient: 0.786
global correct: 94.7
average row correct: ['97.4', '76.4']
IoU: ['94.2', '64.9']
mean IoU: 79.5

[epoch: 39]
train_loss: 0.3934
lr: 0.008218
dice coefficient: 0.786
global correct: 94.8
average row correct: ['97.5', '76.2']
IoU: ['94.2', '64.9']
mean IoU: 79.5

[epoch: 40]
train_loss: 0.3926
lr: 0.008171
dice coefficient: 0.783
global correct: 94.7
average row correct: ['97.3', '76.4']
IoU: ['94.1', '64.6']
mean IoU: 79.3

[epoch: 41]
train_loss: 0.3880
lr: 0.008125
dice coefficient: 0.787
global correct: 94.8
average row correct: ['97.6', '75.6']
IoU: ['94.3', '65.1']
mean IoU: 79.7

[epoch: 42]
train_loss: 0.3964
lr: 0.008079
dice coefficient: 0.788
global correct: 94.8
average row correct: ['97.4', '77.1']
IoU: ['94.2', '65.3']
mean IoU: 79.7

[epoch: 43]
train_loss: 0.3980
lr: 0.008032
dice coefficient: 0.787
global correct: 94.7
average row correct: ['97.3', '77.0']
IoU: ['94.2', '65.1']
mean IoU: 79.6

[epoch: 44]
train_loss: 0.3846
lr: 0.007986
dice coefficient: 0.787
global correct: 94.7
average row correct: ['97.2', '77.4']
IoU: ['94.1', '65.1']
mean IoU: 79.6

[epoch: 45]
train_loss: 0.3832
lr: 0.007940
dice coefficient: 0.783
global correct: 94.7
average row correct: ['97.5', '75.6']
IoU: ['94.2', '64.6']
mean IoU: 79.4

[epoch: 46]
train_loss: 0.3839
lr: 0.007893
dice coefficient: 0.789
global correct: 94.9
average row correct: ['97.6', '76.1']
IoU: ['94.3', '65.4']
mean IoU: 79.8

[epoch: 47]
train_loss: 0.3739
lr: 0.007847
dice coefficient: 0.789
global correct: 94.8
average row correct: ['97.4', '76.8']
IoU: ['94.3', '65.4']
mean IoU: 79.8

[epoch: 48]
train_loss: 0.4064
lr: 0.007800
dice coefficient: 0.783
global correct: 94.7
average row correct: ['97.4', '76.0']
IoU: ['94.1', '64.4']
mean IoU: 79.3

[epoch: 49]
train_loss: 0.3878
lr: 0.007754
dice coefficient: 0.787
global correct: 94.7
average row correct: ['97.3', '77.1']
IoU: ['94.2', '65.1']
mean IoU: 79.6

[epoch: 50]
train_loss: 0.3856
lr: 0.007707
dice coefficient: 0.788
global correct: 94.8
average row correct: ['97.6', '76.2']
IoU: ['94.3', '65.3']
mean IoU: 79.8

[epoch: 51]
train_loss: 0.3883
lr: 0.007661
dice coefficient: 0.788
global correct: 94.5
average row correct: ['96.5', '80.7']
IoU: ['93.9', '65.2']
mean IoU: 79.5

[epoch: 52]
train_loss: 0.3965
lr: 0.007614
dice coefficient: 0.791
global correct: 94.8
average row correct: ['97.2', '78.1']
IoU: ['94.2', '65.5']
mean IoU: 79.9

[epoch: 53]
train_loss: 0.3851
lr: 0.007567
dice coefficient: 0.793
global correct: 94.9
average row correct: ['97.4', '77.5']
IoU: ['94.3', '65.9']
mean IoU: 80.1

[epoch: 54]
train_loss: 0.3859
lr: 0.007521
dice coefficient: 0.790
global correct: 94.9
average row correct: ['97.6', '76.2']
IoU: ['94.3', '65.5']
mean IoU: 79.9

[epoch: 55]
train_loss: 0.3801
lr: 0.007474
dice coefficient: 0.790
global correct: 94.8
average row correct: ['97.4', '77.2']
IoU: ['94.3', '65.5']
mean IoU: 79.9

[epoch: 56]
train_loss: 0.3928
lr: 0.007427
dice coefficient: 0.786
global correct: 94.9
average row correct: ['98.0', '73.7']
IoU: ['94.4', '64.9']
mean IoU: 79.7

[epoch: 57]
train_loss: 0.3930
lr: 0.007381
dice coefficient: 0.790
global correct: 94.6
average row correct: ['96.8', '79.6']
IoU: ['94.0', '65.4']
mean IoU: 79.7

[epoch: 58]
train_loss: 0.3738
lr: 0.007334
dice coefficient: 0.789
global correct: 94.9
average row correct: ['97.8', '75.0']
IoU: ['94.4', '65.4']
mean IoU: 79.9

[epoch: 59]
train_loss: 0.3706
lr: 0.007287
dice coefficient: 0.795
global correct: 94.8
average row correct: ['97.1', '79.5']
IoU: ['94.2', '66.1']
mean IoU: 80.2

[epoch: 60]
train_loss: 0.3783
lr: 0.007240
dice coefficient: 0.795
global correct: 95.0
average row correct: ['97.7', '76.7']
IoU: ['94.5', '66.2']
mean IoU: 80.3

[epoch: 61]
train_loss: 0.3656
lr: 0.007193
dice coefficient: 0.792
global correct: 95.1
average row correct: ['98.0', '74.6']
IoU: ['94.5', '65.7']
mean IoU: 80.1

[epoch: 62]
train_loss: 0.3773
lr: 0.007146
dice coefficient: 0.796
global correct: 95.0
average row correct: ['97.7', '76.6']
IoU: ['94.5', '66.3']
mean IoU: 80.4

[epoch: 63]
train_loss: 0.3703
lr: 0.007099
dice coefficient: 0.796
global correct: 95.1
average row correct: ['97.9', '75.8']
IoU: ['94.6', '66.2']
mean IoU: 80.4

[epoch: 64]
train_loss: 0.3630
lr: 0.007052
dice coefficient: 0.794
global correct: 94.8
average row correct: ['97.1', '79.4']
IoU: ['94.2', '66.0']
mean IoU: 80.1

[epoch: 65]
train_loss: 0.3680
lr: 0.007005
dice coefficient: 0.797
global correct: 95.1
average row correct: ['97.7', '76.8']
IoU: ['94.5', '66.4']
mean IoU: 80.5

[epoch: 66]
train_loss: 0.3557
lr: 0.006958
dice coefficient: 0.799
global correct: 95.1
average row correct: ['97.5', '77.9']
IoU: ['94.5', '66.7']
mean IoU: 80.6

[epoch: 67]
train_loss: 0.3759
lr: 0.006911
dice coefficient: 0.796
global correct: 94.8
average row correct: ['97.0', '80.0']
IoU: ['94.2', '66.3']
mean IoU: 80.2

[epoch: 68]
train_loss: 0.3638
lr: 0.006864
dice coefficient: 0.790
global correct: 95.0
average row correct: ['98.0', '74.3']
IoU: ['94.5', '65.4']
mean IoU: 79.9

[epoch: 69]
train_loss: 0.3540
lr: 0.006817
dice coefficient: 0.796
global correct: 94.6
average row correct: ['96.3', '83.0']
IoU: ['94.0', '66.2']
mean IoU: 80.1

[epoch: 70]
train_loss: 0.3602
lr: 0.006770
dice coefficient: 0.798
global correct: 94.9
average row correct: ['97.1', '79.7']
IoU: ['94.3', '66.6']
mean IoU: 80.4

[epoch: 71]
train_loss: 0.3597
lr: 0.006722
dice coefficient: 0.797
global correct: 95.0
average row correct: ['97.6', '77.2']
IoU: ['94.5', '66.5']
mean IoU: 80.5

[epoch: 72]
train_loss: 0.3618
lr: 0.006675
dice coefficient: 0.802
global correct: 95.1
average row correct: ['97.5', '78.8']
IoU: ['94.5', '67.1']
mean IoU: 80.8

[epoch: 73]
train_loss: 0.3582
lr: 0.006628
dice coefficient: 0.803
global correct: 95.1
average row correct: ['97.4', '79.3']
IoU: ['94.5', '67.2']
mean IoU: 80.9

[epoch: 74]
train_loss: 0.3624
lr: 0.006580
dice coefficient: 0.800
global correct: 95.1
average row correct: ['97.7', '77.3']
IoU: ['94.6', '66.8']
mean IoU: 80.7

[epoch: 75]
train_loss: 0.3648
lr: 0.006533
dice coefficient: 0.795
global correct: 95.1
average row correct: ['98.2', '74.5']
IoU: ['94.6', '66.1']
mean IoU: 80.4

[epoch: 76]
train_loss: 0.3553
lr: 0.006486
dice coefficient: 0.801
global correct: 95.0
average row correct: ['97.3', '79.6']
IoU: ['94.4', '67.0']
mean IoU: 80.7

[epoch: 77]
train_loss: 0.3632
lr: 0.006438
dice coefficient: 0.796
global correct: 94.6
average row correct: ['96.5', '82.2']
IoU: ['94.0', '66.1']
mean IoU: 80.1

[epoch: 78]
train_loss: 0.3511
lr: 0.006391
dice coefficient: 0.801
global correct: 95.2
average row correct: ['97.9', '76.5']
IoU: ['94.7', '67.0']
mean IoU: 80.8

[epoch: 79]
train_loss: 0.3602
lr: 0.006343
dice coefficient: 0.803
global correct: 95.2
average row correct: ['97.8', '77.4']
IoU: ['94.7', '67.2']
mean IoU: 80.9

[epoch: 80]
train_loss: 0.3585
lr: 0.006295
dice coefficient: 0.801
global correct: 94.9
average row correct: ['97.0', '80.8']
IoU: ['94.3', '67.0']
mean IoU: 80.7

[epoch: 81]
train_loss: 0.3543
lr: 0.006248
dice coefficient: 0.802
global correct: 95.1
average row correct: ['97.5', '78.6']
IoU: ['94.6', '67.1']
mean IoU: 80.8

[epoch: 82]
train_loss: 0.3689
lr: 0.006200
dice coefficient: 0.804
global correct: 95.1
average row correct: ['97.3', '79.8']
IoU: ['94.5', '67.4']
mean IoU: 80.9

[epoch: 83]
train_loss: 0.3588
lr: 0.006152
dice coefficient: 0.803
global correct: 94.9
average row correct: ['96.9', '81.6']
IoU: ['94.3', '67.2']
mean IoU: 80.8

[epoch: 84]
train_loss: 0.3640
lr: 0.006105
dice coefficient: 0.798
global correct: 94.9
average row correct: ['97.3', '78.9']
IoU: ['94.4', '66.4']
mean IoU: 80.4

[epoch: 85]
train_loss: 0.3635
lr: 0.006057
dice coefficient: 0.802
global correct: 95.2
average row correct: ['97.7', '77.7']
IoU: ['94.6', '67.1']
mean IoU: 80.9

[epoch: 86]
train_loss: 0.3441
lr: 0.006009
dice coefficient: 0.802
global correct: 95.2
average row correct: ['98.0', '76.1']
IoU: ['94.7', '67.0']
mean IoU: 80.9

[epoch: 87]
train_loss: 0.3553
lr: 0.005961
dice coefficient: 0.806
global correct: 95.1
average row correct: ['97.4', '79.8']
IoU: ['94.6', '67.6']
mean IoU: 81.1

[epoch: 88]
train_loss: 0.3558
lr: 0.005913
dice coefficient: 0.804
global correct: 95.0
average row correct: ['97.2', '80.4']
IoU: ['94.5', '67.4']
mean IoU: 80.9

[epoch: 89]
train_loss: 0.3638
lr: 0.005865
dice coefficient: 0.804
global correct: 95.1
average row correct: ['97.6', '78.5']
IoU: ['94.6', '67.3']
mean IoU: 81.0

[epoch: 90]
train_loss: 0.3546
lr: 0.005817
dice coefficient: 0.804
global correct: 95.1
average row correct: ['97.6', '78.5']
IoU: ['94.6', '67.3']
mean IoU: 80.9

[epoch: 91]
train_loss: 0.3587
lr: 0.005769
dice coefficient: 0.804
global correct: 95.0
average row correct: ['97.0', '81.0']
IoU: ['94.4', '67.3']
mean IoU: 80.9

[epoch: 92]
train_loss: 0.3546
lr: 0.005721
dice coefficient: 0.804
global correct: 95.0
average row correct: ['97.0', '81.2']
IoU: ['94.4', '67.3']
mean IoU: 80.9

[epoch: 93]
train_loss: 0.3505
lr: 0.005673
dice coefficient: 0.804
global correct: 95.2
average row correct: ['97.6', '78.4']
IoU: ['94.6', '67.3']
mean IoU: 81.0

[epoch: 94]
train_loss: 0.3545
lr: 0.005625
dice coefficient: 0.803
global correct: 95.0
average row correct: ['97.2', '80.2']
IoU: ['94.5', '67.2']
mean IoU: 80.9

[epoch: 95]
train_loss: 0.3497
lr: 0.005577
dice coefficient: 0.806
global correct: 95.2
average row correct: ['97.8', '78.0']
IoU: ['94.7', '67.6']
mean IoU: 81.1

[epoch: 96]
train_loss: 0.3476
lr: 0.005528
dice coefficient: 0.806
global correct: 95.1
average row correct: ['97.4', '79.9']
IoU: ['94.6', '67.7']
mean IoU: 81.1

[epoch: 97]
train_loss: 0.3479
lr: 0.005480
dice coefficient: 0.805
global correct: 95.0
average row correct: ['96.9', '81.8']
IoU: ['94.4', '67.5']
mean IoU: 81.0

[epoch: 98]
train_loss: 0.3563
lr: 0.005432
dice coefficient: 0.807
global correct: 95.1
average row correct: ['97.2', '80.6']
IoU: ['94.6', '67.8']
mean IoU: 81.2

[epoch: 99]
train_loss: 0.3444
lr: 0.005383
dice coefficient: 0.805
global correct: 95.2
average row correct: ['97.7', '78.2']
IoU: ['94.7', '67.5']
mean IoU: 81.1

[epoch: 100]
train_loss: 0.3419
lr: 0.005335
dice coefficient: 0.805
global correct: 95.1
average row correct: ['97.2', '80.4']
IoU: ['94.5', '67.5']
mean IoU: 81.0

[epoch: 101]
train_loss: 0.3504
lr: 0.005286
dice coefficient: 0.807
global correct: 95.0
average row correct: ['96.9', '82.1']
IoU: ['94.4', '67.7']
mean IoU: 81.1

[epoch: 102]
train_loss: 0.3511
lr: 0.005238
dice coefficient: 0.802
global correct: 95.0
average row correct: ['97.2', '79.7']
IoU: ['94.4', '67.0']
mean IoU: 80.7

[epoch: 103]
train_loss: 0.3431
lr: 0.005189
dice coefficient: 0.802
global correct: 95.2
average row correct: ['98.0', '76.5']
IoU: ['94.7', '67.1']
mean IoU: 80.9

[epoch: 104]
train_loss: 0.3453
lr: 0.005140
dice coefficient: 0.805
global correct: 95.2
average row correct: ['97.7', '78.0']
IoU: ['94.7', '67.5']
mean IoU: 81.1

[epoch: 105]
train_loss: 0.3475
lr: 0.005092
dice coefficient: 0.805
global correct: 95.0
average row correct: ['97.0', '81.2']
IoU: ['94.4', '67.5']
mean IoU: 81.0

[epoch: 106]
train_loss: 0.3434
lr: 0.005043
dice coefficient: 0.806
global correct: 95.1
average row correct: ['97.4', '79.7']
IoU: ['94.6', '67.6']
mean IoU: 81.1

[epoch: 107]
train_loss: 0.3426
lr: 0.004994
dice coefficient: 0.807
global correct: 95.2
average row correct: ['97.6', '78.8']
IoU: ['94.7', '67.8']
mean IoU: 81.2

[epoch: 108]
train_loss: 0.3372
lr: 0.004945
dice coefficient: 0.807
global correct: 95.0
average row correct: ['96.9', '82.1']
IoU: ['94.4', '67.8']
mean IoU: 81.1

[epoch: 109]
train_loss: 0.3474
lr: 0.004896
dice coefficient: 0.805
global correct: 95.2
average row correct: ['97.5', '79.1']
IoU: ['94.6', '67.5']
mean IoU: 81.1

[epoch: 110]
train_loss: 0.3393
lr: 0.004847
dice coefficient: 0.804
global correct: 95.1
average row correct: ['97.4', '79.4']
IoU: ['94.5', '67.3']
mean IoU: 80.9

[epoch: 111]
train_loss: 0.3381
lr: 0.004798
dice coefficient: 0.808
global correct: 95.2
average row correct: ['97.4', '79.9']
IoU: ['94.6', '67.8']
mean IoU: 81.2

[epoch: 112]
train_loss: 0.3464
lr: 0.004749
dice coefficient: 0.808
global correct: 95.2
average row correct: ['97.3', '80.4']
IoU: ['94.6', '68.0']
mean IoU: 81.3

[epoch: 113]
train_loss: 0.3397
lr: 0.004700
dice coefficient: 0.806
global correct: 95.3
average row correct: ['97.8', '77.7']
IoU: ['94.8', '67.7']
mean IoU: 81.2

[epoch: 114]
train_loss: 0.3409
lr: 0.004651
dice coefficient: 0.808
global correct: 95.1
average row correct: ['97.0', '81.7']
IoU: ['94.5', '67.9']
mean IoU: 81.2

[epoch: 115]
train_loss: 0.3396
lr: 0.004601
dice coefficient: 0.809
global correct: 95.2
average row correct: ['97.5', '79.8']
IoU: ['94.7', '68.1']
mean IoU: 81.4

[epoch: 116]
train_loss: 0.3402
lr: 0.004552
dice coefficient: 0.810
global correct: 95.3
average row correct: ['97.5', '79.8']
IoU: ['94.7', '68.2']
mean IoU: 81.5

[epoch: 117]
train_loss: 0.3444
lr: 0.004503
dice coefficient: 0.810
global correct: 95.1
average row correct: ['97.0', '82.2']
IoU: ['94.5', '68.2']
mean IoU: 81.3

[epoch: 118]
train_loss: 0.3391
lr: 0.004453
dice coefficient: 0.809
global correct: 95.1
average row correct: ['97.1', '81.7']
IoU: ['94.5', '68.0']
mean IoU: 81.3

[epoch: 119]
train_loss: 0.3360
lr: 0.004404
dice coefficient: 0.810
global correct: 95.2
average row correct: ['97.4', '80.5']
IoU: ['94.7', '68.2']
mean IoU: 81.4

[epoch: 120]
train_loss: 0.3418
lr: 0.004354
dice coefficient: 0.810
global correct: 95.2
average row correct: ['97.3', '80.6']
IoU: ['94.7', '68.2']
mean IoU: 81.4

[epoch: 121]
train_loss: 0.3308
lr: 0.004304
dice coefficient: 0.809
global correct: 95.1
average row correct: ['97.2', '81.1']
IoU: ['94.6', '68.0']
mean IoU: 81.3

[epoch: 122]
train_loss: 0.3440
lr: 0.004255
dice coefficient: 0.808
global correct: 95.2
average row correct: ['97.4', '80.2']
IoU: ['94.6', '67.9']
mean IoU: 81.3

[epoch: 123]
train_loss: 0.3344
lr: 0.004205
dice coefficient: 0.810
global correct: 95.2
average row correct: ['97.3', '80.8']
IoU: ['94.6', '68.2']
mean IoU: 81.4

[epoch: 124]
train_loss: 0.3282
lr: 0.004155
dice coefficient: 0.811
global correct: 95.2
average row correct: ['97.4', '80.7']
IoU: ['94.7', '68.4']
mean IoU: 81.5

[epoch: 125]
train_loss: 0.3342
lr: 0.004105
dice coefficient: 0.809
global correct: 95.2
average row correct: ['97.5', '79.7']
IoU: ['94.7', '68.1']
mean IoU: 81.4

[epoch: 126]
train_loss: 0.3411
lr: 0.004055
dice coefficient: 0.809
global correct: 95.2
average row correct: ['97.5', '79.7']
IoU: ['94.7', '68.0']
mean IoU: 81.4

[epoch: 127]
train_loss: 0.3415
lr: 0.004005
dice coefficient: 0.809
global correct: 95.3
average row correct: ['97.6', '79.2']
IoU: ['94.7', '68.0']
mean IoU: 81.4

[epoch: 128]
train_loss: 0.3360
lr: 0.003955
dice coefficient: 0.808
global correct: 95.3
average row correct: ['97.8', '78.3']
IoU: ['94.8', '67.8']
mean IoU: 81.3

[epoch: 129]
train_loss: 0.3323
lr: 0.003905
dice coefficient: 0.808
global correct: 95.0
average row correct: ['96.9', '82.2']
IoU: ['94.5', '67.9']
mean IoU: 81.2

[epoch: 130]
train_loss: 0.3427
lr: 0.003855
dice coefficient: 0.807
global correct: 94.9
average row correct: ['96.7', '83.2']
IoU: ['94.3', '67.7']
mean IoU: 81.0

[epoch: 131]
train_loss: 0.3402
lr: 0.003804
dice coefficient: 0.808
global correct: 95.1
average row correct: ['97.0', '82.0']
IoU: ['94.5', '67.9']
mean IoU: 81.2

[epoch: 132]
train_loss: 0.3388
lr: 0.003754
dice coefficient: 0.809
global correct: 95.2
average row correct: ['97.3', '80.7']
IoU: ['94.6', '68.0']
mean IoU: 81.3

[epoch: 133]
train_loss: 0.3366
lr: 0.003704
dice coefficient: 0.802
global correct: 94.8
average row correct: ['96.4', '83.4']
IoU: ['94.2', '67.1']
mean IoU: 80.6

[epoch: 134]
train_loss: 0.3347
lr: 0.003653
dice coefficient: 0.809
global correct: 95.2
average row correct: ['97.3', '80.7']
IoU: ['94.6', '68.0']
mean IoU: 81.3

[epoch: 135]
train_loss: 0.3405
lr: 0.003602
dice coefficient: 0.811
global correct: 95.2
average row correct: ['97.2', '81.2']
IoU: ['94.6', '68.3']
mean IoU: 81.5

[epoch: 136]
train_loss: 0.3342
lr: 0.003552
dice coefficient: 0.812
global correct: 95.2
average row correct: ['97.3', '81.1']
IoU: ['94.7', '68.5']
mean IoU: 81.6

[epoch: 137]
train_loss: 0.3309
lr: 0.003501
dice coefficient: 0.812
global correct: 95.3
average row correct: ['97.5', '80.2']
IoU: ['94.8', '68.4']
mean IoU: 81.6

[epoch: 138]
train_loss: 0.3281
lr: 0.003450
dice coefficient: 0.810
global correct: 95.3
average row correct: ['97.5', '79.8']
IoU: ['94.7', '68.2']
mean IoU: 81.5

[epoch: 139]
train_loss: 0.3283
lr: 0.003399
dice coefficient: 0.811
global correct: 95.2
average row correct: ['97.2', '81.6']
IoU: ['94.6', '68.3']
mean IoU: 81.5

[epoch: 140]
train_loss: 0.3344
lr: 0.003348
dice coefficient: 0.811
global correct: 95.3
average row correct: ['97.5', '80.1']
IoU: ['94.7', '68.4']
mean IoU: 81.6

[epoch: 141]
train_loss: 0.3331
lr: 0.003297
dice coefficient: 0.810
global correct: 95.3
average row correct: ['97.7', '78.8']
IoU: ['94.8', '68.2']
mean IoU: 81.5

[epoch: 142]
train_loss: 0.3339
lr: 0.003246
dice coefficient: 0.811
global correct: 95.2
average row correct: ['97.3', '81.0']
IoU: ['94.7', '68.2']
mean IoU: 81.4

[epoch: 143]
train_loss: 0.3274
lr: 0.003194
dice coefficient: 0.810
global correct: 95.1
average row correct: ['97.1', '81.7']
IoU: ['94.6', '68.1']
mean IoU: 81.3

[epoch: 144]
train_loss: 0.3410
lr: 0.003143
dice coefficient: 0.807
global correct: 94.9
average row correct: ['96.3', '84.7']
IoU: ['94.2', '67.7']
mean IoU: 81.0

[epoch: 145]
train_loss: 0.3397
lr: 0.003092
dice coefficient: 0.802
global correct: 94.7
average row correct: ['96.1', '84.9']
IoU: ['94.0', '67.0']
mean IoU: 80.5

[epoch: 146]
train_loss: 0.3273
lr: 0.003040
dice coefficient: 0.811
global correct: 95.2
average row correct: ['97.2', '81.7']
IoU: ['94.6', '68.4']
mean IoU: 81.5

[epoch: 147]
train_loss: 0.3300
lr: 0.002988
dice coefficient: 0.810
global correct: 95.1
average row correct: ['96.9', '82.7']
IoU: ['94.5', '68.2']
mean IoU: 81.3

[epoch: 148]
train_loss: 0.3318
lr: 0.002937
dice coefficient: 0.811
global correct: 95.2
average row correct: ['97.2', '81.3']
IoU: ['94.6', '68.3']
mean IoU: 81.4

[epoch: 149]
train_loss: 0.3350
lr: 0.002885
dice coefficient: 0.810
global correct: 95.2
average row correct: ['97.5', '80.1']
IoU: ['94.7', '68.2']
mean IoU: 81.4

[epoch: 150]
train_loss: 0.3335
lr: 0.002833
dice coefficient: 0.811
global correct: 95.2
average row correct: ['97.4', '80.4']
IoU: ['94.7', '68.2']
mean IoU: 81.5

[epoch: 151]
train_loss: 0.3259
lr: 0.002781
dice coefficient: 0.810
global correct: 95.1
average row correct: ['97.1', '81.9']
IoU: ['94.6', '68.2']
mean IoU: 81.4

[epoch: 152]
train_loss: 0.3304
lr: 0.002728
dice coefficient: 0.810
global correct: 95.1
average row correct: ['97.1', '81.6']
IoU: ['94.6', '68.2']
mean IoU: 81.4

[epoch: 153]
train_loss: 0.3352
lr: 0.002676
dice coefficient: 0.811
global correct: 95.2
average row correct: ['97.4', '80.6']
IoU: ['94.7', '68.3']
mean IoU: 81.5

[epoch: 154]
train_loss: 0.3272
lr: 0.002624
dice coefficient: 0.811
global correct: 95.2
average row correct: ['97.1', '82.0']
IoU: ['94.6', '68.3']
mean IoU: 81.5

[epoch: 155]
train_loss: 0.3339
lr: 0.002571
dice coefficient: 0.811
global correct: 95.2
average row correct: ['97.2', '81.6']
IoU: ['94.6', '68.4']
mean IoU: 81.5

[epoch: 156]
train_loss: 0.3288
lr: 0.002519
dice coefficient: 0.812
global correct: 95.3
average row correct: ['97.4', '80.5']
IoU: ['94.7', '68.4']
mean IoU: 81.6

[epoch: 157]
train_loss: 0.3247
lr: 0.002466
dice coefficient: 0.812
global correct: 95.2
average row correct: ['97.3', '81.0']
IoU: ['94.7', '68.4']
mean IoU: 81.6

[epoch: 158]
train_loss: 0.3381
lr: 0.002413
dice coefficient: 0.812
global correct: 95.2
average row correct: ['97.2', '81.7']
IoU: ['94.7', '68.5']
mean IoU: 81.6

[epoch: 159]
train_loss: 0.3318
lr: 0.002360
dice coefficient: 0.812
global correct: 95.2
average row correct: ['97.0', '82.8']
IoU: ['94.6', '68.5']
mean IoU: 81.5

[epoch: 160]
train_loss: 0.3281
lr: 0.002307
dice coefficient: 0.813
global correct: 95.3
average row correct: ['97.3', '81.1']
IoU: ['94.7', '68.6']
mean IoU: 81.7

[epoch: 161]
train_loss: 0.3322
lr: 0.002253
dice coefficient: 0.813
global correct: 95.3
average row correct: ['97.4', '80.9']
IoU: ['94.7', '68.6']
mean IoU: 81.7

[epoch: 162]
train_loss: 0.3288
lr: 0.002200
dice coefficient: 0.812
global correct: 95.2
average row correct: ['97.1', '81.8']
IoU: ['94.6', '68.4']
mean IoU: 81.5

[epoch: 163]
train_loss: 0.3301
lr: 0.002146
dice coefficient: 0.811
global correct: 95.2
average row correct: ['97.4', '80.5']
IoU: ['94.7', '68.3']
mean IoU: 81.5

[epoch: 164]
train_loss: 0.3272
lr: 0.002093
dice coefficient: 0.809
global correct: 95.3
average row correct: ['97.7', '78.6']
IoU: ['94.8', '68.0']
mean IoU: 81.4

[epoch: 165]
train_loss: 0.3313
lr: 0.002039
dice coefficient: 0.811
global correct: 95.3
average row correct: ['97.6', '79.8']
IoU: ['94.8', '68.3']
mean IoU: 81.6

[epoch: 166]
train_loss: 0.3281
lr: 0.001985
dice coefficient: 0.811
global correct: 95.2
average row correct: ['97.1', '81.8']
IoU: ['94.6', '68.3']
mean IoU: 81.5

[epoch: 167]
train_loss: 0.3335
lr: 0.001930
dice coefficient: 0.813
global correct: 95.2
average row correct: ['97.2', '82.0']
IoU: ['94.7', '68.6']
mean IoU: 81.6

[epoch: 168]
train_loss: 0.3280
lr: 0.001876
dice coefficient: 0.813
global correct: 95.3
average row correct: ['97.3', '81.2']
IoU: ['94.7', '68.6']
mean IoU: 81.6

[epoch: 169]
train_loss: 0.3346
lr: 0.001822
dice coefficient: 0.813
global correct: 95.3
average row correct: ['97.4', '81.0']
IoU: ['94.8', '68.6']
mean IoU: 81.7

[epoch: 170]
train_loss: 0.3314
lr: 0.001767
dice coefficient: 0.813
global correct: 95.2
average row correct: ['97.1', '82.1']
IoU: ['94.6', '68.6']
mean IoU: 81.6

[epoch: 171]
train_loss: 0.3287
lr: 0.001712
dice coefficient: 0.813
global correct: 95.2
average row correct: ['97.1', '82.1']
IoU: ['94.7', '68.6']
mean IoU: 81.6

[epoch: 172]
train_loss: 0.3258
lr: 0.001657
dice coefficient: 0.813
global correct: 95.2
average row correct: ['97.1', '82.0']
IoU: ['94.7', '68.6']
mean IoU: 81.6

[epoch: 173]
train_loss: 0.3413
lr: 0.001601
dice coefficient: 0.812
global correct: 95.2
average row correct: ['97.3', '81.3']
IoU: ['94.7', '68.5']
mean IoU: 81.6

[epoch: 174]
train_loss: 0.3314
lr: 0.001546
dice coefficient: 0.812
global correct: 95.2
average row correct: ['97.0', '82.6']
IoU: ['94.6', '68.4']
mean IoU: 81.5

[epoch: 175]
train_loss: 0.3314
lr: 0.001490
dice coefficient: 0.812
global correct: 95.1
average row correct: ['96.9', '83.0']
IoU: ['94.6', '68.5']
mean IoU: 81.5

[epoch: 176]
train_loss: 0.3302
lr: 0.001434
dice coefficient: 0.813
global correct: 95.2
average row correct: ['97.1', '82.0']
IoU: ['94.7', '68.6']
mean IoU: 81.6

[epoch: 177]
train_loss: 0.3378
lr: 0.001378
dice coefficient: 0.813
global correct: 95.3
average row correct: ['97.4', '81.0']
IoU: ['94.7', '68.6']
mean IoU: 81.7

[epoch: 178]
train_loss: 0.3316
lr: 0.001321
dice coefficient: 0.812
global correct: 95.3
average row correct: ['97.3', '81.0']
IoU: ['94.7', '68.5']
mean IoU: 81.6

[epoch: 179]
train_loss: 0.3241
lr: 0.001265
dice coefficient: 0.812
global correct: 95.2
average row correct: ['97.2', '81.5']
IoU: ['94.7', '68.4']
mean IoU: 81.5

[epoch: 180]
train_loss: 0.3229
lr: 0.001208
dice coefficient: 0.810
global correct: 95.1
average row correct: ['96.9', '82.8']
IoU: ['94.5', '68.2']
mean IoU: 81.3

[epoch: 181]
train_loss: 0.3339
lr: 0.001150
dice coefficient: 0.810
global correct: 95.1
average row correct: ['96.8', '83.2']
IoU: ['94.5', '68.1']
mean IoU: 81.3

[epoch: 182]
train_loss: 0.3231
lr: 0.001093
dice coefficient: 0.810
global correct: 95.1
average row correct: ['96.9', '82.8']
IoU: ['94.5', '68.2']
mean IoU: 81.4

[epoch: 183]
train_loss: 0.3320
lr: 0.001035
dice coefficient: 0.811
global correct: 95.1
average row correct: ['96.9', '82.6']
IoU: ['94.5', '68.3']
mean IoU: 81.4

[epoch: 184]
train_loss: 0.3238
lr: 0.000976
dice coefficient: 0.812
global correct: 95.2
average row correct: ['97.2', '81.7']
IoU: ['94.6', '68.5']
mean IoU: 81.6

[epoch: 185]
train_loss: 0.3318
lr: 0.000917
dice coefficient: 0.812
global correct: 95.3
average row correct: ['97.3', '81.0']
IoU: ['94.7', '68.5']
mean IoU: 81.6

[epoch: 186]
train_loss: 0.3272
lr: 0.000858
dice coefficient: 0.812
global correct: 95.3
average row correct: ['97.4', '80.5']
IoU: ['94.7', '68.5']
mean IoU: 81.6

[epoch: 187]
train_loss: 0.3309
lr: 0.000799
dice coefficient: 0.812
global correct: 95.3
average row correct: ['97.4', '80.8']
IoU: ['94.7', '68.5']
mean IoU: 81.6

[epoch: 188]
train_loss: 0.3290
lr: 0.000738
dice coefficient: 0.812
global correct: 95.2
average row correct: ['97.3', '81.3']
IoU: ['94.7', '68.5']
mean IoU: 81.6

[epoch: 189]
train_loss: 0.3338
lr: 0.000678
dice coefficient: 0.812
global correct: 95.2
average row correct: ['97.1', '82.1']
IoU: ['94.6', '68.4']
mean IoU: 81.5

[epoch: 190]
train_loss: 0.3240
lr: 0.000616
dice coefficient: 0.812
global correct: 95.2
average row correct: ['97.1', '82.0']
IoU: ['94.6', '68.4']
mean IoU: 81.5

[epoch: 191]
train_loss: 0.3227
lr: 0.000554
dice coefficient: 0.813
global correct: 95.2
average row correct: ['97.2', '81.7']
IoU: ['94.7', '68.5']
mean IoU: 81.6

[epoch: 192]
train_loss: 0.3224
lr: 0.000492
dice coefficient: 0.813
global correct: 95.2
average row correct: ['97.3', '81.4']
IoU: ['94.7', '68.5']
mean IoU: 81.6

[epoch: 193]
train_loss: 0.3254
lr: 0.000428
dice coefficient: 0.813
global correct: 95.3
average row correct: ['97.3', '81.3']
IoU: ['94.7', '68.5']
mean IoU: 81.6

[epoch: 194]
train_loss: 0.3269
lr: 0.000363
dice coefficient: 0.813
global correct: 95.2
average row correct: ['97.3', '81.4']
IoU: ['94.7', '68.6']
mean IoU: 81.6

[epoch: 195]
train_loss: 0.3352
lr: 0.000297
dice coefficient: 0.813
global correct: 95.2
average row correct: ['97.2', '81.9']
IoU: ['94.7', '68.5']
mean IoU: 81.6

[epoch: 196]
train_loss: 0.3217
lr: 0.000229
dice coefficient: 0.813
global correct: 95.2
average row correct: ['97.2', '81.7']
IoU: ['94.7', '68.6']
mean IoU: 81.6

[epoch: 197]
train_loss: 0.3253
lr: 0.000159
dice coefficient: 0.813
global correct: 95.2
average row correct: ['97.2', '81.7']
IoU: ['94.7', '68.6']
mean IoU: 81.6

[epoch: 198]
train_loss: 0.3281
lr: 0.000085
dice coefficient: 0.813
global correct: 95.2
average row correct: ['97.2', '81.6']
IoU: ['94.7', '68.6']
mean IoU: 81.6

[epoch: 199]
train_loss: 0.3214
lr: 0.000000
dice coefficient: 0.813
global correct: 95.2
average row correct: ['97.2', '81.5']
IoU: ['94.7', '68.6']
mean IoU: 81.6


================================================
FILE: pytorch_segmentation/unet/src/__init__.py
================================================
from .unet import UNet
from .mobilenet_unet import MobileV3Unet
from .vgg_unet import VGG16UNet


================================================
FILE: pytorch_segmentation/unet/src/mobilenet_unet.py
================================================
from collections import OrderedDict
from typing import Dict
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from torchvision.models import mobilenet_v3_large
from .unet import Up, OutConv


class IntermediateLayerGetter(nn.ModuleDict):
    """
    Run the direct children of a model in order and collect chosen outputs.

    The children are assumed to have been registered on ``model`` in the
    same order in which they are executed, so a module must **not** be
    reused twice in the forward pass for this wrapper to work.

    Only direct children can be queried: with ``model`` passed in,
    ``model.feature1`` can be returned but ``model.feature1.layer2`` cannot.
    Children registered after the last requested layer are not kept.

    Args:
        model (nn.Module): model whose intermediate features are extracted
        return_layers (Dict[name, new_name]): maps the name of each child
            whose activation should be returned to the key under which
            that activation is stored in the output dict.

    Raises:
        ValueError: if a key of ``return_layers`` is not the name of a
            direct child of ``model``.
    """
    _version = 2
    __annotations__ = {
        "return_layers": Dict[str, str],
    }

    def __init__(self, model: nn.Module, return_layers: Dict[str, str]) -> None:
        child_names = [child_name for child_name, _ in model.named_children()]
        if not set(return_layers).issubset(child_names):
            raise ValueError("return_layers are not present in model")

        # Names that still have to be reached while walking the children.
        pending = set(map(str, return_layers.keys()))

        # Rebuild the backbone, leaving out every module that comes after
        # the last requested layer.
        kept = OrderedDict()
        for child_name, child in model.named_children():
            kept[child_name] = child
            pending.discard(child_name)
            if not pending:
                break

        super(IntermediateLayerGetter, self).__init__(kept)
        self.return_layers = return_layers

    def forward(self, x: Tensor) -> Dict[str, Tensor]:
        """Feed ``x`` through the kept children and return the requested activations."""
        collected = OrderedDict()
        for child_name, child in self.items():
            x = child(x)
            if child_name not in self.return_layers:
                continue
            collected[self.return_layers[child_name]] = x
        return collected


class MobileV3Unet(nn.Module):
    """
    U-Net style segmentation model with a MobileNetV3-Large encoder.

    Five feature maps are tapped from ``mobilenet_v3_large(...).features``
    (children 1, 3, 6, 12 and 15) and merged from the deepest to the
    shallowest by four ``Up`` blocks. The ``OutConv`` head is applied to the
    result, which is then resized to the spatial size of the input.

    Args:
        num_classes: number of output channels of the ``OutConv`` head.
        pretrain_backbone: forwarded as ``pretrained`` to
            ``mobilenet_v3_large``.
    """
    def __init__(self, num_classes, pretrain_backbone: bool = False):
        super(MobileV3Unet, self).__init__()
        net = mobilenet_v3_large(pretrained=pretrain_backbone)

        # Alternative: load the MobileNetV3-Large weights from a local file
        #   https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth
        #   net.load_state_dict(torch.load("mobilenet_v3_large.pth", map_location='cpu'))

        features = net.features

        tap_indices = [1, 3, 6, 12, 15]
        self.stage_out_channels = [features[idx].out_channels for idx in tap_indices]
        # Child name in `features` -> key of the returned feature map.
        return_layers = {str(idx): f"stage{stage}" for stage, idx in enumerate(tap_indices)}
        self.backbone = IntermediateLayerGetter(features, return_layers=return_layers)

        # Each Up block receives the upsampled deeper map concatenated with
        # its skip connection, hence the summed input widths.
        ch = self.stage_out_channels
        self.up1 = Up(ch[4] + ch[3], ch[3])
        self.up2 = Up(ch[3] + ch[2], ch[2])
        self.up3 = Up(ch[2] + ch[1], ch[1])
        self.up4 = Up(ch[1] + ch[0], ch[0])
        self.conv = OutConv(ch[0], num_classes=num_classes)

    def forward(self, x: torch.Tensor) -> Dict[str, torch.Tensor]:
        """Return ``{"out": logits}`` with logits resized to the input's height and width."""
        out_size = x.shape[-2:]
        feats = self.backbone(x)

        y = self.up1(feats['stage4'], feats['stage3'])
        y = self.up2(y, feats['stage2'])
        y = self.up3(y, feats['stage1'])
        y = self.up4(y, feats['stage0'])

        logits = self.conv(y)
        logits = F.interpolate(logits, size=out_size, mode="bilinear", align_corners=False)
        return {"out": logits}


================================================
FILE: pytorch_segmentation/unet/src/unet.py
================================================
from typing import Dict
import torch
import torch.nn as nn
import torch.nn.functional as F


class DoubleConv(nn.Sequential):
    """
    Two stacked ``Conv2d(3x3, padding=1, bias=False) -> BatchNorm2d -> ReLU`` units.

    Args:
        in_channels: channels of the input tensor.
        out_channels: channels produced by the second convolution.
        mid_channels: channels between the two convolutions; when ``None``
            it falls back to ``out_channels``.
    """
    def __init__(self, in_channels, out_channels, mid_channels=None):
        hidden = out_channels if mid_channels is None else mid_channels

        def conv_bn_relu(c_in, c_out):
            # One unit; the conv has no bias because BatchNorm follows it.
            return [
                nn.Conv2d(c_in, c_out, kernel_size=3, padding=1, bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            ]

        super(DoubleConv, self).__init__(
            *conv_bn_relu(in_channels, hidden),
            *conv_bn_relu(hidden, out_channels)
        )


class Down(nn.Sequential):
    """
    Encoder step: 2x2 max pooling with stride 2 followed by a ``DoubleConv``.

    Args:
        in_channels: channels of the input tensor.
        out_channels: channels produced by the ``DoubleConv``.
    """
    def __init__(self, in_channels, out_channels):
        pool = nn.MaxPool2d(2, stride=2)
        convs = DoubleConv(in_channels, out_channels)
        super(Down, self).__init__(pool, convs)


class Up(nn.Module):
    """
    Decoder step: upsample, pad to the skip connection, concatenate, convolve.

    Args:
        in_channels: channels of the concatenated tensor given to the
            ``DoubleConv``.
        out_channels: channels produced by the ``DoubleConv``.
        bilinear: when True, upsample by a factor of 2 with bilinear
            interpolation (``align_corners=True``) and use
            ``in_channels // 2`` as the ``DoubleConv`` mid width; otherwise
            upsample with a stride-2 ``ConvTranspose2d`` that outputs
            ``in_channels // 2`` channels.
    """
    def __init__(self, in_channels, out_channels, bilinear=True):
        super(Up, self).__init__()
        if not bilinear:
            self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)
        else:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)

    def forward(self, x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor:
        """Upsample ``x1``, pad it to the height/width of ``x2`` and fuse both."""
        upsampled = self.up(x1)

        # Tensors are indexed as [N, C, H, W].
        gap_h = x2.size(2) - upsampled.size(2)
        gap_w = x2.size(3) - upsampled.size(3)
        top = gap_h // 2
        left = gap_w // 2

        # F.pad takes (left, right, top, bottom) for the last two dims; any
        # odd remainder goes to the right / bottom side.
        upsampled = F.pad(upsampled, [left, gap_w - left, top, gap_h - top])

        # Skip connection first, upsampled features second.
        return self.conv(torch.cat([x2, upsampled], dim=1))


class OutConv(nn.Sequential):
    """
    Output head: a single 1x1 convolution.

    Args:
        in_channels: channels of the incoming feature map.
        num_classes: number of output channels.
    """
    def __init__(self, in_channels, num_classes):
        classifier = nn.Conv2d(in_channels, num_classes, kernel_size=1)
        super(OutConv, self).__init__(classifier)


class UNet(nn.Module):
    """
    U-Net with four ``Down`` steps and four ``Up`` steps.

    Args:
        in_channels: channels of the input image.
        num_classes: number of output channels of the ``OutConv`` head.
        bilinear: passed to every ``Up`` block; when True the widths of the
            deepest encoder stage and of the first three decoder stages are
            halved (``factor = 2``).
        base_c: width of the first stage; deeper stages use multiples of it.
    """
    def __init__(self,
                 in_channels: int = 1,
                 num_classes: int = 2,
                 bilinear: bool = True,
                 base_c: int = 64):
        super(UNet, self).__init__()
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.bilinear = bilinear

        # Stage widths: base_c times 1, 2, 4, 8 and 16.
        c1, c2, c4, c8, c16 = (base_c * mult for mult in (1, 2, 4, 8, 16))
        if bilinear:
            factor = 2
        else:
            factor = 1

        # Encoder
        self.in_conv = DoubleConv(in_channels, c1)
        self.down1 = Down(c1, c2)
        self.down2 = Down(c2, c4)
        self.down3 = Down(c4, c8)
        self.down4 = Down(c8, c16 // factor)

        # Decoder
        self.up1 = Up(c16, c8 // factor, bilinear)
        self.up2 = Up(c8, c4 // factor, bilinear)
        self.up3 = Up(c4, c2 // factor, bilinear)
        self.up4 = Up(c2, c1, bilinear)
        self.out_conv = OutConv(c1, num_classes)

    def forward(self, x: torch.Tensor) -> Dict[str, torch.Tensor]:
        """Return ``{"out": logits}`` for the input batch ``x``."""
        # Encoder path; every intermediate result is kept as a skip connection.
        skip1 = self.in_conv(x)
        skip2 = self.down1(skip1)
        skip3 = self.down2(skip2)
        skip4 = self.down3(skip3)
        bottom = self.down4(skip4)

        # Decoder path, consuming the skips from deepest to shallowest.
        y = self.up1(bottom, skip4)
        y = self.up2(y, skip3)
        y = self.up3(y, skip2)
        y = self.up4(y, skip1)

        return {"out": self.out_conv(y)}


================================================
FILE: pytorch_segmentation/unet/src/vgg_unet.py
================================================
from collections import OrderedDict
from typing import Dict

import torch
import torch.nn as nn
from torch import Tensor
from torchvision.models import vgg16_bn
from .unet import Up, OutConv


class IntermediateLayerGetter(nn.ModuleDict):
    """
    Run the direct children of a model in order and collect chosen outputs.

    The children are assumed to have been registered on ``model`` in the
    same order in which they are executed, so a module must **not** be
    reused twice in the forward pass for this wrapper to work.

    Only direct children can be queried: with ``model`` passed in,
    ``model.feature1`` can be returned but ``model.feature1.layer2`` cannot.
    Children registered after the last requested layer are not kept.

    Args:
        model (nn.Module): model whose intermediate features are extracted
        return_layers (Dict[name, new_name]): maps the name of each child
            whose activation should be returned to the key under which
            that activation is stored in the output dict.

    Raises:
        ValueError: if a key of ``return_layers`` is not the name of a
            direct child of ``model``.
    """
    _version = 2
    __annotations__ = {
        "return_layers": Dict[str, str],
    }

    def __init__(self, model: nn.Module, return_layers: Dict[str, str]) -> None:
        child_names = [child_name for child_name, _ in model.named_children()]
        if not set(return_layers).issubset(child_names):
            raise ValueError("return_layers are not present in model")

        # Names that still have to be reached while walking the children.
        pending = set(map(str, return_layers.keys()))

        # Rebuild the backbone, leaving out every module that comes after
        # the last requested layer.
        kept = OrderedDict()
        for child_name, child in model.named_children():
            kept[child_name] = child
            pending.discard(child_name)
            if not pending:
                break

        super(IntermediateLayerGetter, self).__init__(kept)
        self.return_layers = return_layers

    def forward(self, x: Tensor) -> Dict[str, Tensor]:
        """Feed ``x`` through the kept children and return the requested activations."""
        collected = OrderedDict()
        for child_name, child in self.items():
            x = child(x)
            if child_name not in self.return_layers:
                continue
            collected[self.return_layers[child_name]] = x
        return collected


class VGG16UNet(nn.Module):
    """
    U-Net style segmentation model with a VGG16-BN encoder.

    Five feature maps are tapped from ``vgg16_bn(...).features`` (children
    5, 12, 22, 32 and 42) and merged from the deepest to the shallowest by
    four ``Up`` blocks, followed by the ``OutConv`` head.

    Args:
        num_classes: number of output channels of the ``OutConv`` head.
        pretrain_backbone: forwarded as ``pretrained`` to ``vgg16_bn``.
    """
    def __init__(self, num_classes, pretrain_backbone: bool = False):
        super(VGG16UNet, self).__init__()
        net = vgg16_bn(pretrained=pretrain_backbone)

        # Alternative: load the vgg16_bn weights from a local file
        #   https://download.pytorch.org/models/vgg16_bn-6c64b313.pth
        #   net.load_state_dict(torch.load("vgg16_bn.pth", map_location='cpu'))

        features = net.features

        tap_indices = [5, 12, 22, 32, 42]
        # Channel widths of the tapped maps are listed by hand here.
        self.stage_out_channels = [64, 128, 256, 512, 512]
        # Child name in `features` -> key of the returned feature map.
        return_layers = {str(idx): f"stage{stage}" for stage, idx in enumerate(tap_indices)}
        self.backbone = IntermediateLayerGetter(features, return_layers=return_layers)

        # Each Up block receives the upsampled deeper map concatenated with
        # its skip connection, hence the summed input widths.
        ch = self.stage_out_channels
        self.up1 = Up(ch[4] + ch[3], ch[3])
        self.up2 = Up(ch[3] + ch[2], ch[2])
        self.up3 = Up(ch[2] + ch[1], ch[1])
        self.up4 = Up(ch[1] + ch[0], ch[0])
        self.conv = OutConv(ch[0], num_classes=num_classes)

    def forward(self, x: torch.Tensor) -> Dict[str, torch.Tensor]:
        """Return ``{"out": logits}`` for the input batch ``x``."""
        feats = self.backbone(x)

        y = self.up1(feats['stage4'], feats['stage3'])
        y = self.up2(y, feats['stage2'])
        y = self.up3(y, feats['stage1'])
        y = self.up4(y, feats['stage0'])

        return {"out": self.conv(y)}


================================================
FILE: pytorch_segmentation/unet/train.py
================================================
import os
import time
import datetime

import torch

from src import UNet
from train_utils import train_one_epoch, evaluate, create_lr_scheduler
from my_dataset import DriveDataset
import transforms as T


class SegmentationPresetTrain:
    """
    Training-time transform pipeline applied to an (image, target) pair.

    The pipeline is, in order: ``T.RandomResize`` with bounds
    ``int(0.5 * base_size)`` and ``int(1.2 * base_size)``, an optional
    horizontal flip, an optional vertical flip, ``T.RandomCrop(crop_size)``,
    ``T.ToTensor()`` and ``T.Normalize(mean, std)``.

    Args:
        base_size: reference size from which the resize bounds are derived.
        crop_size: size handed to ``T.RandomCrop``.
        hflip_prob: horizontal flip probability; the flip is skipped unless
            it is greater than 0.
        vflip_prob: vertical flip probability; the flip is skipped unless
            it is greater than 0.
        mean: per-channel mean for ``T.Normalize``.
        std: per-channel std for ``T.Normalize``.
    """
    def __init__(self, base_size, crop_size, hflip_prob=0.5, vflip_prob=0.5,
                 mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        pipeline = [T.RandomResize(int(0.5 * base_size), int(1.2 * base_size))]

        if hflip_prob > 0:
            pipeline += [T.RandomHorizontalFlip(hflip_prob)]
        if vflip_prob > 0:
            pipeline += [T.RandomVerticalFlip(vflip_prob)]

        pipeline += [
            T.RandomCrop(crop_size),
            T.ToTensor(),
            T.Normalize(mean=mean, std=std),
        ]
        self.transforms = T.Compose(pipeline)

    def __call__(self, img, target):
        """Apply the composed pipeline to ``img`` and ``target`` and return its result."""
        return self.transforms(img, target)


class SegmentationPresetEval:
    """
    Evaluation-time transform pipeline: ``T.ToTensor()`` then ``T.Normalize``.

    Args:
        mean: per-channel mean for ``T.Normalize``.
        std: per-channel std for ``T.Normalize``.
    """
    def __init__(self, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        to_tensor = T.ToTensor()
        normalize = T.Normalize(mean=mean, std=std)
        self.transforms = T.Compose([to_tensor, normalize])

    def __call__(self, img, target):
        """Apply the composed pipeline to ``img`` and ``target`` and return its result."""
        return self.transforms(img, target)


def get_transform(train, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """
    Select the transform preset for a dataset split.

    Args:
        train: truthy for the training preset, falsy for the evaluation one.
        mean: per-channel mean passed on to the preset.
        std: per-channel std passed on to the preset.

    Returns:
        ``SegmentationPresetTrain`` built with ``base_size=565`` and
        ``crop_size=480`` when ``train`` is truthy, otherwise
        ``SegmentationPresetEval``.
    """
    if not train:
        return SegmentationPresetEval(mean=mean, std=std)

    return SegmentationPresetTrain(565, 480, mean=mean, std=std)


def create_model(num_classes):
    """Build the ``UNet`` used by this script: 3 input channels and ``base_c=32``."""
    return UNet(in_channels=3, num_classes=num_classes, base_c=32)


def main(args):
    """Train UNet on the DRIVE dataset on a single device and write checkpoints.

    Args:
        args: parsed command-line namespace. This function reads ``device``,
            ``batch_size``, ``num_classes``, ``data_path``, ``lr``,
            ``momentum``, ``weight_decay``, ``amp``, ``epochs``, ``resume``,
            ``start_epoch``, ``print_freq`` and ``save_best``.
            ``args.start_epoch`` is overwritten when resuming from a checkpoint.

    Side effects:
        Appends per-epoch metrics to a timestamped ``results*.txt`` file and
        saves checkpoints under ``save_weights/`` (the directory must exist).
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    batch_size = args.batch_size
    # segmentation num_classes + background
    num_classes = args.num_classes + 1

    # using compute_mean_std.py
    mean = (0.709, 0.381, 0.224)
    std = (0.127, 0.079, 0.043)

    # file that accumulates training and validation information for every epoch
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    train_dataset = DriveDataset(args.data_path,
                                 train=True,
                                 transforms=get_transform(train=True, mean=mean, std=std))

    val_dataset = DriveDataset(args.data_path,
                               train=False,
                               transforms=get_transform(train=False, mean=mean, std=std))

    # os.cpu_count() returns None when the CPU count cannot be determined;
    # fall back to 1 so that min() never compares None with an int.
    num_workers = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               num_workers=num_workers,
                                               shuffle=True,
                                               pin_memory=True,
                                               collate_fn=train_dataset.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             num_workers=num_workers,
                                             pin_memory=True,
                                             collate_fn=val_dataset.collate_fn)

    model = create_model(num_classes=num_classes)
    model.to(device)

    params_to_optimize = [p for p in model.parameters() if p.requires_grad]

    optimizer = torch.optim.SGD(
        params_to_optimize,
        lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay
    )

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # learning-rate schedule that is stepped once per iteration (not once per epoch)
    lr_scheduler = create_lr_scheduler(optimizer, len(train_loader), args.epochs, warmup=True)

    best_dice = 0.
    if args.resume:
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        # Restore the best score so a resumed run does not overwrite
        # best_model.pth with a worse epoch; older checkpoints lack the key.
        best_dice = checkpoint.get("best_dice", 0.)
        if args.amp:
            scaler.load_state_dict(checkpoint["scaler"])

    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        mean_loss, lr = train_one_epoch(model, optimizer, train_loader, device, epoch, num_classes,
                                        lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler)

        confmat, dice = evaluate(model, val_loader, device=device, num_classes=num_classes)
        val_info = str(confmat)
        print(val_info)
        print(f"dice coefficient: {dice:.3f}")
        # write into txt
        with open(results_file, "a") as f:
            # record train_loss, lr and the validation metrics of this epoch
            train_info = f"[epoch: {epoch}]\n" \
                         f"train_loss: {mean_loss:.4f}\n" \
                         f"lr: {lr:.6f}\n" \
                         f"dice coefficient: {dice:.3f}\n"
            f.write(train_info + val_info + "\n\n")

        if args.save_best is True:
            # only epochs that improve the dice coefficient are saved
            if best_dice < dice:
                best_dice = dice
            else:
                continue

        save_file = {"model": model.state_dict(),
                     "optimizer": optimizer.state_dict(),
                     "lr_scheduler": lr_scheduler.state_dict(),
                     "epoch": epoch,
                     "best_dice": best_dice,
                     "args": args}
        if args.amp:
            save_file["scaler"] = scaler.state_dict()

        if args.save_best is True:
            torch.save(save_file, "save_weights/best_model.pth")
        else:
            torch.save(save_file, "save_weights/model_{}.pth".format(epoch))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print("training time {}".format(total_time_str))


def parse_args():
    """Parse the command-line options of the single-device UNet training script.

    Returns:
        argparse.Namespace holding the options declared below.
    """
    import argparse

    def str2bool(value):
        # argparse's ``type=bool`` calls bool() on the raw text, so every
        # non-empty string (including "False") becomes True. Parse the text
        # explicitly instead; the empty string stays False as before.
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ("true", "t", "yes", "y", "1"):
            return True
        if text in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got '{}'".format(value))

    parser = argparse.ArgumentParser(description="pytorch unet training")

    parser.add_argument("--data-path", default="./", help="DRIVE root")
    # exclude background
    parser.add_argument("--num-classes", default=1, type=int)
    parser.add_argument("--device", default="cuda", help="training device")
    parser.add_argument("-b", "--batch-size", default=4, type=int)
    parser.add_argument("--epochs", default=200, type=int, metavar="N",
                        help="number of total epochs to train")

    parser.add_argument('--lr', default=0.01, type=float, help='initial learning rate')
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    parser.add_argument('--print-freq', default=1, type=int, help='print frequency')
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                        help='start epoch')
    parser.add_argument('--save-best', default=True, type=str2bool, help='only save best dice weights')
    # Mixed precision training parameters
    parser.add_argument("--amp", default=False, type=str2bool,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()

    return args


if __name__ == '__main__':
    # Parse the command line first so that an argument error (or --help)
    # exits before anything is created on disk.
    cli_args = parse_args()

    # Checkpoints are written into this directory by main().
    weights_dir = "./save_weights"
    if os.path.exists(weights_dir) is False:
        os.mkdir(weights_dir)

    main(cli_args)


================================================
FILE: pytorch_segmentation/unet/train_multi_GPU.py
================================================
import time
import os
import datetime

import torch

from src import UNet
from train_utils import train_one_epoch, evaluate, create_lr_scheduler, init_distributed_mode, save_on_master, mkdir
from my_dataset import DriveDataset
import transforms as T


class SegmentationPresetTrain:
    """Joint image/target augmentation used while training.

    Stage order: ``T.RandomResize`` with bounds ``int(0.5 * base_size)`` and
    ``int(1.2 * base_size)``, optional horizontal/vertical flips,
    ``T.RandomCrop(crop_size)``, ``T.ToTensor`` and ``T.Normalize``.
    """

    def __init__(self, base_size, crop_size, hflip_prob=0.5, vflip_prob=0.5,
                 mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        lower_bound = int(0.5 * base_size)
        upper_bound = int(1.2 * base_size)

        stages = [T.RandomResize(lower_bound, upper_bound)]
        # Flips are skipped entirely when their probability is not positive.
        if hflip_prob > 0:
            stages.append(T.RandomHorizontalFlip(hflip_prob))
        if vflip_prob > 0:
            stages.append(T.RandomVerticalFlip(vflip_prob))
        stages.append(T.RandomCrop(crop_size))
        stages.append(T.ToTensor())
        stages.append(T.Normalize(mean=mean, std=std))
        self.transforms = T.Compose(stages)

    def __call__(self, img, target):
        """Run the composed transforms on ``img`` and ``target``."""
        return self.transforms(img, target)


class SegmentationPresetEval:
    """Evaluation-time preprocessing: ``T.ToTensor`` then ``T.Normalize``."""

    def __init__(self, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        to_tensor = T.ToTensor()
        normalize = T.Normalize(mean=mean, std=std)
        self.transforms = T.Compose([to_tensor, normalize])

    def __call__(self, img, target):
        """Run the composed transforms on ``img`` and ``target``."""
        return self.transforms(img, target)


def get_transform(train, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Select the transform preset for a dataset split.

    A truthy ``train`` yields ``SegmentationPresetTrain`` built with base size
    565 and crop size 480; otherwise ``SegmentationPresetEval`` is returned.
    ``mean`` and ``std`` are passed through to the chosen preset.
    """
    if train:
        preset = SegmentationPresetTrain(565, 480, mean=mean, std=std)
    else:
        preset = SegmentationPresetEval(mean=mean, std=std)
    return preset


def create_model(num_classes):
    """Return a ``UNet`` built with ``in_channels=3``, ``base_c=32`` and ``num_classes`` outputs."""
    unet_kwargs = dict(in_channels=3, num_classes=num_classes, base_c=32)
    return UNet(**unet_kwargs)


def main(args):
    """Train (or only evaluate) UNet on DRIVE, optionally with DistributedDataParallel.

    Args:
        args: parsed command-line namespace. ``init_distributed_mode`` adds
            ``distributed`` (and, in distributed runs, ``rank``/``gpu``) to it,
            and ``args.start_epoch`` is overwritten when resuming.

    Raises:
        FileNotFoundError: if ``<data_path>/DRIVE`` does not exist.
    """
    init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)
    # segmentation num_classes + background
    num_classes = args.num_classes + 1

    mean = (0.709, 0.381, 0.224)
    std = (0.127, 0.079, 0.043)

    # file that accumulates the per-epoch training and validation information
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    data_root = args.data_path
    # check data root
    if os.path.exists(os.path.join(data_root, "DRIVE")) is False:
        raise FileNotFoundError("DRIVE dose not in path:'{}'.".format(data_root))

    train_dataset = DriveDataset(args.data_path,
                                 train=True,
                                 transforms=get_transform(train=True, mean=mean, std=std))

    val_dataset = DriveDataset(args.data_path,
                               train=False,
                               transforms=get_transform(train=False, mean=mean, std=std))

    print("Creating data loaders")
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        test_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
    else:
        train_sampler = torch.utils.data.RandomSampler(train_dataset)
        test_sampler = torch.utils.data.SequentialSampler(val_dataset)

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size,
        sampler=train_sampler, num_workers=args.workers,
        collate_fn=train_dataset.collate_fn, drop_last=True)

    # The validation loader collates with its own dataset, matching train.py.
    val_data_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=1,
        sampler=test_sampler, num_workers=args.workers,
        collate_fn=val_dataset.collate_fn)

    print("Creating model")
    # create model num_classes equal background + foreground classes
    model = create_model(num_classes=num_classes)
    model.to(device)

    if args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    params_to_optimize = [p for p in model_without_ddp.parameters() if p.requires_grad]

    optimizer = torch.optim.SGD(
        params_to_optimize,
        lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # learning-rate schedule that is stepped once per iteration (not once per epoch)
    lr_scheduler = create_lr_scheduler(optimizer, len(train_data_loader), args.epochs, warmup=True)

    best_dice = 0.
    # when --resume points at a previous checkpoint, continue training from it
    if args.resume:
        # If map_location is missing, torch.load will first load the module to CPU
        # and then copy each parameter to where it was saved,
        # which would result in all processes on the same machine using the same set of devices.
        # The checkpoint also carries the optimizer and lr-scheduler state.
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        # Restore the best score so a resumed run does not overwrite
        # best_model.pth with a worse epoch; older checkpoints lack the key.
        best_dice = checkpoint.get("best_dice", 0.)
        if args.amp:
            scaler.load_state_dict(checkpoint["scaler"])

    if args.test_only:
        # evaluate() returns (confusion matrix, dice); unpack both so that the
        # printed report is the matrix summary rather than the tuple repr.
        confmat, dice = evaluate(model, val_data_loader, device=device, num_classes=num_classes)
        val_info = str(confmat)
        print(val_info)
        print(f"dice coefficient: {dice:.3f}")
        return

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        mean_loss, lr = train_one_epoch(model, optimizer, train_data_loader, device, epoch, num_classes,
                                        lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler)

        confmat, dice = evaluate(model, val_data_loader, device=device, num_classes=num_classes)
        val_info = str(confmat)
        print(val_info)
        print(f"dice coefficient: {dice:.3f}")

        # Only the main process writes the results file. ``args.rank`` is not
        # set when running without distributed mode, hence the getattr default.
        if getattr(args, "rank", -1) in [-1, 0]:
            # write into txt
            with open(results_file, "a") as f:
                # record train_loss, lr and the validation metrics of this epoch
                train_info = f"[epoch: {epoch}]\n" \
                             f"train_loss: {mean_loss:.4f}\n" \
                             f"lr: {lr:.6f}\n" \
                             f"dice coefficient: {dice:.3f}\n"
                f.write(train_info + val_info + "\n\n")

        if args.save_best is True:
            # only epochs that improve the dice coefficient are saved
            if best_dice < dice:
                best_dice = dice
            else:
                continue

        if args.output_dir:
            # save_on_master only writes the checkpoint on the main process
            save_file = {'model': model_without_ddp.state_dict(),
                         'optimizer': optimizer.state_dict(),
                         'lr_scheduler': lr_scheduler.state_dict(),
                         'args': args,
                         'best_dice': best_dice,
                         'epoch': epoch}
            if args.amp:
                save_file["scaler"] = scaler.state_dict()

            if args.save_best is True:
                save_on_master(save_file,
                               os.path.join(args.output_dir, 'best_model.pth'))
            else:
                save_on_master(save_file,
                               os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))


if __name__ == "__main__":
    import argparse

    def _str2bool(value):
        # argparse's ``type=bool`` calls bool() on the raw text, so every
        # non-empty string (including "False") becomes True. Parse the text
        # explicitly instead; the empty string stays False as before.
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ("true", "t", "yes", "y", "1"):
            return True
        if text in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got '{}'".format(value))

    parser = argparse.ArgumentParser(
        description=__doc__)

    # root directory that contains the training data (DRIVE)
    parser.add_argument('--data-path', default='./', help='dataset')
    # training device type
    parser.add_argument('--device', default='cuda', help='device')
    # number of target classes (background not included)
    parser.add_argument('--num-classes', default=1, type=int, help='num_classes')
    # batch size on each GPU
    parser.add_argument('-b', '--batch-size', default=4, type=int,
                        help='images per gpu, the total batch size is $NGPU x batch_size')
    # epoch index from which training continues
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # total number of training epochs
    parser.add_argument('--epochs', default=200, type=int, metavar='N',
                        help='number of total epochs to run')
    # whether to use synchronized BN across GPUs; off by default
    # (the original note says enabling it slows training down)
    parser.add_argument('--sync_bn', type=_str2bool, default=False, help='whether using SyncBatchNorm')
    # number of workers used for data loading and preprocessing
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    # learning rate, 0.01 by default (the original note suggests multiplying it
    # by n when using n GPUs); adjust it if results are poor
    parser.add_argument('--lr', default=0.01, type=float,
                        help='initial learning rate')
    # SGD momentum
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # SGD weight decay
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # keep only the weights with the highest dice coefficient
    parser.add_argument('--save-best', default=True, type=_str2bool, help='only save best weights')
    # how often training information is printed
    parser.add_argument('--print-freq', default=1, type=int, help='print frequency')
    # directory in which checkpoints are saved
    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')
    # continue training from a previous checkpoint
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    # do not train, only evaluate
    parser.add_argument(
        "--test-only",
        dest="test_only",
        help="Only test the model",
        action="store_true",
    )

    # number of distributed processes
    parser.add_argument('--world-size', default=1, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
    # Mixed precision training parameters
    parser.add_argument("--amp", default=False, type=_str2bool,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()

    # create the output directory if one was given and it does not exist yet
    if args.output_dir:
        mkdir(args.output_dir)

    main(args)


================================================
FILE: pytorch_segmentation/unet/train_utils/__init__.py
================================================
from .train_and_eval import train_one_epoch, evaluate, create_lr_scheduler
from .distributed_utils import init_distributed_mode, save_on_master, mkdir


================================================
FILE: pytorch_segmentation/unet/train_utils/dice_coefficient_loss.py
================================================
import torch
import torch.nn as nn


def build_target(target: torch.Tensor, num_classes: int = 2, ignore_index: int = -100):
    """Turn an integer label map into a channel-first one-hot target for dice computation.

    The labels are one-hot encoded along a new last axis and the result is
    permuted with ``(0, 3, 1, 2)``, i.e. [N, H, W] -> [N, H, W, C] -> [N, C, H, W].
    When ``ignore_index`` is non-negative, pixels equal to it are encoded as
    class 0 first and afterwards every channel at those pixels is overwritten
    with ``ignore_index``. ``target`` itself is not modified.
    """
    labels = target.clone()

    if ignore_index < 0:
        # No ignore handling requested: plain one-hot encoding.
        encoded = nn.functional.one_hot(labels, num_classes).float()
        return encoded.permute(0, 3, 1, 2)

    ignored = torch.eq(target, ignore_index)
    # one_hot needs valid class ids, so ignored pixels are parked on class 0.
    labels[ignored] = 0
    encoded = nn.functional.one_hot(labels, num_classes).float()
    # Mark the ignored pixels in all channels so they can be filtered out later.
    encoded[ignored] = ignore_index
    return encoded.permute(0, 3, 1, 2)


def dice_coeff(x: torch.Tensor, target: torch.Tensor, ignore_index: int = -100, epsilon=1e-6):
    """Return the Dice coefficient of one class averaged over the batch.

    Each sample along dimension 0 is flattened; when ``ignore_index`` is
    non-negative, positions where the target equals it are dropped from both
    tensors. ``epsilon`` is added to numerator and denominator.
    """
    n_samples = x.shape[0]
    running = 0.
    for idx in range(n_samples):
        pred_flat = x[idx].reshape(-1)
        gt_flat = target[idx].reshape(-1)
        if ignore_index >= 0:
            # keep only the positions whose target is not ignore_index
            keep = torch.ne(gt_flat, ignore_index)
            pred_flat, gt_flat = pred_flat[keep], gt_flat[keep]

        overlap = torch.dot(pred_flat, gt_flat)
        denominator = torch.sum(pred_flat) + torch.sum(gt_flat)
        if denominator == 0:
            # both prediction and target are empty: the ratio below becomes 1
            denominator = 2 * overlap

        running = running + (2 * overlap + epsilon) / (denominator + epsilon)

    return running / n_samples


def multiclass_dice_coeff(x: torch.Tensor, target: torch.Tensor, ignore_index: int = -100, epsilon=1e-6):
    """Return the mean of ``dice_coeff`` over the channels (dimension 1) of ``x``."""
    n_channels = x.shape[1]
    per_channel = [
        dice_coeff(x[:, c, ...], target[:, c, ...], ignore_index, epsilon)
        for c in range(n_channels)
    ]
    return sum(per_channel, 0.) / n_channels


def dice_loss(x: torch.Tensor, target: torch.Tensor, multiclass: bool = False, ignore_index: int = -100):
    """Return ``1 - dice`` computed on the softmax (over dimension 1) of ``x``.

    ``multiclass`` selects ``multiclass_dice_coeff`` instead of ``dice_coeff``.
    """
    probabilities = nn.functional.softmax(x, dim=1)
    if multiclass:
        score = multiclass_dice_coeff(probabilities, target, ignore_index=ignore_index)
    else:
        score = dice_coeff(probabilities, target, ignore_index=ignore_index)
    return 1 - score


================================================
FILE: pytorch_segmentation/unet/train_utils/distributed_utils.py
================================================
from collections import defaultdict, deque
import datetime
import time
import torch
import torch.nn.functional as F
import torch.distributed as dist

import errno
import os

from .dice_coefficient_loss import multiclass_dice_coeff, build_target


class SmoothedValue(object):
    """Keep a bounded window of recent values plus a running total/count.

    The window (``self.deque``) backs ``median``, ``avg``, ``max`` and
    ``value``; ``total`` and ``count`` back ``global_avg``.
    """

    def __init__(self, window_size=20, fmt=None):
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0
        # Default rendering: latest value followed by the global average.
        self.fmt = "{value:.4f} ({global_avg:.4f})" if fmt is None else fmt

    def update(self, value, n=1):
        """Record ``value`` in the window and add it ``n`` times to the running total."""
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Sum ``count`` and ``total`` across processes.

        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        packed = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
        dist.barrier()
        dist.all_reduce(packed)
        reduced_count, reduced_total = packed.tolist()
        self.count = int(reduced_count)
        self.total = reduced_total

    @property
    def median(self):
        window = torch.tensor(list(self.deque))
        return window.median().item()

    @property
    def avg(self):
        window = torch.tensor(list(self.deque), dtype=torch.float32)
        return window.mean().item()

    @property
    def global_avg(self):
        return self.total / self.count

    @property
    def max(self):
        return max(self.deque)

    @property
    def value(self):
        return self.deque[-1]

    def __str__(self):
        stats = dict(
            median=self.median,
            avg=self.avg,
            global_avg=self.global_avg,
            max=self.max,
            value=self.value,
        )
        return self.fmt.format(**stats)


class ConfusionMatrix(object):
    """Accumulate an ``n x n`` confusion matrix (rows: ground truth, columns: prediction)."""

    def __init__(self, num_classes):
        self.num_classes = num_classes
        self.mat = None

    def update(self, a, b):
        """Add the pairs (ground truth ``a``, prediction ``b``) to the matrix."""
        n = self.num_classes
        if self.mat is None:
            # lazily create the matrix on the device of the first batch
            self.mat = torch.zeros((n, n), dtype=torch.int64, device=a.device)
        with torch.no_grad():
            # only ground-truth labels inside [0, n) are counted
            valid = (a >= 0) & (a < n)
            # encode each (truth, prediction) pair as one flat index: truth * n + prediction
            flat_index = n * a[valid].to(torch.int64) + b[valid]
            counts = torch.bincount(flat_index, minlength=n**2)
            self.mat += counts.reshape(n, n)

    def reset(self):
        """Zero the matrix in place if it has been created."""
        if self.mat is None:
            return
        self.mat.zero_()

    def compute(self):
        """Return (global accuracy, per-class accuracy, per-class IoU)."""
        h = self.mat.float()
        correct = torch.diag(h)
        per_truth = h.sum(1)
        per_pred = h.sum(0)
        # the diagonal holds the correctly classified counts
        acc_global = correct.sum() / h.sum()
        # accuracy of each class (row-normalised diagonal)
        acc = correct / per_truth
        # IoU of each class: intersection / (truth + prediction - intersection)
        iu = correct / (per_truth + per_pred - correct)
        return acc_global, acc, iu

    def reduce_from_all_processes(self):
        """Sum the matrix across processes when torch.distributed is initialized."""
        if not torch.distributed.is_available():
            return
        if not torch.distributed.is_initialized():
            return
        torch.distributed.barrier()
        torch.distributed.all_reduce(self.mat)

    def __str__(self):
        acc_global, acc, iu = self.compute()
        template = (
            'global correct: {:.1f}\n'
            'average row correct: {}\n'
            'IoU: {}\n'
            'mean IoU: {:.1f}')
        row_correct = ['{:.1f}'.format(i) for i in (acc * 100).tolist()]
        class_iou = ['{:.1f}'.format(i) for i in (iu * 100).tolist()]
        return template.format(
            acc_global.item() * 100,
            row_correct,
            class_iou,
            iu.mean().item() * 100)


class DiceCoefficient(object):
    """Running mean of the multiclass Dice coefficient over ``update`` calls.

    Channel 0 (background) is excluded from the score. ``cumulative_dice`` and
    ``count`` are created lazily as one-element tensors on the first update.
    """

    def __init__(self, num_classes: int = 2, ignore_index: int = -100):
        self.cumulative_dice = None
        self.num_classes = num_classes
        self.ignore_index = ignore_index
        self.count = None

    def update(self, pred, target):
        """Accumulate the Dice score of one batch.

        ``pred`` is reduced with ``argmax(dim=1)`` and one-hot encoded;
        ``target`` is converted with ``build_target``.
        """
        if self.cumulative_dice is None:
            self.cumulative_dice = torch.zeros(1, dtype=pred.dtype, device=pred.device)
        if self.count is None:
            self.count = torch.zeros(1, dtype=pred.dtype, device=pred.device)
        # compute the Dice score, ignoring background
        pred = F.one_hot(pred.argmax(dim=1), self.num_classes).permute(0, 3, 1, 2).float()
        dice_target = build_target(target, self.num_classes, self.ignore_index)
        self.cumulative_dice += multiclass_dice_coeff(pred[:, 1:], dice_target[:, 1:], ignore_index=self.ignore_index)
        self.count += 1

    @property
    def value(self):
        """Mean Dice so far; ``0`` when nothing has been accumulated."""
        # ``count`` stays None until the first update; treat that like zero
        # updates instead of dividing None by None.
        if self.count is None or self.count == 0:
            return 0
        return self.cumulative_dice / self.count

    def reset(self):
        """Zero the accumulators in place if they have been created."""
        if self.cumulative_dice is not None:
            self.cumulative_dice.zero_()

        if self.count is not None:
            # Tensor.zero_ is the in-place zeroing method (there is no zeros_).
            self.count.zero_()

    def reduce_from_all_processes(self):
        """Sum the accumulators across processes when torch.distributed is initialized."""
        if not torch.distributed.is_available():
            return
        if not torch.distributed.is_initialized():
            return
        torch.distributed.barrier()
        torch.distributed.all_reduce(self.cumulative_dice)
        torch.distributed.all_reduce(self.count)


class MetricLogger(object):
    """Collect named ``SmoothedValue`` meters and print progress while iterating."""

    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Feed each keyword value into the meter of the same name."""
        for name, val in kwargs.items():
            if isinstance(val, torch.Tensor):
                val = val.item()
            assert isinstance(val, (float, int))
            self.meters[name].update(val)

    def __getattr__(self, attr):
        # Only reached when normal attribute lookup fails: expose meters as attributes.
        if attr in self.meters:
            return self.meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        parts = ["{}: {}".format(name, str(meter)) for name, meter in self.meters.items()]
        return self.delimiter.join(parts)

    def synchronize_between_processes(self):
        """Synchronize every meter across processes."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register ``meter`` under ``name``, replacing any existing one."""
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield the items of ``iterable`` and print a status line every ``print_freq`` items.

        The line contains progress, an ETA, all meters, iteration/data timing
        and, when CUDA is available, the peak allocated memory in MB.
        """
        header = header or ''
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        total = len(iterable)
        # pad the step counter to the width of the total count
        space_fmt = ':' + str(len(str(total))) + 'd'
        cuda_available = torch.cuda.is_available()

        fields = [
            header,
            '[{0' + space_fmt + '}/{1}]',
            'eta: {eta}',
            '{meters}',
            'time: {time}',
            'data: {data}',
        ]
        if cuda_available:
            fields.append('max mem: {memory:.0f}')
        log_msg = self.delimiter.join(fields)

        MB = 1024.0 * 1024.0
        for i, obj in enumerate(iterable):
            # time spent waiting for the next item
            data_time.update(time.time() - end)
            yield obj
            # time for the whole iteration, including the consumer's work
            iter_time.update(time.time() - end)
            if i % print_freq == 0:
                eta_seconds = iter_time.global_avg * (total - i)
                values = dict(
                    eta=str(datetime.timedelta(seconds=int(eta_seconds))),
                    meters=str(self),
                    time=str(iter_time),
                    data=str(data_time),
                )
                if cuda_available:
                    values['memory'] = torch.cuda.max_memory_allocated() / MB
                print(log_msg.format(i, total, **values))
            end = time.time()

        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print('{} Total time: {}'.format(header, total_time_str))


def mkdir(path):
    """Create ``path`` (including parents); an already-existing path is not an error."""
    try:
        os.makedirs(path)
    except OSError as err:
        # EEXIST is tolerated; every other OS error propagates.
        if err.errno == errno.EEXIST:
            return
        raise


def setup_for_distributed(is_master):
    """
    Replace the builtin ``print`` with a version that is silent unless
    ``is_master`` is true or the call passes ``force=True``.
    """
    import builtins as __builtin__
    original_print = __builtin__.print

    def master_only_print(*args, force=False, **kwargs):
        # ``force`` is consumed here and never forwarded to the real print.
        if not (is_master or force):
            return
        original_print(*args, **kwargs)

    __builtin__.print = master_only_print


def is_dist_avail_and_initialized():
    """Return True only if torch.distributed is both available and initialized."""
    return dist.is_available() and dist.is_initialized()


def get_world_size():
    """Return the distributed world size, or 1 when not running distributed."""
    if is_dist_avail_and_initialized():
        return dist.get_world_size()
    return 1


def get_rank():
    """Return this process's distributed rank, or 0 when not running distributed."""
    if is_dist_avail_and_initialized():
        return dist.get_rank()
    return 0


def is_main_process():
    """Return True when this process has rank 0."""
    rank = get_rank()
    return rank == 0


def save_on_master(*args, **kwargs):
    """Forward all arguments to ``torch.save``, but only on the main process."""
    if not is_main_process():
        return
    torch.save(*args, **kwargs)


def init_distributed_mode(args):
    """Configure ``args`` for distributed training and initialise the process group.

    Rank information is taken from ``RANK``/``WORLD_SIZE``/``LOCAL_RANK``, else
    from ``SLURM_PROCID``, else from a pre-set ``args.rank``. When none of
    these is present, ``args.distributed`` is set to False and nothing else
    happens. Otherwise the CUDA device is selected, the NCCL process group is
    initialised and printing is restricted to rank 0.
    """
    env = os.environ
    if 'RANK' in env and 'WORLD_SIZE' in env:
        args.rank = int(env["RANK"])
        args.world_size = int(env['WORLD_SIZE'])
        args.gpu = int(env['LOCAL_RANK'])
    elif 'SLURM_PROCID' in env:
        args.rank = int(env['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()
    elif not hasattr(args, "rank"):
        # no launcher information and no explicit rank: single-process run
        print('Not using distributed mode')
        args.distributed = False
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                         world_size=args.world_size, rank=args.rank)
    is_master = args.rank == 0
    setup_for_distributed(is_master)


================================================
FILE: pytorch_segmentation/unet/train_utils/train_and_eval.py
================================================
import torch
from torch import nn
import train_utils.distributed_utils as utils
from .dice_coefficient_loss import dice_loss, build_target


def criterion(inputs, target, loss_weight=None, num_classes: int = 2, dice: bool = True, ignore_index: int = -100):
    """Compute the training loss for every model output and combine them.

    Each entry of ``inputs`` gets a cross-entropy loss (optionally weighted by
    ``loss_weight``) plus, when ``dice`` is True, a multiclass dice loss. With
    a single output the ``'out'`` loss is returned; otherwise the result is
    ``out + 0.5 * aux``.
    """
    per_output = {}
    for name, logits in inputs.items():
        # Pixels whose target equals ignore_index are skipped (per the original
        # note, value 255 marks object borders or padding).
        value = nn.functional.cross_entropy(logits, target, ignore_index=ignore_index, weight=loss_weight)
        if dice is True:
            dice_target = build_target(target, num_classes, ignore_index)
            value += dice_loss(logits, dice_target, multiclass=True, ignore_index=ignore_index)
        per_output[name] = value

    if len(per_output) != 1:
        return per_output['out'] + 0.5 * per_output['aux']

    return per_output['out']


def evaluate(model, data_loader, device, num_classes):
    """Run ``model`` over ``data_loader`` and return ``(confusion_matrix, dice)``.

    The model is switched to eval mode, gradients are disabled, and both
    metrics are reduced across processes before returning. ``dice`` is a
    Python number obtained via ``.item()``.
    """
    model.eval()
    confmat = utils.ConfusionMatrix(num_classes)
    dice = utils.DiceCoefficient(num_classes=num_classes, ignore_index=255)
    metric_logger = utils.MetricLogger(delimiter="  ")

    with torch.no_grad():
        for image, target in metric_logger.log_every(data_loader, 100, 'Test:'):
            image = image.to(device)
            target = target.to(device)
            logits = model(image)['out']

            predicted = logits.argmax(1)
            confmat.update(target.flatten(), predicted.flatten())
            dice.update(logits, target)

        confmat.reduce_from_all_processes()
        dice.reduce_from_all_processes()

    dice_value = dice.value.item()
    return confmat, dice_value


def train_one_epoch(model, optimizer, data_loader, device, epoch, num_classes,
                    lr_scheduler, print_freq=10, scaler=None):
    """Train ``model`` for one pass over ``data_loader``.

    Mixed precision is used when ``scaler`` is given. The learning-rate
    scheduler is stepped after every batch.

    Returns:
        (global average of the loss meter, learning rate after the last step)
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    # For the two-class case, weight background/foreground in cross entropy
    # (tune these for your own dataset); otherwise no class weighting.
    loss_weight = torch.as_tensor([1.0, 2.0], device=device) if num_classes == 2 else None

    use_amp = scaler is not None
    for image, target in metric_logger.log_every(data_loader, print_freq, header):
        image, target = image.to(device), target.to(device)
        with torch.cuda.amp.autocast(enabled=use_amp):
            output = model(image)
            loss = criterion(output, target, loss_weight, num_classes=num_classes, ignore_index=255)

        optimizer.zero_grad()
        if scaler is None:
            loss.backward()
            optimizer.step()
        else:
            # scaled backward pass and optimizer step for mixed precision
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

        lr_scheduler.step()

        lr = optimizer.param_groups[0]["lr"]
        metric_logger.update(loss=loss.item(), lr=lr)

    return metric_logger.meters["loss"].global_avg, lr


def create_lr_scheduler(optimizer,
                        num_step: int,
                        epochs: int,
                        warmup=True,
                        warmup_epochs=1,
                        warmup_factor=1e-3):
    """Build a per-step LambdaLR with optional linear warmup and "poly" decay.

    Args:
        optimizer: optimizer whose learning rate is scheduled.
        num_step: number of scheduler steps (batches) per epoch, must be > 0.
        epochs: total number of training epochs, must be > 0.
        warmup: whether to run a warmup phase first.
        warmup_epochs: length of the warmup phase in epochs; 0 disables warmup.
        warmup_factor: learning-rate multiplier at step 0 of the warmup.

    Returns:
        A `torch.optim.lr_scheduler.LambdaLR` meant to be stepped once per batch.
    """
    assert num_step > 0 and epochs > 0
    if not warmup:
        warmup_epochs = 0

    warmup_steps = warmup_epochs * num_step
    decay_steps = (epochs - warmup_epochs) * num_step

    def f(x):
        """
        Return the learning-rate multiplier for step `x`.
        Note that pytorch calls lr_scheduler.step() once before training starts.
        """
        # `warmup_steps > 0` avoids a division by zero when warmup is requested
        # with warmup_epochs=0.
        if warmup_steps > 0 and x <= warmup_steps:
            alpha = float(x) / warmup_steps
            # during warmup the multiplier goes from warmup_factor -> 1
            return warmup_factor * (1 - alpha) + alpha

        # after warmup the multiplier goes from 1 -> 0
        # see deeplab_v2: Learning rate policy
        if decay_steps > 0:
            progress = (x - warmup_steps) / decay_steps
        else:
            progress = 1.0
        # Clamp to [0, 1]: stepping past the last scheduled step would otherwise
        # raise a negative base to a fractional power, which is a complex number.
        progress = min(max(progress, 0.0), 1.0)
        return (1 - progress) ** 0.9

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)


================================================
FILE: pytorch_segmentation/unet/transforms.py
================================================
import numpy as np
import random

import torch
from torchvision import transforms as T
from torchvision.transforms import functional as F


def pad_if_smaller(img, size, fill=0):
    """Pad `img` with `fill` so that neither side is shorter than `size`.

    The image is returned untouched when its shorter side is already >= size.
    Padding is passed as (0, 0, padw, padh), i.e. nothing is added on the
    first two sides (left/top, assuming torchvision's left-top-right-bottom order).
    """
    width, height = img.size
    if min(width, height) >= size:
        return img

    pad_w = max(size - width, 0)
    pad_h = max(size - height, 0)
    return F.pad(img, (0, 0, pad_w, pad_h), fill=fill)


class Compose(object):
    """Apply a sequence of paired transforms to an (image, target) pair in order."""

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        # each transform takes and returns the (image, target) pair
        for transform in self.transforms:
            image, target = transform(image, target)
        return image, target


class RandomResize(object):
    """Resize image and target to a size drawn uniformly from [min_size, max_size]."""

    def __init__(self, min_size, max_size=None):
        self.min_size = min_size
        # without an explicit upper bound the size is fixed to min_size
        self.max_size = min_size if max_size is None else max_size

    def __call__(self, image, target):
        short_side = random.randint(self.min_size, self.max_size)
        # An int size is passed, so the shorter image side is scaled to `short_side`.
        resized_image = F.resize(image, short_side)
        # InterpolationMode.NEAREST only exists from torchvision 0.9.0 on;
        # older versions need PIL.Image.NEAREST instead.
        resized_target = F.resize(target, short_side, interpolation=T.InterpolationMode.NEAREST)
        return resized_image, resized_target


class RandomHorizontalFlip(object):
    """Horizontally flip image and target together with probability `flip_prob`."""

    def __init__(self, flip_prob):
        self.flip_prob = flip_prob

    def __call__(self, image, target):
        if random.random() >= self.flip_prob:
            return image, target
        # flip both so the target stays aligned with the image
        flipped_image = F.hflip(image)
        flipped_target = F.hflip(target)
        return flipped_image, flipped_target


class RandomVerticalFlip(object):
    """Vertically flip image and target together with probability `flip_prob`."""

    def __init__(self, flip_prob):
        self.flip_prob = flip_prob

    def __call__(self, image, target):
        if random.random() >= self.flip_prob:
            return image, target
        # flip both so the target stays aligned with the image
        flipped_image = F.vflip(image)
        flipped_target = F.vflip(target)
        return flipped_image, flipped_target


class RandomCrop(object):
    """Crop the same random `size` x `size` window from image and target.

    Inputs smaller than `size` are padded first: the image with the default
    fill value, the target with 255.
    """

    def __init__(self, size):
        self.size = size

    def __call__(self, image, target):
        side = self.size
        image = pad_if_smaller(image, side)
        target = pad_if_smaller(target, side, fill=255)
        # draw the window once and reuse it so image and target stay aligned
        window = T.RandomCrop.get_params(image, (side, side))
        return F.crop(image, *window), F.crop(target, *window)


class CenterCrop(object):
    """Center-crop image and target to `size`."""

    def __init__(self, size):
        self.size = size

    def __call__(self, image, target):
        cropped_image = F.center_crop(image, self.size)
        cropped_target = F.center_crop(target, self.size)
        return cropped_image, cropped_target


class ToTensor(object):
    """Convert the image with `F.to_tensor` and the target to an int64 tensor."""

    def __call__(self, image, target):
        image_tensor = F.to_tensor(image)
        # the target goes through numpy and is not rescaled
        label_array = np.array(target)
        target_tensor = torch.as_tensor(label_array, dtype=torch.int64)
        return image_tensor, target_tensor


class Normalize(object):
    """Normalize the image with the given mean/std; the target passes through unchanged."""

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, image, target):
        normalized = F.normalize(image, mean=self.mean, std=self.std)
        return normalized, target


================================================
FILE: summary_problem.md
================================================
## Tensorflow2.1 GPU安装与Pytorch1.3 GPU安装
参考我之前写的博文：[Centos7 安装Tensorflow2.1 GPU以及Pytorch1.3 GPU（CUDA10.1）](https://blog.csdn.net/qq_37541097/article/details/103933366)


## keras functional api训练的模型权重与subclassed训练的模型权重能否混用 [tensorflow2.0.0]
强烈不建议混用，即使两个模型的名称结构完全一致也不要混用，里面有坑，用什么方法训练的模型就载入相应的模型权重


## 使用subclassed模型时无法使用model.summary() [tensorflow2.0.0]
subclassed模型在实例化时没有自动进行build操作（只有在开始训练时，才会自动进行build），如果需要使用summary操作，需要提前手动build  
model.build((batch_size, height, width, channel))


## 无法使用keras的plot_model(model, 'my_model.png')问题 [tensorflow2.0.0]
#### 在linux下你需要安装一些包：
* pip install pydot==1.2.3
* sudo apt-get install graphviz   
#### 在windows中，同样需要安装一些包（windows比较麻烦）：
* pip install pydot==1.2.3
* 安装graphviz，并添加相关环境变量  
参考链接：https://github.com/XifengGuo/CapsNet-Keras/issues/7

## 为什么每计算一个batch，就需要调用一次optimizer.zero_grad() [Pytorch1.3]   
如果不清除历史梯度，就会对计算的历史梯度进行累加（通过这个特性你能够变相实现一个很大batch数值的训练）   
参考链接：https://www.zhihu.com/question/303070254    

## Pytorch1.3 ImportError: cannot import name 'PILLOW_VERSION' [Pytorch1.3]  
pillow版本过高导致，安装版本号小于7.0.0即可

================================================
FILE: tensorflow_classification/ConfusionMatrix/class_indices.json
================================================
{
    "0": "daisy",
    "1": "dandelion",
    "2": "roses",
    "3": "sunflowers",
    "4": "tulips"
}

================================================
FILE: tensorflow_classification/ConfusionMatrix/main.py
================================================
import os
import math
import json
import glob

from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from tqdm import tqdm
from prettytable import PrettyTable

from model import MobileNetV2


class ConfusionMatrix(object):
    """
    Confusion matrix accumulator with a text summary and a matplotlib plot.
    Rows are indexed by the predicted class, columns by the true class.

    Note: if the plotted figure is cut off, it is a matplotlib version issue;
    this example renders correctly with matplotlib-3.2.1 (windows and ubuntu).
    The prettytable package has to be installed separately.
    """
    def __init__(self, num_classes: int, labels: list):
        self.matrix = np.zeros((num_classes, num_classes))
        self.num_classes = num_classes
        self.labels = labels

    def update(self, preds, labels):
        """Count one sample per (prediction, true label) pair."""
        for pred_idx, true_idx in zip(preds, labels):
            self.matrix[pred_idx, true_idx] += 1

    def summary(self):
        """Print the overall accuracy and a per-class precision/recall/specificity table."""
        def ratio(numerator, denominator):
            # rounded quotient, or 0. when the denominator is zero
            return round(numerator / denominator, 3) if denominator != 0 else 0.

        # accuracy = diagonal sum / total count
        diagonal_sum = sum(self.matrix[i, i] for i in range(self.num_classes))
        acc = diagonal_sum / np.sum(self.matrix)
        print("the model accuracy is ", acc)

        # precision, recall, specificity
        table = PrettyTable()
        table.field_names = ["", "Precision", "Recall", "Specificity"]
        for cls in range(self.num_classes):
            tp = self.matrix[cls, cls]
            fp = np.sum(self.matrix[cls, :]) - tp  # rest of the predicted-class row
            fn = np.sum(self.matrix[:, cls]) - tp  # rest of the true-class column
            tn = np.sum(self.matrix) - tp - fp - fn
            table.add_row([self.labels[cls],
                           ratio(tp, tp + fp),
                           ratio(tp, tp + fn),
                           ratio(tn, tn + fp)])
        print(table)

    def plot(self):
        """Print the raw matrix and show it as an annotated heat map."""
        counts = self.matrix
        print(counts)
        plt.imshow(counts, cmap=plt.cm.Blues)

        tick_positions = range(self.num_classes)
        # x-axis tick labels
        plt.xticks(tick_positions, self.labels, rotation=45)
        # y-axis tick labels
        plt.yticks(tick_positions, self.labels)
        # show the colorbar
        plt.colorbar()
        plt.xlabel('True Labels')
        plt.ylabel('Predicted Labels')
        plt.title('Confusion matrix')

        # write the count into every cell
        half_max = counts.max() / 2
        for col in range(self.num_classes):
            for row in range(self.num_classes):
                # note the index order: counts[row, col], not counts[col, row]
                cell = int(counts[row, col])
                text_color = "white" if cell > half_max else "black"
                plt.text(col, row, cell,
                         verticalalignment='center',
                         horizontalalignment='center',
                         color=text_color)
        plt.tight_layout()
        plt.show()


if __name__ == '__main__':
    # Script: evaluate a trained MobileNetV2 on the flower validation set and
    # report the results as a confusion matrix (plot + summary table).
    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    validation_dir = os.path.join(image_path, "val")
    assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir)

    im_height = 224
    im_width = 224
    batch_size = 16


    def pre_function(img):
        """Rescale pixel values: x / 255, then (x - 0.5) * 2 (i.e. [0, 255] -> [-1, 1])."""
        img = img / 255.
        img = (img - 0.5) * 2.0
        return img


    # validation data generator: preprocessing only, no shuffling so that
    # predictions and labels are visited in a fixed order
    validation_image_generator = ImageDataGenerator(preprocessing_function=pre_function)

    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,
                                                                  batch_size=batch_size,
                                                                  shuffle=False,
                                                                  target_size=(im_height, im_width),
                                                                  class_mode='categorical')
    total_val = val_data_gen.n

    model = MobileNetV2(num_classes=5)
    # feature.build((None, 224, 224, 3))  # when using subclass model
    pre_weights_path = './myMobileNet.ckpt'
    assert len(glob.glob(pre_weights_path+"*")), "cannot find {}".format(pre_weights_path)
    model.load_weights(pre_weights_path)

    # read class_indict; the context manager closes the file handle
    label_path = './class_indices.json'
    assert os.path.exists(label_path), "cannot find {}".format(label_path)
    with open(label_path, 'r') as json_file:
        class_indict = json.load(json_file)

    labels = list(class_indict.values())
    confusion = ConfusionMatrix(num_classes=5, labels=labels)

    # validate
    for step in tqdm(range(math.ceil(total_val / batch_size))):
        val_images, val_labels = next(val_data_gen)
        results = model.predict_on_batch(val_images)
        results = tf.keras.layers.Softmax()(results).numpy()
        # use distinct names so the class-name list `labels` is not overwritten
        pred_classes = np.argmax(results, axis=-1)
        true_classes = np.argmax(val_labels, axis=-1)
        confusion.update(pred_classes, true_classes)
    confusion.plot()
    confusion.summary()


================================================
FILE: tensorflow_classification/ConfusionMatrix/model.py
================================================
from tensorflow.keras import layers, Model, Sequential


def _make_divisible(ch, divisor=8, min_ch=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    """
    if min_ch is None:
        min_ch = divisor
    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_ch < 0.9 * ch:
        new_ch += divisor
    return new_ch


class ConvBNReLU(layers.Layer):
    """Conv2D (no bias, 'SAME' padding) -> BatchNormalization -> ReLU capped at 6.0.

    Args:
        out_channel: number of convolution filters.
        kernel_size: convolution kernel size.
        stride: convolution stride.
        **kwargs: forwarded to `layers.Layer` (e.g. `name`).
    """
    def __init__(self, out_channel, kernel_size=3, stride=1, **kwargs):
        super(ConvBNReLU, self).__init__(**kwargs)
        # The sub-layer names are fixed explicitly ('Conv2d', 'BatchNorm').
        self.conv = layers.Conv2D(filters=out_channel, kernel_size=kernel_size,
                                  strides=stride, padding='SAME', use_bias=False, name='Conv2d')
        self.bn = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name='BatchNorm')
        self.activation = layers.ReLU(max_value=6.0)

    def call(self, inputs, training=False, **kwargs):
        """Apply conv, batch norm (honouring `training`) and the activation."""
        x = self.conv(inputs)
        x = self.bn(x, training=training)
        x = self.activation(x)
        return x


class InvertedResidual(layers.Layer):
    """Inverted residual block: optional 1x1 expand -> 3x3 depthwise -> 1x1 linear project.

    The input is added back to the output only when `stride == 1` and
    `in_channel == out_channel`.

    Args:
        in_channel: number of input channels.
        out_channel: number of output channels.
        stride: stride of the depthwise convolution.
        expand_ratio: multiplier giving the hidden width (`in_channel * expand_ratio`);
            the expand convolution is omitted when it equals 1.
        **kwargs: forwarded to `layers.Layer`.
    """
    def __init__(self, in_channel, out_channel, stride, expand_ratio, **kwargs):
        super(InvertedResidual, self).__init__(**kwargs)
        self.hidden_channel = in_channel * expand_ratio
        # a shortcut is only possible when input and output shapes match
        self.use_shortcut = stride == 1 and in_channel == out_channel

        layer_list = []
        if expand_ratio != 1:
            # 1x1 pointwise conv
            layer_list.append(ConvBNReLU(out_channel=self.hidden_channel, kernel_size=1, name='expand'))
        layer_list.extend([
            # 3x3 depthwise conv
            layers.DepthwiseConv2D(kernel_size=3, padding='SAME', strides=stride,
                                   use_bias=False, name='depthwise'),
            layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name='depthwise/BatchNorm'),
            layers.ReLU(max_value=6.0),
            # 1x1 pointwise conv(linear): no activation after the projection
            layers.Conv2D(filters=out_channel, kernel_size=1, strides=1,
                          padding='SAME', use_bias=False, name='project'),
            layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name='project/BatchNorm')
        ])
        self.main_branch = Sequential(layer_list, name='expanded_conv')

    def call(self, inputs, **kwargs):
        """Run the main branch, adding the input when the shortcut is enabled."""
        # NOTE(review): `training` is not forwarded explicitly here; presumably
        # Keras propagates it to the nested Sequential - confirm.
        if self.use_shortcut:
            return inputs + self.main_branch(inputs)
        else:
            return self.main_branch(inputs)


def MobileNetV2(im_height=224, im_width=224, num_classes=1000, alpha=1.0, round_nearest=8):
    """Build a MobileNetV2 classifier with the Keras functional API.

    Args:
        im_height: input image height.
        im_width: input image width.
        num_classes: number of units of the final Dense layer (outputs are logits,
            no softmax is applied).
        alpha: width multiplier applied to every channel count.
        round_nearest: channel counts are rounded to a multiple of this value.

    Returns:
        A `Model` mapping an (im_height, im_width, 3) float32 image to `num_classes` logits.
    """
    block = InvertedResidual
    input_channel = _make_divisible(32 * alpha, round_nearest)
    last_channel = _make_divisible(1280 * alpha, round_nearest)
    # per stage: t = expand ratio, c = output channels, n = number of blocks,
    # s = stride of the first block in the stage
    inverted_residual_setting = [
        # t, c, n, s
        [1, 16, 1, 1],
        [6, 24, 2, 2],
        [6, 32, 3, 2],
        [6, 64, 4, 2],
        [6, 96, 3, 1],
        [6, 160, 3, 2],
        [6, 320, 1, 1],
    ]

    input_image = layers.Input(shape=(im_height, im_width, 3), dtype='float32')
    # conv1
    x = ConvBNReLU(input_channel, stride=2, name='Conv')(input_image)
    # building inverted residual blocks
    for t, c, n, s in inverted_residual_setting:
        output_channel = _make_divisible(c * alpha, round_nearest)
        for i in range(n):
            # only the first block of a stage uses the stage stride
            stride = s if i == 0 else 1
            x = block(x.shape[-1], output_channel, stride, expand_ratio=t)(x)
    # building last several layers
    x = ConvBNReLU(last_channel, kernel_size=1, name='Conv_1')(x)

    # building classifier
    x = layers.GlobalAveragePooling2D()(x)  # pool + flatten
    x = layers.Dropout(0.2)(x)
    output = layers.Dense(num_classes, name='Logits')(x)

    model = Model(inputs=input_image, outputs=output)
    return model


================================================
FILE: tensorflow_classification/ConvNeXt/model.py
================================================
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, initializers, Model

# Default initializers shared by every conv / dense layer in this file.
# The reference ConvNeXt implementation initializes conv and linear weights
# with a truncated normal of std 0.02 and biases with zeros; the previous
# value of 0.2 was ten times larger than that.
KERNEL_INITIALIZER = {
    "class_name": "TruncatedNormal",
    "config": {
        "stddev": 0.02
    }
}

BIAS_INITIALIZER = "Zeros"


class Block(layers.Layer):
    """
    ConvNeXt block: 7x7 depthwise conv -> LayerNorm -> Dense(4 * dim) -> GELU
    -> Dense(dim), optionally scaled by a learnable per-channel `gamma` and
    passed through drop path, then added to the block input.

    Args:
        dim (int): Number of input channels.
        drop_rate (float): Stochastic depth rate. Default: 0.0
        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
        name (str): Layer name.
    """
    def __init__(self, dim, drop_rate=0., layer_scale_init_value=1e-6, name: str = None):
        super().__init__(name=name)
        self.layer_scale_init_value = layer_scale_init_value
        self.dwconv = layers.DepthwiseConv2D(7,
                                             padding="same",
                                             depthwise_initializer=KERNEL_INITIALIZER,
                                             bias_initializer=BIAS_INITIALIZER,
                                             name="dwconv")
        self.norm = layers.LayerNormalization(epsilon=1e-6, name="norm")
        # the two "pointwise convs" are implemented as Dense layers
        self.pwconv1 = layers.Dense(4 * dim,
                                    kernel_initializer=KERNEL_INITIALIZER,
                                    bias_initializer=BIAS_INITIALIZER,
                                    name="pwconv1")
        self.act = layers.Activation("gelu")
        self.pwconv2 = layers.Dense(dim,
                                    kernel_initializer=KERNEL_INITIALIZER,
                                    bias_initializer=BIAS_INITIALIZER,
                                    name="pwconv2")
        # drop path is realised with Dropout and noise_shape=(None, 1, 1, 1);
        # it is omitted entirely when drop_rate is 0
        self.drop_path = layers.Dropout(drop_rate, noise_shape=(None, 1, 1, 1)) if drop_rate > 0 else None

    def build(self, input_shape):
        """Create the layer-scale weight `gamma` (one value per channel) if enabled."""
        if self.layer_scale_init_value > 0:
            self.gamma = self.add_weight(shape=[input_shape[-1]],
                                         initializer=initializers.Constant(self.layer_scale_init_value),
                                         trainable=True,
                                         dtype=tf.float32,
                                         name="gamma")
        else:
            self.gamma = None

    def call(self, x, training=False):
        """Apply the block and return `shortcut + branch(x)`."""
        shortcut = x
        x = self.dwconv(x)
        x = self.norm(x, training=training)
        x = self.pwconv1(x)
        x = self.act(x)
        x = self.pwconv2(x)

        # layer scale
        if self.gamma is not None:
            x = self.gamma * x

        if self.drop_path is not None:
            x = self.drop_path(x, training=training)

        return shortcut + x


class Stem(layers.Layer):
    """Input stem: 4x4 convolution with stride 4 followed by LayerNormalization.

    Args:
        dim: number of output channels of the convolution.
        name: layer name.
    """
    def __init__(self, dim, name: str = None):
        super().__init__(name=name)
        self.conv = layers.Conv2D(dim,
                                  kernel_size=4,
                                  strides=4,
                                  padding="same",
                                  kernel_initializer=KERNEL_INITIALIZER,
                                  bias_initializer=BIAS_INITIALIZER,
                                  name="conv2d")
        self.norm = layers.LayerNormalization(epsilon=1e-6, name="norm")

    def call(self, x, training=False):
        """Apply the convolution, then the normalization."""
        x = self.conv(x)
        x = self.norm(x, training=training)
        return x


class DownSample(layers.Layer):
    """Downsampling layer: LayerNormalization followed by a 2x2 convolution with stride 2.

    Args:
        dim: number of output channels of the convolution.
        name: layer name.
    """
    def __init__(self, dim, name: str = None):
        super().__init__(name=name)
        self.norm = layers.LayerNormalization(epsilon=1e-6, name="norm")
        self.conv = layers.Conv2D(dim,
                                  kernel_size=2,
                                  strides=2,
                                  padding="same",
                                  kernel_initializer=KERNEL_INITIALIZER,
                                  bias_initializer=BIAS_INITIALIZER,
                                  name="conv2d")

    def call(self, x, training=False):
        """Apply the normalization first, then the strided convolution."""
        x = self.norm(x, training=training)
        x = self.conv(x)
        return x


class ConvNeXt(Model):
    r""" ConvNeXt
        A Tensorflow impl of : `A ConvNet for the 2020s`  -
          https://arxiv.org/pdf/2201.03545.pdf

    Structure: stem -> stage1 -> downsample2 -> stage2 -> downsample3 -> stage3
    -> downsample4 -> stage4 -> mean over the two spatial axes -> LayerNorm -> Dense head.

    Args:
        num_classes (int): Number of classes for classification head (required).
        depths (list(int)): Number of blocks at each of the four stages (required),
            e.g. [3, 3, 9, 3].
        dims (list(int)): Feature dimension at each of the four stages (required),
            e.g. [96, 192, 384, 768].
        drop_path_rate (float): Stochastic depth rate. Default: 0.
        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
    """
    def __init__(self, num_classes: int, depths: list, dims: list, drop_path_rate: float = 0.,
                 layer_scale_init_value: float = 1e-6):
        super().__init__()
        self.stem = Stem(dims[0], name="stem")

        # `cur` is the index of the first block of the current stage within
        # dp_rates, which rises linearly from 0 to drop_path_rate over all blocks
        cur = 0
        dp_rates = np.linspace(start=0, stop=drop_path_rate, num=sum(depths))
        self.stage1 = [Block(dim=dims[0],
                             drop_rate=dp_rates[cur + i],
                             layer_scale_init_value=layer_scale_init_value,
                             name=f"stage1_block{i}")
                       for i in range(depths[0])]
        cur += depths[0]

        self.downsample2 = DownSample(dims[1], name="downsample2")
        self.stage2 = [Block(dim=dims[1],
                             drop_rate=dp_rates[cur + i],
                             layer_scale_init_value=layer_scale_init_value,
                             name=f"stage2_block{i}")
                       for i in range(depths[1])]
        cur += depths[1]

        self.downsample3 = DownSample(dims[2], name="downsample3")
        self.stage3 = [Block(dim=dims[2],
                             drop_rate=dp_rates[cur + i],
                             layer_scale_init_value=layer_scale_init_value,
                             name=f"stage3_block{i}")
                       for i in range(depths[2])]
        cur += depths[2]

        self.downsample4 = DownSample(dims[3], name="downsample4")
        self.stage4 = [Block(dim=dims[3],
                             drop_rate=dp_rates[cur + i],
                             layer_scale_init_value=layer_scale_init_value,
                             name=f"stage4_block{i}")
                       for i in range(depths[3])]

        self.norm = layers.LayerNormalization(epsilon=1e-6, name="norm")
        self.head = layers.Dense(units=num_classes,
                                 kernel_initializer=KERNEL_INITIALIZER,
                                 bias_initializer=BIAS_INITIALIZER,
                                 name="head")

    def call(self, x, training=False):
        """Forward pass; returns the output of the Dense head (no softmax is applied)."""
        x = self.stem(x, training=training)
        for block in self.stage1:
            x = block(x, training=training)

        x = self.downsample2(x, training=training)
        for block in self.stage2:
            x = block(x, training=training)

        x = self.downsample3(x, training=training)
        for block in self.stage3:
            x = block(x, training=training)

        x = self.downsample4(x, training=training)
        for block in self.stage4:
            x = block(x, training=training)

        # global average pooling over axes 1 and 2
        x = tf.reduce_mean(x, axis=[1, 2])
        x = self.norm(x, training=training)
        x = self.head(x)
        return x


def convnext_tiny(num_classes: int):
    """Create a ConvNeXt with depths [3, 3, 9, 3] and dims [96, 192, 384, 768]."""
    return ConvNeXt(num_classes=num_classes,
                    depths=[3, 3, 9, 3],
                    dims=[96, 192, 384, 768])


def convnext_small(num_classes: int):
    """Create a ConvNeXt with depths [3, 3, 27, 3] and dims [96, 192, 384, 768]."""
    return ConvNeXt(num_classes=num_classes,
                    depths=[3, 3, 27, 3],
                    dims=[96, 192, 384, 768])


def convnext_base(num_classes: int):
    """Create a ConvNeXt with depths [3, 3, 27, 3] and dims [128, 256, 512, 1024]."""
    return ConvNeXt(num_classes=num_classes,
                    depths=[3, 3, 27, 3],
                    dims=[128, 256, 512, 1024])


def convnext_large(num_classes: int):
    """Create a ConvNeXt with depths [3, 3, 27, 3] and dims [192, 384, 768, 1536]."""
    return ConvNeXt(num_classes=num_classes,
                    depths=[3, 3, 27, 3],
                    dims=[192, 384, 768, 1536])


def convnext_xlarge(num_classes: int):
    """Create a ConvNeXt with depths [3, 3, 27, 3] and dims [256, 512, 1024, 2048]."""
    return ConvNeXt(num_classes=num_classes,
                    depths=[3, 3, 27, 3],
                    dims=[256, 512, 1024, 2048])


================================================
FILE: tensorflow_classification/ConvNeXt/predict.py
================================================
import os
import json
import glob
import numpy as np

from PIL import Image
import tensorflow as tf
import matplotlib.pyplot as plt

from model import convnext_tiny as create_model


def main():
    """Classify one image with trained ConvNeXt-tiny weights and display the result.

    Reads "../tulip.jpg", "./class_indices.json" and the checkpoint
    "./save_weights/model.ckpt", prints the probability of every class and
    shows the image with the top prediction as its title.
    """
    num_classes = 5
    im_height = im_width = 224

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    # Force three channels: the per-channel mean/std normalisation below only
    # broadcasts against an (H, W, 3) array, so grayscale, palette or RGBA
    # files would otherwise fail.
    img = Image.open(img_path).convert("RGB")
    # resize image
    img = img.resize((im_width, im_height))
    plt.imshow(img)

    # read image
    img = np.array(img).astype(np.float32)

    # preprocess: scale to [0, 1], then normalise each channel with fixed mean/std
    img = (img / 255. - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]

    # Add the image to a batch where it's the only member.
    img = (np.expand_dims(img, 0))

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model; build with the same spatial size the image was resized to
    model = create_model(num_classes=num_classes)
    model.build([1, im_height, im_width, 3])

    weights_path = './save_weights/model.ckpt'
    assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path)
    model.load_weights(weights_path)

    # the model outputs logits; softmax turns them into probabilities
    result = np.squeeze(model.predict(img, batch_size=1))
    result = tf.keras.layers.Softmax()(result)
    predict_class = np.argmax(result)

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_class)],
                                                 result[predict_class])
    plt.title(print_res)
    for i in range(len(result)):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  result[i]))
    plt.show()


# Entry point: run main() only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/ConvNeXt/train.py
================================================
import os
import re
import sys
import datetime

import tensorflow as tf
from tqdm import tqdm

from model import convnext_tiny as create_model
from utils import generate_ds, cosine_scheduler

# Compare (major, minor) numerically. Comparing the version strings directly is
# lexicographic, so e.g. "2.10.0" >= "2.4.0" evaluates to False even though
# 2.10 is newer than 2.4.
assert tuple(int(part) for part in tf.version.VERSION.split(".")[:2]) >= (2, 4), \
    "version of tf must greater/equal than 2.4.0"


def main():
    """Fine-tune ConvNeXt-tiny on the flower dataset with a custom training loop.

    Loads pretrained weights from './convnext_tiny_1k_224.h5', trains for
    `epochs` epochs with SGD and a per-step learning rate taken from
    `cosine_scheduler`, writes loss/accuracy summaries under './logs/<timestamp>'
    and saves the weights with the best validation accuracy to
    './save_weights/model.ckpt'.
    """
    data_root = "/data/flower_photos"  # get data root path

    if not os.path.exists("./save_weights"):
        os.makedirs("./save_weights")

    batch_size = 8
    epochs = 10
    num_classes = 5
    freeze_layers = False
    initial_lr = 0.005
    weight_decay = 5e-4

    # separate summary writers for training and validation curves
    log_dir = "./logs/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    train_writer = tf.summary.create_file_writer(os.path.join(log_dir, "train"))
    val_writer = tf.summary.create_file_writer(os.path.join(log_dir, "val"))

    # build the training and validation datasets (20% of the data is used for validation)
    train_ds, val_ds = generate_ds(data_root, batch_size=batch_size, val_rate=0.2)

    # create model
    model = create_model(num_classes=num_classes)
    model.build((1, 224, 224, 3))

    # Download the pretrained weights that were converted in advance
    # link: https://pan.baidu.com/s/1MtYJ3FCAkiPwaMRKuyZN1Q  password: 1cgp
    # load weights
    pre_weights_path = './convnext_tiny_1k_224.h5'
    assert os.path.exists(pre_weights_path), "cannot find {}".format(pre_weights_path)
    # weights are matched by name; entries whose shape does not match are skipped
    model.load_weights(pre_weights_path, by_name=True, skip_mismatch=True)

    # freeze bottom layers: only layers whose name contains "head" stay trainable
    if freeze_layers:
        for layer in model.layers:
            if "head" not in layer.name:
                layer.trainable = False
            else:
                print("training {}".format(layer.name))

    model.summary()

    # custom learning rate scheduler; next(scheduler) is called once per training step
    scheduler = cosine_scheduler(initial_lr, epochs, len(train_ds), train_writer=train_writer)

    # using keras low level api for training
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    optimizer = tf.keras.optimizers.SGD(learning_rate=initial_lr, momentum=0.9)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

    @tf.function
    def train_step(train_images, train_labels):
        """Run one optimisation step and update the training metrics."""
        with tf.GradientTape() as tape:
            output = model(train_images, training=True)
            ce_loss = loss_object(train_labels, output)

            # l2 loss over all trainable variables except those whose name
            # contains bias, gamma or beta
            matcher = re.compile(".*(bias|gamma|beta).*")
            l2loss = weight_decay * tf.add_n([
                tf.nn.l2_loss(v)
                for v in model.trainable_variables
                if not matcher.match(v.name)
            ])

            loss = ce_loss + l2loss

        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        # only the cross-entropy part is logged, not the l2 term
        train_loss(ce_loss)
        train_accuracy(train_labels, output)

    @tf.function
    def val_step(val_images, val_labels):
        """Evaluate one batch and update the validation metrics."""
        output = model(val_images, training=False)
        loss = loss_object(val_labels, output)

        val_loss(loss)
        val_accuracy(val_labels, output)

    best_val_acc = 0.
    for epoch in range(epochs):
        train_loss.reset_states()  # clear history info
        train_accuracy.reset_states()  # clear history info
        val_loss.reset_states()  # clear history info
        val_accuracy.reset_states()  # clear history info

        # train
        train_bar = tqdm(train_ds, file=sys.stdout)
        for images, labels in train_bar:
            # update learning rate
            optimizer.learning_rate = next(scheduler)

            train_step(images, labels)

            # print train process
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}, lr:{:.5f}".format(
                epoch + 1,
                epochs,
                train_loss.result(),
                train_accuracy.result(),
                optimizer.learning_rate.numpy()
            )

        # validate
        val_bar = tqdm(val_ds, file=sys.stdout)
        for images, labels in val_bar:
            val_step(images, labels)

            # print val process
            val_bar.desc = "valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                               epochs,
                                                                               val_loss.result(),
                                                                               val_accuracy.result())
        # writing training loss and acc
        with train_writer.as_default():
            tf.summary.scalar("loss", train_loss.result(), epoch)
            tf.summary.scalar("accuracy", train_accuracy.result(), epoch)

        # writing validation loss and acc
        with val_writer.as_default():
            tf.summary.scalar("loss", val_loss.result(), epoch)
            tf.summary.scalar("accuracy", val_accuracy.result(), epoch)

        # only save best weights
        if val_accuracy.result() > best_val_acc:
            best_val_acc = val_accuracy.result()
            save_name = "./save_weights/model.ckpt"
            model.save_weights(save_name, save_format="tf")

# Entry point: run main() only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/ConvNeXt/trans_weights.py
================================================
import torch
from model import *


def transpose_weights(m_type, w_dict, k, v):
    """Store weight `v` under key `k` in `w_dict`, transposing it for its layer type.

    Values with more than one dimension are converted with `v.numpy()`,
    transposed and cast to float32; one-dimensional values (e.g. biases) and
    all "norm" values are stored unchanged.

    Args:
        m_type: one of "conv", "dwconv", "linear" or "norm".
        w_dict: destination dict, modified in place.
        k: destination key.
        v: source weight exposing `.shape` and `.numpy()`.

    Raises:
        ValueError: if `m_type` is not a supported type.
    """
    # axis permutation per layer type; None means "store as is"
    # (presumably PyTorch layouts -> Keras layouts, e.g. conv
    # (out, in, kH, kW) -> (kH, kW, in, out) - confirm against the model)
    axes_by_type = {
        "conv": (2, 3, 1, 0),
        "dwconv": (2, 3, 0, 1),
        "linear": (1, 0),
        "norm": None,
    }
    if m_type not in axes_by_type:
        # the exception used to be constructed without `raise`, so unknown
        # types were dropped silently
        raise ValueError(f"not support type:{m_type}")

    axes = axes_by_type[m_type]
    if axes is not None and len(v.shape) > 1:
        v = np.transpose(v.numpy(), axes).astype(np.float32)
    w_dict[k] = v


def main(weights_path: str,
         model_name: str,
         model: tf.keras.Model):
    """Copy a ConvNeXt PyTorch checkpoint into ``model`` and save it as ``./<model_name>.h5``.

    Every checkpoint key is renamed to the corresponding Keras variable name
    and its tensor is re-laid-out by ``transpose_weights``. Variables of
    ``model`` that have no counterpart in the checkpoint, or whose shape
    differs, are reported on stdout and left untouched.

    :param weights_path: path of the PyTorch checkpoint; its "model" entry is read
    :param model_name: stem of the output ``.h5`` file
    :param model: Keras model whose weights are overwritten
    :raises ValueError: if a checkpoint key matches none of the known patterns
    """
    # variable name without the ":0" suffix -> variable
    var_dict = {v.name.split(':')[0]: v for v in model.weights}

    # NOTE(security): torch.load unpickles the file, only use checkpoints
    # obtained from a trusted source.
    weights_dict = torch.load(weights_path, map_location="cpu")["model"]
    w_dict = {}
    for k, v in weights_dict.items():
        if "downsample_layers" in k:
            split_k = k.split(".")
            if split_k[1] == "0":
                # downsample_layers.0 is the stem: sub-module 0 is the conv,
                # anything else is treated as its norm
                if split_k[2] == "0":
                    k = "stem/conv2d/" + split_k[-1]
                    k = k.replace("weight", "kernel")
                    transpose_weights("conv", w_dict, k, v)
                else:
                    k = "stem/norm/" + split_k[-1]
                    k = k.replace("weight", "gamma")
                    k = k.replace("bias", "beta")
                    transpose_weights("norm", w_dict, k, v)
            else:
                # later downsample layers: sub-module 1 is the conv,
                # anything else is treated as its norm
                stage = int(split_k[1]) + 1
                if split_k[2] == "1":
                    k = f"downsample{stage}/conv2d/" + split_k[-1]
                    k = k.replace("weight", "kernel")
                    transpose_weights("conv", w_dict, k, v)
                else:
                    k = f"downsample{stage}/norm/" + split_k[-1]
                    k = k.replace("weight", "gamma")
                    k = k.replace("bias", "beta")
                    transpose_weights("norm", w_dict, k, v)
        elif "stages" in k:
            split_k = k.split(".")
            stage = int(split_k[1]) + 1  # Keras stage names are 1-based
            block = int(split_k[2])
            if "dwconv" in k:
                k = f"stage{stage}_block{block}/{split_k[-2]}/{split_k[-1]}"
                k = k.replace("weight", "depthwise_kernel")
                transpose_weights("dwconv", w_dict, k, v)
            elif "pwconv" in k:
                k = f"stage{stage}_block{block}/{split_k[-2]}/{split_k[-1]}"
                k = k.replace("weight", "kernel")
                transpose_weights("linear", w_dict, k, v)
            elif "norm" in k:
                k = f"stage{stage}_block{block}/{split_k[-2]}/{split_k[-1]}"
                k = k.replace("weight", "gamma")
                k = k.replace("bias", "beta")
                transpose_weights("norm", w_dict, k, v)
            elif "gamma" in k:
                # stored unchanged under the block's own name scope
                k = f"stage{stage}_block{block}/{split_k[-1]}"
                transpose_weights("norm", w_dict, k, v)
            else:
                # was a bare ValueError(...) expression that never fired
                raise ValueError(f"unrecognized {k}")
        elif "norm" in k:
            split_k = k.split(".")
            k = f"norm/{split_k[-1]}"
            k = k.replace("weight", "gamma")
            k = k.replace("bias", "beta")
            transpose_weights("norm", w_dict, k, v)
        elif "head" in k:
            split_k = k.split(".")
            k = f"head/{split_k[-1]}"
            k = k.replace("weight", "kernel")
            transpose_weights("linear", w_dict, k, v)
        else:
            # was a bare ValueError(...) expression that never fired
            raise ValueError(f"unrecognized {k}")

    # copy the converted tensors into the Keras variables
    for key, var in var_dict.items():
        if key in w_dict:
            if w_dict[key].shape != var.shape:
                msg = "shape mismatch: {}".format(key)
                print(msg)
            else:
                var.assign(w_dict[key], read_value=False)
        else:
            msg = "Not found {} in {}".format(key, weights_path)
            print(msg)

    model.save_weights("./{}.h5".format(model_name))


if __name__ == '__main__':
    # main() reads model.weights, so the model is created and built before
    # the conversion. The URL below is where the matching checkpoint lives.
    model = convnext_tiny(num_classes=1000)
    model.build((1, 224, 224, 3))
    # https://dl.fbaipublicfiles.com/convnext/convnext_tiny_1k_224_ema.pth
    main(weights_path="./convnext_tiny_1k_224_ema.pth",
         model_name="convnext_tiny_1k_224",
         model=model)

    # model = convnext_small(num_classes=1000)
    # model.build((1, 224, 224, 3))
    # # https://dl.fbaipublicfiles.com/convnext/convnext_small_1k_224_ema.pth
    # main(weights_path="./convnext_small_1k_224_ema.pth",
    #      model_name="convnext_small_1k_224",
    #      model=model)

    # model = convnext_base(num_classes=1000)
    # model.build((1, 224, 224, 3))
    # # https://dl.fbaipublicfiles.com/convnext/convnext_base_1k_224_ema.pth
    # main(weights_path="./convnext_base_1k_224_ema.pth",
    #      model_name="convnext_base_1k_224",
    #      model=model)

    # model = convnext_base(num_classes=21841)
    # model.build((1, 224, 224, 3))
    # # https://dl.fbaipublicfiles.com/convnext/convnext_base_22k_224.pth
    # main(weights_path="./convnext_base_22k_224.pth",
    #      model_name="convnext_base_22k_224",
    #      model=model)

    # model = convnext_large(num_classes=1000)
    # model.build((1, 224, 224, 3))
    # # https://dl.fbaipublicfiles.com/convnext/convnext_large_1k_224_ema.pth
    # main(weights_path="./convnext_large_1k_224_ema.pth",
    #      model_name="convnext_large_1k_224",
    #      model=model)

    # model = convnext_large(num_classes=21841)
    # model.build((1, 224, 224, 3))
    # # https://dl.fbaipublicfiles.com/convnext/convnext_large_22k_224.pth
    # main(weights_path="./convnext_large_22k_224.pth",
    #      model_name="convnext_large_22k_224",
    #      model=model)


================================================
FILE: tensorflow_classification/ConvNeXt/utils.py
================================================
import os
import json
import random
import math

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt


def read_split_data(root: str, val_rate: float = 0.2):
    """Split an image-folder dataset into training and validation lists.

    Every sub-directory of ``root`` is one class. The mapping from class
    index to class name is written to ``class_indices.json`` in the current
    working directory. For each class, ``int(n * val_rate)`` images are
    sampled for validation and the rest go to training.

    :param root: dataset root directory, one sub-directory per class
    :param val_rate: fraction of each class sampled for validation
    :return: (train paths, train labels, val paths, val labels), four lists
    """
    random.seed(0)  # fixed seed so the random split is repeatable
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # one sub-directory per class, sorted so the index mapping is stable
    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]
    flower_class.sort()
    # class name -> numeric index, and the inverse mapping saved as JSON
    class_indices = dict((k, v) for v, k in enumerate(flower_class))
    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of each training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of each validation image
    every_class_num = []  # number of samples found per class
    supported = [".jpg", ".JPG", ".jpeg", ".JPEG"]  # accepted file extensions
    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # os.listdir returns entries in arbitrary order; sort so the seeded
        # sampling below picks the same files on every platform
        images = sorted(os.path.join(root, cla, i) for i in os.listdir(cla_path)
                        if os.path.splitext(i)[-1] in supported)
        image_class = class_indices[cla]
        every_class_num.append(len(images))
        # a set gives O(1) membership tests in the loop below
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:  # sampled for validation
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:  # everything else is training data
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.\n{} for training, {} for validation".format(sum(every_class_num),
                                                                                            len(train_images_path),
                                                                                            len(val_images_path)
                                                                                            ))

    plot_image = False
    if plot_image:
        # bar chart of the number of images per class
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # replace the numeric x ticks with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # print the count above each bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        plt.xlabel('image class')
        plt.ylabel('number of images')
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label


def generate_ds(data_root: str,
                train_im_height: int = 224,
                train_im_width: int = 224,
                val_im_height: int = None,
                val_im_width: int = None,
                batch_size: int = 8,
                val_rate: float = 0.1,
                cache_data: bool = False):
    """
    Split the dataset and build the batched training and validation pipelines.
    :param data_root: dataset root directory
    :param train_im_height: image height fed to the network during training
    :param train_im_width:  image width fed to the network during training
    :param val_im_height: image height used for validation (defaults to the training height)
    :param val_im_width:  image width used for validation (defaults to the training width)
    :param batch_size: batch size of both datasets
    :param val_rate:  fraction of the data assigned to the validation set
    :param cache_data: whether the training dataset is cached after decoding
    :return: (train_ds, val_ds)
    """
    assert train_im_height is not None
    assert train_im_width is not None
    val_im_width = train_im_width if val_im_width is None else val_im_width
    val_im_height = train_im_height if val_im_height is None else val_im_height

    train_img_path, train_img_label, val_img_path, val_img_label = read_split_data(data_root, val_rate=val_rate)
    autotune = tf.data.experimental.AUTOTUNE

    # per-channel statistics used for normalisation
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    def load_image(img_path, height, width):
        # read + decode a JPEG as float32, then centre crop / pad to the target size
        raw = tf.io.read_file(img_path)
        pixels = tf.cast(tf.image.decode_jpeg(raw, channels=3), tf.float32)
        return tf.image.resize_with_crop_or_pad(pixels, height, width)

    def normalize(pixels):
        return (pixels / 255. - channel_mean) / channel_std

    def process_train_info(img_path, label):
        image = load_image(img_path, train_im_height, train_im_width)
        image = tf.image.random_flip_left_right(image)  # the only augmentation
        return normalize(image), label

    def process_val_info(img_path, label):
        image = load_image(img_path, val_im_height, val_im_width)
        return normalize(image), label

    def build_pipeline(paths, labels, map_fn, shuffle, cache):
        # (path, label) pairs -> (image, label) pairs -> batches
        ds = tf.data.Dataset.from_tensor_slices((tf.constant(paths),
                                                 tf.constant(labels)))
        ds = ds.map(map_fn, num_parallel_calls=autotune)
        if cache:
            ds = ds.cache()  # keep decoded samples in memory
        if shuffle:
            ds = ds.shuffle(buffer_size=len(paths))  # buffer covers the whole split
        ds = ds.batch(batch_size)
        return ds.prefetch(buffer_size=autotune)  # prepare the next step while training

    train_ds = build_pipeline(train_img_path, train_img_label, process_train_info,
                              shuffle=True, cache=cache_data)
    val_ds = build_pipeline(val_img_path, val_img_label, process_val_info,
                            shuffle=False, cache=False)

    return train_ds, val_ds


def cosine_rate(now_step, total_step, end_lr_rate):
    """Return the cosine-decay multiplier for ``now_step`` out of ``total_step``.

    The value is 1 at step 0 and reaches ``end_lr_rate`` at ``total_step``.
    """
    phase = now_step * math.pi / total_step
    half_cosine = (1 + math.cos(phase)) / 2
    return half_cosine * (1 - end_lr_rate) + end_lr_rate


def cosine_scheduler(initial_lr, epochs, steps, warmup_epochs=1, end_lr_rate=1e-6, train_writer=None):
    """Yield one learning rate per training step: linear warmup, then cosine decay.

    The first ``warmup_epochs * steps`` values rise linearly from 1e-8 to
    ``initial_lr``; the remaining values follow ``initial_lr * cosine_rate``.
    When ``train_writer`` is given, each value is also logged as the
    'learning rate' summary scalar.
    """
    assert warmup_epochs < epochs
    warmup_steps = warmup_epochs * steps
    decay_steps = (epochs - warmup_epochs) * steps

    warmup_part = np.linspace(start=1e-8, stop=initial_lr, num=warmup_steps)
    decay_rates = [cosine_rate(step, decay_steps, end_lr_rate) for step in range(decay_steps)]
    decay_part = initial_lr * np.array(decay_rates)
    schedule = np.concatenate([warmup_part, decay_part])

    for step, current_lr in enumerate(schedule):
        if train_writer is not None:
            # record the learning rate for tensorboard
            with train_writer.as_default():
                tf.summary.scalar('learning rate', data=current_lr, step=step)
        yield current_lr


================================================
FILE: tensorflow_classification/README.md
================================================
## 该文件夹存放使用tensorflow实现的代码版本
**model.py**： 是模型文件  
**train.py**： 是调用模型训练的文件    
**predict.py**： 是调用模型进行预测的文件  
**class_indices.json**： 是训练数据集对应的标签文件   

------
若要使用该训练脚本需要下载对应的花分类数据集并将其划分为训练集和验证集。   
[点击这里](../data_set/README.md)会告诉你如何去下载数据集，以及提供了现成的划分数据集脚本  

================================================
FILE: tensorflow_classification/Test11_efficientnetV2/model.py
================================================
"""
official code:
https://github.com/google/automl/tree/master/efficientnetv2
"""

import itertools

import tensorflow as tf
from tensorflow.keras import layers, Model, Input


# Initializer config passed as `kernel_initializer` / `depthwise_initializer`
# to the convolution layers in this file: VarianceScaling with scale 2.0,
# fan_out mode and a truncated-normal distribution.
CONV_KERNEL_INITIALIZER = {
    'class_name': 'VarianceScaling',
    'config': {
        'scale': 2.0,
        'mode': 'fan_out',
        'distribution': 'truncated_normal'
    }
}

# Initializer config passed as `kernel_initializer` to the Dense classifier
# in Head: VarianceScaling with scale 1/3, fan_out mode and a uniform
# distribution.
DENSE_KERNEL_INITIALIZER = {
    'class_name': 'VarianceScaling',
    'config': {
        'scale': 1. / 3.,
        'mode': 'fan_out',
        'distribution': 'uniform'
    }
}


class SE(layers.Layer):
    """Squeeze-and-excitation gate.

    The input is averaged over axes 1 and 2, passed through two 1x1
    convolutions (swish, then sigmoid) and the result multiplies the input.
    """

    def __init__(self,
                 se_filters: int,
                 output_filters: int,
                 name: str = None):
        super().__init__(name=name)

        # settings common to both 1x1 convolutions
        shared = dict(kernel_size=1,
                      strides=1,
                      padding="same",
                      use_bias=True,
                      kernel_initializer=CONV_KERNEL_INITIALIZER)

        self.se_reduce = layers.Conv2D(filters=se_filters,
                                       activation="swish",
                                       name="conv2d",
                                       **shared)
        self.se_expand = layers.Conv2D(filters=output_filters,
                                       activation="sigmoid",
                                       name="conv2d_1",
                                       **shared)

    def call(self, inputs, **kwargs):
        # Tensor: [N, H, W, C] -> [N, 1, 1, C]
        squeezed = tf.reduce_mean(inputs, [1, 2], keepdims=True)
        gate = self.se_expand(self.se_reduce(squeezed))
        return gate * inputs


class MBConv(layers.Layer):
    """MBConv block: 1x1 expansion -> depth-wise conv -> SE -> 1x1 projection.

    A residual connection (optionally preceded by stochastic depth) is added
    when ``stride == 1`` and ``input_c == out_c``.

    :param kernel_size: kernel size of the depth-wise convolution
    :param input_c: number of input channels
    :param out_c: number of output channels
    :param expand_ratio: channel multiplier of the expansion conv, must not be 1
    :param stride: stride of the depth-wise convolution, 1 or 2
    :param se_ratio: SE bottleneck width as a fraction of ``input_c``
    :param drop_rate: stochastic-depth drop probability of the residual branch
    :param name: layer name
    :raises ValueError: if ``stride`` is not 1 or 2
    """

    def __init__(self,
                 kernel_size: int,
                 input_c: int,
                 out_c: int,
                 expand_ratio: int,
                 stride: int,
                 se_ratio: float = 0.25,
                 drop_rate: float = 0.,
                 name: str = None):
        super(MBConv, self).__init__(name=name)

        if stride not in [1, 2]:
            raise ValueError("illegal stride value.")

        self.has_shortcut = (stride == 1 and input_c == out_c)
        expanded_c = input_c * expand_ratio

        # In EfficientNetV2 an MBConv never has expansion == 1, so the
        # point-wise expansion conv always exists.
        assert expand_ratio != 1

        # Sub-layer names are spelled out explicitly: "conv2d", "conv2d_1",
        # "batch_normalization", "batch_normalization_1",
        # "batch_normalization_2" (the same names as before).

        # Point-wise expansion
        self.expand_conv = layers.Conv2D(
            filters=expanded_c,
            kernel_size=1,
            strides=1,
            # this initializer was missing here although every other
            # convolution in the file uses it
            kernel_initializer=CONV_KERNEL_INITIALIZER,
            padding="same",
            use_bias=False,
            name="conv2d")
        self.norm0 = layers.BatchNormalization(
            axis=-1,
            momentum=0.9,
            epsilon=1e-3,
            name="batch_normalization")
        self.act0 = layers.Activation("swish")

        # Depth-wise convolution
        self.depthwise_conv = layers.DepthwiseConv2D(
            kernel_size=kernel_size,
            strides=stride,
            depthwise_initializer=CONV_KERNEL_INITIALIZER,
            padding="same",
            use_bias=False,
            name="depthwise_conv2d")
        self.norm1 = layers.BatchNormalization(
            axis=-1,
            momentum=0.9,
            epsilon=1e-3,
            name="batch_normalization_1")
        self.act1 = layers.Activation("swish")

        # SE: bottleneck width is derived from the block input channels
        num_reduced_filters = max(1, int(input_c * se_ratio))
        self.se = SE(num_reduced_filters, expanded_c, name="se")

        # Point-wise linear projection (no activation afterwards)
        self.project_conv = layers.Conv2D(
            filters=out_c,
            kernel_size=1,
            strides=1,
            kernel_initializer=CONV_KERNEL_INITIALIZER,
            padding="same",
            use_bias=False,
            name="conv2d_1")
        self.norm2 = layers.BatchNormalization(
            axis=-1,
            momentum=0.9,
            epsilon=1e-3,
            name="batch_normalization_2")

        self.drop_rate = drop_rate
        if self.has_shortcut and drop_rate > 0:
            # Stochastic Depth
            self.drop_path = layers.Dropout(rate=drop_rate,
                                            noise_shape=(None, 1, 1, 1),  # binary dropout mask
                                            name="drop_path")

    def call(self, inputs, training=None):
        x = inputs

        x = self.expand_conv(x)
        x = self.norm0(x, training=training)
        x = self.act0(x)

        x = self.depthwise_conv(x)
        x = self.norm1(x, training=training)
        x = self.act1(x)

        x = self.se(x)

        x = self.project_conv(x)
        x = self.norm2(x, training=training)

        if self.has_shortcut:
            # drop_path only exists when drop_rate > 0 (see __init__)
            if self.drop_rate > 0:
                x = self.drop_path(x, training=training)

            x = tf.add(x, inputs)

        return x


class FusedMBConv(layers.Layer):
    """Fused-MBConv block.

    With ``expand_ratio != 1`` the block is a k x k expansion conv followed by
    a 1x1 projection; with ``expand_ratio == 1`` it is a single k x k conv
    followed by swish. A residual connection (optionally preceded by
    stochastic depth) is added when ``stride == 1`` and ``input_c == out_c``.
    ``se_ratio`` must be 0.
    """

    def __init__(self,
                 kernel_size: int,
                 input_c: int,
                 out_c: int,
                 expand_ratio: int,
                 stride: int,
                 se_ratio: float,
                 drop_rate: float = 0.,
                 name: str = None):
        super().__init__(name=name)
        if stride not in [1, 2]:
            raise ValueError("illegal stride value.")

        assert se_ratio == 0.

        self.has_shortcut = (stride == 1 and input_c == out_c)
        self.has_expansion = expand_ratio != 1
        expanded_c = input_c * expand_ratio

        conv_ids = itertools.count(0)
        norm_ids = itertools.count(0)

        def next_name(base, ids):
            # first use -> "base", then "base_1", "base_2", ...
            idx = next(ids)
            return base if idx == 0 else base + '_' + str(idx)

        def make_norm():
            return layers.BatchNormalization(
                axis=-1,
                momentum=0.9,
                epsilon=1e-3,
                name=next_name('batch_normalization', norm_ids))

        if self.has_expansion:
            self.expand_conv = layers.Conv2D(
                filters=expanded_c,
                kernel_size=kernel_size,
                strides=stride,
                kernel_initializer=CONV_KERNEL_INITIALIZER,
                padding="same",
                use_bias=False,
                name=next_name('conv2d', conv_ids))
            self.norm0 = make_norm()
            self.act0 = layers.Activation("swish")

        # without an expansion conv the projection does the k x k strided work itself
        project_kernel = 1 if expand_ratio != 1 else kernel_size
        project_stride = 1 if expand_ratio != 1 else stride
        self.project_conv = layers.Conv2D(
            filters=out_c,
            kernel_size=project_kernel,
            strides=project_stride,
            kernel_initializer=CONV_KERNEL_INITIALIZER,
            padding="same",
            use_bias=False,
            name=next_name('conv2d', conv_ids))
        self.norm1 = make_norm()

        if expand_ratio == 1:
            self.act1 = layers.Activation("swish")

        self.drop_rate = drop_rate
        if self.has_shortcut and drop_rate > 0:
            # Stochastic Depth
            self.drop_path = layers.Dropout(rate=drop_rate,
                                            noise_shape=(None, 1, 1, 1),  # binary dropout mask
                                            name="drop_path")

    def call(self, inputs, training=None):
        out = inputs
        if self.has_expansion:
            out = self.act0(self.norm0(self.expand_conv(out), training=training))

        out = self.norm1(self.project_conv(out), training=training)
        if self.has_expansion is False:
            out = self.act1(out)

        if not self.has_shortcut:
            return out

        # drop_path only exists when drop_rate > 0 (see __init__)
        if self.drop_rate > 0:
            out = self.drop_path(out, training=training)
        return tf.add(out, inputs)


class Stem(layers.Layer):
    """Network stem: 3x3 stride-2 convolution, batch normalisation, swish."""

    def __init__(self, filters: int, name: str = None):
        super().__init__(name=name)
        self.conv_stem = layers.Conv2D(filters=filters,
                                       kernel_size=3,
                                       strides=2,
                                       kernel_initializer=CONV_KERNEL_INITIALIZER,
                                       padding="same",
                                       use_bias=False,
                                       name="conv2d")
        self.norm = layers.BatchNormalization(axis=-1,
                                              momentum=0.9,
                                              epsilon=1e-3,
                                              name="batch_normalization")
        self.act = layers.Activation("swish")

    def call(self, inputs, training=None):
        features = self.conv_stem(inputs)
        normalized = self.norm(features, training=training)
        return self.act(normalized)


class Head(layers.Layer):
    """Classification head: 1x1 conv -> BN -> swish -> global average pool -> dropout -> dense.

    :param filters: number of channels produced by the 1x1 convolution
    :param num_classes: number of output logits
    :param drop_rate: dropout probability before the classifier; no dropout
        layer is created when it is not greater than 0
    :param name: layer name
    """

    def __init__(self,
                 filters: int = 1280,
                 num_classes: int = 1000,
                 drop_rate: float = 0.,
                 name: str = None):
        super(Head, self).__init__(name=name)
        self.conv_head = layers.Conv2D(
            filters=filters,
            kernel_size=1,
            kernel_initializer=CONV_KERNEL_INITIALIZER,
            padding="same",
            use_bias=False,
            name="conv2d")
        self.norm = layers.BatchNormalization(
            axis=-1,
            momentum=0.9,
            epsilon=1e-3,
            name="batch_normalization")
        self.act = layers.Activation("swish")

        self.avg = layers.GlobalAveragePooling2D()
        self.fc = layers.Dense(num_classes,
                               kernel_initializer=DENSE_KERNEL_INITIALIZER)

        # Always define the attribute: call() reads it, and with the default
        # drop_rate=0. it used to be missing, which raised AttributeError.
        self.dropout = layers.Dropout(drop_rate) if drop_rate > 0 else None

    def call(self, inputs, training=None):
        x = self.conv_head(inputs)
        # pass the training flag explicitly, as Stem does
        x = self.norm(x, training=training)
        x = self.act(x)
        x = self.avg(x)

        if self.dropout is not None:
            x = self.dropout(x, training=training)

        x = self.fc(x)
        return x


class EfficientNetV2(Model):
    """EfficientNetV2 classifier assembled from a stage configuration list.

    Every entry of ``model_cnf`` has eight fields:
    repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio.
    Operator 0 selects FusedMBConv, any other value selects MBConv. The
    stochastic-depth rate of each block grows linearly with its index up to
    ``drop_connect_rate``.
    """

    def __init__(self,
                 model_cnf: list,
                 num_classes: int = 1000,
                 num_features: int = 1280,
                 dropout_rate: float = 0.2,
                 drop_connect_rate: float = 0.2,
                 name: str = None):
        super().__init__(name=name)

        for cnf in model_cnf:
            assert len(cnf) == 8

        # the stem outputs the channel count the first stage expects
        self.stem = Stem(model_cnf[0][4])

        total_blocks = sum(cnf[0] for cnf in model_cnf)
        self.blocks = []
        block_id = 0
        # Builds blocks.
        for cnf in model_cnf:
            block_cls = FusedMBConv if cnf[-2] == 0 else MBConv
            for i in range(cnf[0]):
                is_first = i == 0
                # only the first block of a stage changes channels / resolution
                block = block_cls(kernel_size=cnf[1],
                                  input_c=cnf[4] if is_first else cnf[5],
                                  out_c=cnf[5],
                                  expand_ratio=cnf[3],
                                  stride=cnf[2] if is_first else 1,
                                  se_ratio=cnf[-1],
                                  drop_rate=drop_connect_rate * block_id / total_blocks,
                                  name="blocks_{}".format(block_id))
                self.blocks.append(block)
                block_id += 1

        self.head = Head(num_features, num_classes, dropout_rate)

    # Disabled helper kept for reference:
    # def summary(self, input_shape=(224, 224, 3), **kwargs):
    #     x = Input(shape=input_shape)
    #     model = Model(inputs=[x], outputs=self.call(x, training=True))
    #     return model.summary()

    def call(self, inputs, training=None):
        x = self.stem(inputs, training)

        # call for blocks.
        for block in self.blocks:
            x = block(x, training=training)

        return self.head(x, training=training)


def efficientnetv2_s(num_classes: int = 1000):
    """
    Build EfficientNetV2-S (head dropout 0.2).
    https://arxiv.org/abs/2104.00298
    """
    # train_size: 300, eval_size: 384

    stage_settings = [
        # repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio
        [2, 3, 1, 1, 24, 24, 0, 0],
        [4, 3, 2, 4, 24, 48, 0, 0],
        [4, 3, 2, 4, 48, 64, 0, 0],
        [6, 3, 2, 4, 64, 128, 1, 0.25],
        [9, 3, 1, 6, 128, 160, 1, 0.25],
        [15, 3, 2, 6, 160, 256, 1, 0.25],
    ]

    return EfficientNetV2(model_cnf=stage_settings,
                          num_classes=num_classes,
                          dropout_rate=0.2,
                          name="efficientnetv2-s")


def efficientnetv2_m(num_classes: int = 1000):
    """
    Build EfficientNetV2-M (head dropout 0.3).
    https://arxiv.org/abs/2104.00298
    """
    # train_size: 384, eval_size: 480

    stage_settings = [
        # repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio
        [3, 3, 1, 1, 24, 24, 0, 0],
        [5, 3, 2, 4, 24, 48, 0, 0],
        [5, 3, 2, 4, 48, 80, 0, 0],
        [7, 3, 2, 4, 80, 160, 1, 0.25],
        [14, 3, 1, 6, 160, 176, 1, 0.25],
        [18, 3, 2, 6, 176, 304, 1, 0.25],
        [5, 3, 1, 6, 304, 512, 1, 0.25],
    ]

    return EfficientNetV2(model_cnf=stage_settings,
                          num_classes=num_classes,
                          dropout_rate=0.3,
                          name="efficientnetv2-m")


def efficientnetv2_l(num_classes: int = 1000):
    """
    Build EfficientNetV2-L (head dropout 0.4).
    https://arxiv.org/abs/2104.00298
    """
    # train_size: 384, eval_size: 480

    stage_settings = [
        # repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio
        [4, 3, 1, 1, 32, 32, 0, 0],
        [7, 3, 2, 4, 32, 64, 0, 0],
        [7, 3, 2, 4, 64, 96, 0, 0],
        [10, 3, 2, 4, 96, 192, 1, 0.25],
        [19, 3, 1, 6, 192, 224, 1, 0.25],
        [25, 3, 2, 6, 224, 384, 1, 0.25],
        [7, 3, 1, 6, 384, 640, 1, 0.25],
    ]

    return EfficientNetV2(model_cnf=stage_settings,
                          num_classes=num_classes,
                          dropout_rate=0.4,
                          name="efficientnetv2-l")


# m = efficientnetv2_s()
# m.summary()


================================================
FILE: tensorflow_classification/Test11_efficientnetV2/predict.py
================================================
import os
import json
import glob
import numpy as np

from PIL import Image
import tensorflow as tf
import matplotlib.pyplot as plt

from model import efficientnetv2_s as create_model


def main():
    """Classify one image with a trained EfficientNetV2 and show the result.

    Loads ``../tulip.jpg``, resizes it to the evaluation size of the selected
    model, restores the weights from ``./save_weights/efficientnetv2.ckpt``,
    prints the probability of every class and displays the image with the
    top prediction as its title.
    """
    num_classes = 5

    # evaluation resolution of each model variant
    img_size = {"s": 384,
                "m": 480,
                "l": 480}
    num_model = "s"
    im_height = im_width = img_size[num_model]

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    # Force three channels: a grayscale, palette or RGBA file would otherwise
    # produce an array whose channel count the network cannot consume.
    img = Image.open(img_path).convert("RGB")
    # resize image
    img = img.resize((im_width, im_height))
    plt.imshow(img)

    # read image
    img = np.array(img).astype(np.float32)

    # preprocess: scale pixel values from [0, 255] to [-1, 1]
    img = (img / 255. - 0.5) / 0.5

    # Add the image to a batch where it's the only member.
    img = (np.expand_dims(img, 0))

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = create_model(num_classes=num_classes)

    # a TF checkpoint is several files sharing this prefix, hence the glob
    weights_path = './save_weights/efficientnetv2.ckpt'
    assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path)
    model.load_weights(weights_path)

    # the model outputs logits; softmax turns them into probabilities
    result = np.squeeze(model.predict(img))
    result = tf.keras.layers.Softmax()(result)
    predict_class = np.argmax(result)

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_class)],
                                                 result[predict_class])
    plt.title(print_res)
    for i, prob in enumerate(result):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  prob))
    plt.show()


# Run the prediction only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test11_efficientnetV2/train.py
================================================
import os
import sys
import math
import datetime

import tensorflow as tf
from tqdm import tqdm

from model import efficientnetv2_s as create_model
from utils import generate_ds

# Compare the (major, minor) version numerically: a plain string comparison
# ranks "2.10.0" below "2.4.0" and would reject newer TensorFlow releases.
assert tuple(int(part) for part in tf.version.VERSION.split(".")[:2]) >= (2, 4), \
    "version of tf must greater/equal than 2.4.0"


def main():
    """Fine-tune EfficientNetV2-S on an image-folder dataset with a custom training loop.

    The function builds the train/val datasets via ``generate_ds``, loads the
    pretrained ``.h5`` weights by layer name, optionally freezes every layer whose
    name does not contain "head", and then trains with SGD + a cosine learning-rate
    schedule. Loss/accuracy/learning rate are written to TensorBoard under
    ``./logs/<timestamp>`` and the weights with the best validation accuracy are
    saved to ``./save_weights/efficientnetv2.ckpt``.
    """
    data_root = "/data/flower_photos"  # get data root path

    os.makedirs("./save_weights", exist_ok=True)

    img_size = {"s": [300, 384],  # train_size, val_size
                "m": [384, 480],
                "l": [384, 480]}
    num_model = "s"

    batch_size = 8
    epochs = 30
    num_classes = 5
    freeze_layers = True
    initial_lr = 0.01

    log_dir = "./logs/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    train_writer = tf.summary.create_file_writer(os.path.join(log_dir, "train"))
    val_writer = tf.summary.create_file_writer(os.path.join(log_dir, "val"))

    # data generator with data augmentation
    train_ds, val_ds = generate_ds(data_root,
                                   train_im_height=img_size[num_model][0],
                                   train_im_width=img_size[num_model][0],
                                   val_im_height=img_size[num_model][1],
                                   val_im_width=img_size[num_model][1],
                                   batch_size=batch_size)

    # create model
    model = create_model(num_classes=num_classes)
    model.build((1, img_size[num_model][0], img_size[num_model][0], 3))

    # Download the pretrained weights that were converted in advance
    # link: https://pan.baidu.com/s/1Pr-pO5sQVySPQnBY8pQH7w  password: f6hi
    # load weights
    pre_weights_path = './efficientnetv2-s.h5'
    assert os.path.exists(pre_weights_path), "cannot find {}".format(pre_weights_path)
    # by_name + skip_mismatch: layers whose shapes differ from the file are skipped
    model.load_weights(pre_weights_path, by_name=True, skip_mismatch=True)

    # freeze bottom layers
    if freeze_layers:
        unfreeze_layers = "head"
        for layer in model.layers:
            if unfreeze_layers not in layer.name:
                layer.trainable = False
            else:
                print("training {}".format(layer.name))

    model.summary()

    # custom learning rate curve
    def scheduler(now_epoch):
        """Return the cosine-decayed learning rate for ``now_epoch`` and log it."""
        end_lr_rate = 0.01  # end_lr = initial_lr * end_lr_rate
        rate = ((1 + math.cos(now_epoch * math.pi / epochs)) / 2) * (1 - end_lr_rate) + end_lr_rate  # cosine
        new_lr = rate * initial_lr

        # writing lr into tensorboard; use the function's own argument as the step
        # instead of relying on the enclosing loop variable
        with train_writer.as_default():
            tf.summary.scalar('learning rate', data=new_lr, step=now_epoch)

        return new_lr

    # using keras low level api for training
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    optimizer = tf.keras.optimizers.SGD(learning_rate=initial_lr, momentum=0.9)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

    @tf.function
    def train_step(train_images, train_labels):
        """Run one optimisation step and update the running train metrics."""
        with tf.GradientTape() as tape:
            output = model(train_images, training=True)
            loss = loss_object(train_labels, output)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(train_labels, output)

    @tf.function
    def val_step(val_images, val_labels):
        """Run one forward pass in inference mode and update the running val metrics."""
        output = model(val_images, training=False)
        loss = loss_object(val_labels, output)

        val_loss(loss)
        val_accuracy(val_labels, output)

    best_val_acc = 0.
    for epoch in range(epochs):
        train_loss.reset_states()  # clear history info
        train_accuracy.reset_states()  # clear history info
        val_loss.reset_states()  # clear history info
        val_accuracy.reset_states()  # clear history info

        # train
        train_bar = tqdm(train_ds, file=sys.stdout)
        for images, labels in train_bar:
            train_step(images, labels)

            # print train process
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                                 epochs,
                                                                                 train_loss.result(),
                                                                                 train_accuracy.result())

        # update learning rate
        # NOTE(review): the rate is updated after the epoch's training pass, so the
        # value computed for `epoch` takes effect in the following epoch — confirm
        # this one-epoch lag is intended.
        optimizer.learning_rate = scheduler(epoch)

        # validate
        val_bar = tqdm(val_ds, file=sys.stdout)
        for images, labels in val_bar:
            val_step(images, labels)

            # print val process
            val_bar.desc = "valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                               epochs,
                                                                               val_loss.result(),
                                                                               val_accuracy.result())
        # writing training loss and acc
        with train_writer.as_default():
            tf.summary.scalar("loss", train_loss.result(), epoch)
            tf.summary.scalar("accuracy", train_accuracy.result(), epoch)

        # writing validation loss and acc
        with val_writer.as_default():
            tf.summary.scalar("loss", val_loss.result(), epoch)
            tf.summary.scalar("accuracy", val_accuracy.result(), epoch)

        # only save best weights
        if val_accuracy.result() > best_val_acc:
            best_val_acc = val_accuracy.result()
            save_name = "./save_weights/efficientnetv2.ckpt"
            model.save_weights(save_name, save_format="tf")


if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test11_efficientnetV2/trans_weights.py
================================================
from model import *


def main(ckpt_path: str,
         model_name: str,
         model: tf.keras.Model):
    """Copy variables from a TensorFlow checkpoint into ``model`` and save them as .h5.

    Each model weight is looked up in the checkpoint under
    ``<model_name>/<weight name>`` with "batch_normalization" renamed to
    "tpu_batch_normalization". Weights that are missing from the checkpoint or
    whose shape differs are reported on stdout and left untouched.

    :param ckpt_path: checkpoint prefix passed to ``tf.train.load_checkpoint``
    :param model_name: variable-scope prefix in the checkpoint; also the output file stem
    :param model: built Keras model whose weights are overwritten in place
    """
    weights_by_name = {v.name.split(':')[0]: v for v in model.weights}

    ckpt_reader = tf.train.load_checkpoint(ckpt_path)
    ckpt_shapes = ckpt_reader.get_variable_to_shape_map()

    for name, variable in weights_by_name.items():
        ckpt_key = "{}/{}".format(model_name, name)
        ckpt_key = ckpt_key.replace("batch_normalization", "tpu_batch_normalization")

        if ckpt_key not in ckpt_shapes:
            print("Not found {} in {}".format(name, ckpt_path))
            continue

        if ckpt_shapes[ckpt_key] != variable.shape:
            print("shape mismatch: {}".format(name))
            continue

        variable.assign(ckpt_reader.get_tensor(ckpt_key), read_value=False)

    model.save_weights("./{}.h5".format(model_name))


# Script entry: convert the EfficientNetV2-S checkpoint to "./efficientnetv2-s.h5".
# The model is built first so that its weights exist before they are assigned.
# Un-comment one of the blocks below to convert the M or L variant instead.
if __name__ == '__main__':
    model = efficientnetv2_s()
    model.build((1, 224, 224, 3))
    main(ckpt_path="./efficientnetv2-s-21k-ft1k/model",
         model_name="efficientnetv2-s",
         model=model)

    # model = efficientnetv2_m()
    # model.build((1, 224, 224, 3))
    # main(ckpt_path="./efficientnetv2-m-21k-ft1k/model",
    #      model_name="efficientnetv2-m",
    #      model=model)

    # model = efficientnetv2_l()
    # model.build((1, 224, 224, 3))
    # main(ckpt_path="./efficientnetv2-l-21k-ft1k/model",
    #      model_name="efficientnetv2-l",
    #      model=model)


================================================
FILE: tensorflow_classification/Test11_efficientnetV2/utils.py
================================================
import os
import json
import random

import tensorflow as tf
import matplotlib.pyplot as plt


def read_split_data(root: str, val_rate: float = 0.2):
    """Split an image-folder dataset into training and validation lists.

    Every sub-directory of ``root`` is treated as one class; classes are sorted
    by name and numbered from 0. The index -> class-name mapping is written to
    ``class_indices.json`` in the current working directory. For each class,
    ``int(len(images) * val_rate)`` images are randomly sampled (seed 0) for
    validation and the rest are used for training.

    :param root: dataset root directory containing one folder per class
    :param val_rate: fraction of each class assigned to the validation set
    :return: (train_images_path, train_images_label, val_images_path, val_images_label)
    :raises AssertionError: if ``root`` does not exist
    """
    random.seed(0)  # fixed seed so the random split is the same on every run
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # walk the root: one folder corresponds to one class
    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]
    # sort so that the class -> index assignment is stable
    flower_class.sort()
    # build class name -> numeric index, and dump the inverse mapping to json
    class_indices = {k: v for v, k in enumerate(flower_class)}
    json_str = json.dumps({val: key for key, val in class_indices.items()}, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of each training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of each validation image
    every_class_num = []  # number of samples found per class
    supported = [".jpg", ".JPG", ".jpeg", ".JPEG"]  # accepted file suffixes
    # visit the files of every class folder
    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # collect every file whose suffix is supported
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        # numeric index of this class
        image_class = class_indices[cla]
        # record how many samples this class has
        every_class_num.append(len(images))
        # randomly sample the validation images; kept as a set so that the
        # membership test below is O(1) instead of a list scan per image
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:  # sampled for validation
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:  # everything else is used for training
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.\n{} for training, {} for validation".format(sum(every_class_num),
                                                                                            len(train_images_path),
                                                                                            len(val_images_path)
                                                                                            ))

    plot_image = False
    if plot_image:
        # bar chart of the number of images per class
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # replace the x ticks 0,1,2,... with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # put the count on top of every bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        # x-axis label
        plt.xlabel('image class')
        # y-axis label
        plt.ylabel('number of images')
        # chart title
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label


def generate_ds(data_root: str,
                train_im_height: int = None,
                train_im_width: int = None,
                val_im_height: int = None,
                val_im_width: int = None,
                batch_size: int = 8,
                val_rate: float = 0.1,
                cache_data: bool = False):
    """
    Read and split the dataset, then build the training and validation tf.data pipelines.
    :param data_root: dataset root directory
    :param train_im_height: height of the images fed to the network during training
    :param train_im_width:  width of the images fed to the network during training
    :param val_im_height: height of the images fed to the network during validation
                          (defaults to the training height)
    :param val_im_width:  width of the images fed to the network during validation
                          (defaults to the training width)
    :param batch_size: batch size used by both pipelines
    :param val_rate:  fraction of the data assigned to the validation set
    :param cache_data: whether to cache the decoded training data (validation is never cached)
    :return: (train_ds, val_ds)
    """
    assert train_im_height is not None
    assert train_im_width is not None
    val_im_width = train_im_width if val_im_width is None else val_im_width
    val_im_height = train_im_height if val_im_height is None else val_im_height

    train_img_path, train_img_label, val_img_path, val_img_label = read_split_data(data_root, val_rate=val_rate)
    autotune = tf.data.experimental.AUTOTUNE

    def load_image(path, height, width):
        # read the file, decode as 3-channel JPEG, then centre crop / zero pad to size
        raw = tf.io.read_file(path)
        pixels = tf.cast(tf.image.decode_jpeg(raw, channels=3), tf.float32)
        return tf.image.resize_with_crop_or_pad(pixels, height, width)

    def normalize(pixels):
        # map [0, 255] to [-1, 1]
        return (pixels / 255. - 0.5) / 0.5

    def process_train_info(img_path, label):
        image = load_image(img_path, train_im_height, train_im_width)
        image = tf.image.random_flip_left_right(image)  # augmentation, training only
        return normalize(image), label

    def process_val_info(img_path, label):
        return normalize(load_image(img_path, val_im_height, val_im_width)), label

    def build_pipeline(paths, labels, map_fn, shuffle, cache):
        # (path, label) pairs -> (image, label) pairs
        ds = tf.data.Dataset.from_tensor_slices((tf.constant(paths),
                                                 tf.constant(labels)))
        ds = ds.map(map_fn, num_parallel_calls=autotune)
        if cache:
            ds = ds.cache()  # keep the decoded data in memory after the first pass
        if shuffle:
            ds = ds.shuffle(buffer_size=len(paths))  # shuffle over the whole split
        ds = ds.batch(batch_size)
        # prepare the next batch while the current step is running
        return ds.prefetch(buffer_size=autotune)

    train_ds = build_pipeline(train_img_path, train_img_label, process_train_info,
                              shuffle=True, cache=cache_data)
    val_ds = build_pipeline(val_img_path, val_img_label, process_val_info,
                            shuffle=False, cache=False)

    return train_ds, val_ds


================================================
FILE: tensorflow_classification/Test1_official_demo/model.py
================================================
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras import Model


class MyModel(Model):
    """Small CNN classifier: one conv layer, flatten, and two dense layers (10-way softmax)."""

    def __init__(self):
        super().__init__()
        self.conv1 = Conv2D(filters=32, kernel_size=3, activation='relu')
        self.flatten = Flatten()
        self.d1 = Dense(units=128, activation='relu')
        self.d2 = Dense(units=10, activation='softmax')

    def call(self, x, **kwargs):
        # shapes below are for a [batch, 28, 28, 1] input
        feature_map = self.conv1(x)           # [batch, 26, 26, 32]
        flat = self.flatten(feature_map)      # [batch, 21632]
        hidden = self.d1(flat)                # [batch, 128]
        return self.d2(hidden)                # [batch, 10]


================================================
FILE: tensorflow_classification/Test1_official_demo/train.py
================================================
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
from model import MyModel


def main():
    """Train MyModel on MNIST for a few epochs with a custom loop and print metrics per epoch."""
    # download and load data
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

    # scale pixels to [0, 1] and append a channels dimension
    x_train = (x_train / 255.0)[..., tf.newaxis]
    x_test = (x_test / 255.0)[..., tf.newaxis]

    # input pipelines: the training data is shuffled, the test data is not
    train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    train_ds = train_ds.shuffle(10000).batch(32)
    test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)

    model = MyModel()

    # loss and optimizer
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
    optimizer = tf.keras.optimizers.Adam()

    # running metrics for the training set
    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

    # running metrics for the test set
    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

    @tf.function
    def train_step(images, labels):
        # forward pass under the tape, then apply gradients and update metrics
        with tf.GradientTape() as tape:
            predictions = model(images)
            loss = loss_object(labels, predictions)
        trainable = model.trainable_variables
        grads = tape.gradient(loss, trainable)
        optimizer.apply_gradients(zip(grads, trainable))

        train_loss(loss)
        train_accuracy(labels, predictions)

    @tf.function
    def test_step(images, labels):
        # forward pass only; update the test metrics
        predictions = model(images)
        test_loss(loss_object(labels, predictions))
        test_accuracy(labels, predictions)

    num_epochs = 5
    report = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

    for epoch in range(num_epochs):
        # start every epoch with clean metric state
        for metric in (train_loss, train_accuracy, test_loss, test_accuracy):
            metric.reset_states()

        for batch_images, batch_labels in train_ds:
            train_step(batch_images, batch_labels)

        for batch_images, batch_labels in test_ds:
            test_step(batch_images, batch_labels)

        print(report.format(epoch + 1,
                            train_loss.result(),
                            train_accuracy.result() * 100,
                            test_loss.result(),
                            test_accuracy.result() * 100))


if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test2_alexnet/class_indices.json
================================================
{
    "0": "daisy",
    "1": "dandelion",
    "2": "roses",
    "3": "sunflowers",
    "4": "tulips"
}

================================================
FILE: tensorflow_classification/Test2_alexnet/fine_train_alexnet.py
================================================
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import json
import os
import glob
from tensorflow.keras import layers, models


def AlexNet_pytorch(im_height=224, im_width=224, num_classes=1000):
    """Build a functional Keras AlexNet with channel widths 64/192/384/256/256 and 4096-unit dense layers.

    NOTE(review): the layers rely on Keras auto-generated names (conv2d, conv2d_1, ...,
    dense, dense_1, dense_2), which appear to be what the converted checkpoint loaded in
    ``main`` is keyed on — keep the construction order unchanged.

    :param im_height: input image height
    :param im_width: input image width
    :param num_classes: number of output classes
    :return: ``models.Model`` mapping an NHWC image batch to softmax class probabilities
    """
    # tensors in tensorflow use the NHWC channel ordering
    input_image = layers.Input(shape=(im_height, im_width, 3), dtype="float32")  # output(None, 224, 224, 3)
    x = layers.ZeroPadding2D(((2, 1), (2, 1)))(input_image)                      # output(None, 227, 227, 3)
    x = layers.Conv2D(64, kernel_size=11, strides=4, activation="relu")(x)       # output(None, 55, 55, 64)
    x = layers.MaxPool2D(pool_size=3, strides=2)(x)                              # output(None, 27, 27, 64)
    x = layers.Conv2D(192, kernel_size=5, padding="same", activation="relu")(x)  # output(None, 27, 27, 192)
    x = layers.MaxPool2D(pool_size=3, strides=2)(x)                              # output(None, 13, 13, 192)
    x = layers.Conv2D(384, kernel_size=3, padding="same", activation="relu")(x)  # output(None, 13, 13, 384)
    x = layers.Conv2D(256, kernel_size=3, padding="same", activation="relu")(x)  # output(None, 13, 13, 256)
    x = layers.Conv2D(256, kernel_size=3, padding="same", activation="relu")(x)  # output(None, 13, 13, 256)
    x = layers.MaxPool2D(pool_size=3, strides=2)(x)                              # output(None, 6, 6, 256)

    x = layers.Flatten()(x)                         # output(None, 6*6*256)
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(4096, activation="relu")(x)    # output(None, 4096)
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(4096, activation="relu")(x)    # output(None, 4096)
    x = layers.Dense(num_classes)(x)                  # output(None, num_classes)
    predict = layers.Softmax()(x)

    model = models.Model(inputs=input_image, outputs=predict)
    return model


def main():
    """Fine-tune the converted AlexNet on the flower dataset with frozen conv layers.

    Reads train/val images from ``<cwd>/../../data_set/flower_data``, writes the
    index -> class mapping to ``class_indices.json``, loads
    ``./pretrain_weights.ckpt``, trains with ``model.fit`` and finally plots the
    loss and accuracy curves. The weights with the lowest validation loss are
    kept in ``./save_weights/myAlex.h5``.
    """
    project_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # data root path
    flower_dir = os.path.join(project_root, "data_set", "flower_data")  # flower data set path
    train_dir = os.path.join(flower_dir, "train")
    validation_dir = os.path.join(flower_dir, "val")
    assert os.path.exists(train_dir), "cannot find {}".format(train_dir)
    assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir)

    # directory for saving weights
    if not os.path.exists("save_weights"):
        os.makedirs("save_weights")

    im_height, im_width = 224, 224
    batch_size = 32
    epochs = 10

    def pre_function(img: np.ndarray):
        # scale to [0, 1], then subtract the per-channel mean and divide by the std
        scaled = img / 255.
        centered = scaled - [0.485, 0.456, 0.406]
        return centered / [0.229, 0.224, 0.225]

    # data generators; only the training one applies augmentation
    train_image_generator = ImageDataGenerator(horizontal_flip=True,
                                               preprocessing_function=pre_function)
    validation_image_generator = ImageDataGenerator(preprocessing_function=pre_function)

    # options shared by both directory iterators
    flow_options = dict(batch_size=batch_size,
                        target_size=(im_height, im_width),
                        class_mode='categorical')

    train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,
                                                               shuffle=True,
                                                               **flow_options)
    total_train = train_data_gen.n

    # invert {class name: index} into {index: class name} and store it as json
    index_to_class = {index: name for name, index in train_data_gen.class_indices.items()}
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json.dumps(index_to_class, indent=4))

    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,
                                                                  shuffle=False,
                                                                  **flow_options)
    total_val = val_data_gen.n
    print("using {} images for training, {} images for validation.".format(total_train,
                                                                           total_val))

    model = AlexNet_pytorch(im_height=im_height, im_width=im_width, num_classes=5)

    # a TF checkpoint is stored as several files sharing this prefix
    pre_weights_path = './pretrain_weights.ckpt'
    assert len(glob.glob(pre_weights_path+"*")), "cannot find {}".format(pre_weights_path)
    model.load_weights(pre_weights_path)

    # freeze every convolution layer; only the dense layers are trained
    for layer in model.layers:
        if 'conv2d' in layer.name:
            layer.trainable = False

    model.summary()

    # using keras high level api for training
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
                  metrics=["accuracy"])

    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(filepath='./save_weights/myAlex.h5',
                                                       save_best_only=True,
                                                       save_weights_only=True,
                                                       monitor='val_loss')

    history = model.fit(x=train_data_gen,
                        steps_per_epoch=total_train // batch_size,
                        epochs=epochs,
                        validation_data=val_data_gen,
                        validation_steps=total_val // batch_size,
                        callbacks=[checkpoint_cb])

    # collect the curves recorded by fit()
    history_dict = history.history
    train_loss = history_dict["loss"]
    train_accuracy = history_dict["accuracy"]
    val_loss = history_dict["val_loss"]
    val_accuracy = history_dict["val_accuracy"]

    # one figure per metric: loss first, then accuracy
    curves = (("loss", train_loss, val_loss),
              ("accuracy", train_accuracy, val_accuracy))
    for metric_name, train_curve, val_curve in curves:
        plt.figure()
        plt.plot(range(epochs), train_curve, label='train_' + metric_name)
        plt.plot(range(epochs), val_curve, label='val_' + metric_name)
        plt.legend()
        plt.xlabel('epochs')
        plt.ylabel(metric_name)
    plt.show()


if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test2_alexnet/model.py
================================================
from tensorflow.keras import layers, models, Model, Sequential


def AlexNet_v1(im_height=224, im_width=224, num_classes=1000):
    """Build a reduced-width AlexNet with the Keras functional API.

    :param im_height: input image height
    :param im_width: input image width
    :param num_classes: number of output classes
    :return: ``models.Model`` mapping an NHWC image batch to softmax class probabilities
    """
    # tensors in tensorflow use the NHWC channel ordering
    input_image = layers.Input(shape=(im_height, im_width, 3), dtype="float32")  # (None, 224, 224, 3)

    # layers in forward order; shape comments assume the default 224x224 input
    stack = [
        layers.ZeroPadding2D(((1, 2), (1, 2))),                                # (None, 227, 227, 3)
        layers.Conv2D(48, kernel_size=11, strides=4, activation="relu"),       # (None, 55, 55, 48)
        layers.MaxPool2D(pool_size=3, strides=2),                              # (None, 27, 27, 48)
        layers.Conv2D(128, kernel_size=5, padding="same", activation="relu"),  # (None, 27, 27, 128)
        layers.MaxPool2D(pool_size=3, strides=2),                              # (None, 13, 13, 128)
        layers.Conv2D(192, kernel_size=3, padding="same", activation="relu"),  # (None, 13, 13, 192)
        layers.Conv2D(192, kernel_size=3, padding="same", activation="relu"),  # (None, 13, 13, 192)
        layers.Conv2D(128, kernel_size=3, padding="same", activation="relu"),  # (None, 13, 13, 128)
        layers.MaxPool2D(pool_size=3, strides=2),                              # (None, 6, 6, 128)
        layers.Flatten(),                                                      # (None, 6*6*128)
        layers.Dropout(0.2),
        layers.Dense(2048, activation="relu"),                                 # (None, 2048)
        layers.Dropout(0.2),
        layers.Dense(2048, activation="relu"),                                 # (None, 2048)
        layers.Dense(num_classes),                                             # (None, num_classes)
        layers.Softmax(),
    ]

    # chain the layers onto the input tensor
    tensor = input_image
    for layer in stack:
        tensor = layer(tensor)

    return models.Model(inputs=input_image, outputs=tensor)


class AlexNet_v2(Model):
    """Subclassed-model AlexNet variant: a conv feature extractor followed by a dense classifier."""

    def __init__(self, num_classes=1000):
        super().__init__()

        # shape comments assume a (None, 224, 224, 3) input
        feature_layers = [
            layers.ZeroPadding2D(((1, 2), (1, 2))),                                 # (None, 227, 227, 3)
            layers.Conv2D(48, kernel_size=11, strides=4, activation="relu"),        # (None, 55, 55, 48)
            layers.MaxPool2D(pool_size=3, strides=2),                               # (None, 27, 27, 48)
            layers.Conv2D(128, kernel_size=5, padding="same", activation="relu"),   # (None, 27, 27, 128)
            layers.MaxPool2D(pool_size=3, strides=2),                               # (None, 13, 13, 128)
            layers.Conv2D(192, kernel_size=3, padding="same", activation="relu"),   # (None, 13, 13, 192)
            layers.Conv2D(192, kernel_size=3, padding="same", activation="relu"),   # (None, 13, 13, 192)
            layers.Conv2D(128, kernel_size=3, padding="same", activation="relu"),   # (None, 13, 13, 128)
            layers.MaxPool2D(pool_size=3, strides=2),                               # (None, 6, 6, 128)
        ]
        self.features = Sequential(feature_layers)

        self.flatten = layers.Flatten()

        classifier_layers = [
            layers.Dropout(0.2),
            layers.Dense(1024, activation="relu"),                                  # (None, 1024)
            layers.Dropout(0.2),
            layers.Dense(128, activation="relu"),                                   # (None, 128)
            layers.Dense(num_classes),                                              # (None, num_classes)
            layers.Softmax(),
        ]
        self.classifier = Sequential(classifier_layers)

    def call(self, inputs, **kwargs):
        feature_map = self.features(inputs)
        flat = self.flatten(feature_map)
        return self.classifier(flat)


================================================
FILE: tensorflow_classification/Test2_alexnet/predict.py
================================================
import os
import json

from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

from model import AlexNet_v1, AlexNet_v2


def main():
    """Classify ``../tulip.jpg`` with the trained AlexNet_v1 and show the result.

    Loads the image, resizes it to 224x224, scales pixels to [0, 1], restores the
    weights from ``./save_weights/myAlex.h5`` and prints the probability of every
    class listed in ``./class_indices.json``. The top prediction is used as the
    title of the displayed image.
    """
    im_height = 224
    im_width = 224

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    img = Image.open(img_path)

    # resize image to 224x224
    img = img.resize((im_width, im_height))
    plt.imshow(img)

    # scaling pixel value to (0-1)
    img = np.array(img) / 255.

    # Add the image to a batch where it's the only member.
    img = (np.expand_dims(img, 0))

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = AlexNet_v1(num_classes=5)
    weighs_path = "./save_weights/myAlex.h5"
    # check the weights file itself (the previous check tested img_path by mistake)
    assert os.path.exists(weighs_path), "file: '{}' dose not exist.".format(weighs_path)
    model.load_weights(weighs_path)

    # prediction: drop the batch dimension to get one probability per class
    result = np.squeeze(model.predict(img))
    predict_class = np.argmax(result)

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_class)],
                                                 result[predict_class])
    plt.title(print_res)
    for i, prob in enumerate(result):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  prob))
    plt.show()


if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test2_alexnet/read_pth.py
================================================
import torch
import numpy as np
import tensorflow as tf


def rename_var(pth_path, new_ckpt_path, num_classes):
    """Convert a PyTorch AlexNet state dict into a TensorFlow checkpoint with Keras-style names.

    Entries listed in the module-level ``except_list`` are skipped. Convolution
    kernels are transposed from (out, in, h, w) to (h, w, in, out); every other
    tensor is transposed with ``np.transpose`` defaults. A freshly initialised
    ``dense_2`` kernel/bias for ``num_classes`` outputs is appended.

    :param pth_path: path of the PyTorch ``.pth`` file
    :param new_ckpt_path: path prefix of the checkpoint to write
    :param num_classes: output size of the new ``dense_2`` layer
    """
    # PyTorch module prefix -> Keras auto-generated layer name
    layer_names = (
        ("features.0", "conv2d"),
        ("features.3", "conv2d_1"),
        ("features.6", "conv2d_2"),
        ("features.8", "conv2d_3"),
        ("features.10", "conv2d_4"),
        ("classifier.1", "dense"),
        ("classifier.4", "dense_1"),
    )

    state = torch.load(pth_path)

    with tf.Graph().as_default(), tf.compat.v1.Session().as_default() as sess:
        converted = []

        for key, tensor in state.items():
            if key in except_list:
                continue

            array = tensor.detach().numpy()

            # rewrite "<prefix>.weight" / "<prefix>.bias" into "<layer>/kernel" / "<layer>/bias"
            tf_name = key
            for torch_prefix, keras_name in layer_names:
                tf_name = tf_name.replace(torch_prefix + ".weight", keras_name + "/kernel")
                tf_name = tf_name.replace(torch_prefix + ".bias", keras_name + "/bias")

            is_conv_kernel = 'conv2d' in tf_name and 'kernel' in tf_name
            if is_conv_kernel:
                array = np.transpose(array, (2, 3, 1, 0))
            else:
                array = np.transpose(array)
            array = array.astype(np.float32)

            converted.append(tf.Variable(array, name=tf_name))

        # new classification head, randomly initialised
        head_specs = (("dense_2/kernel", [4096, num_classes]),
                      ("dense_2/bias", [num_classes]))
        for head_name, head_shape in head_specs:
            initial = tf.keras.initializers.he_uniform()(head_shape)
            converted.append(tf.Variable(initial, name=head_name))

        saver = tf.compat.v1.train.Saver(converted)
        sess.run(tf.compat.v1.global_variables_initializer())
        saver.save(sess, save_path=new_ckpt_path, write_meta_graph=False, write_state=False)


# State-dict entries that rename_var skips (read there as a module-level global).
except_list = ['classifier.6.weight', 'classifier.6.bias']
# Pretrained weights: https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth
pth_path = './alexnet-owt-4df8aa71.pth'
# Output checkpoint prefix.
new_ckpt_path = './pretrain_weights.ckpt'
# Output size of the newly initialised dense_2 layer.
num_classes = 5
# NOTE: the conversion runs at import time; this module is meant to be executed as a script.
rename_var(pth_path, new_ckpt_path, num_classes)

================================================
FILE: tensorflow_classification/Test2_alexnet/train.py
================================================
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
from model import AlexNet_v1, AlexNet_v2
import tensorflow as tf
import json
import os


def main():
    """Train AlexNet on the flower dataset with the Keras ``fit`` API.

    Reads images from ``<cwd>/../../data_set/flower_data/{train,val}``, writes the
    index -> class-name mapping to ``class_indices.json``, stores the weights with
    the lowest validation loss in ``./save_weights/myAlex.h5`` and finally shows
    the loss and accuracy curves.
    """
    # the data set is expected two directories above the working directory
    project_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))
    flower_root = os.path.join(project_root, "data_set", "flower_data")
    train_dir = os.path.join(flower_root, "train")
    validation_dir = os.path.join(flower_root, "val")
    assert os.path.exists(train_dir), "cannot find {}".format(train_dir)
    assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir)

    # directory that receives the checkpoint
    if not os.path.exists("save_weights"):
        os.makedirs("save_weights")

    im_height, im_width = 224, 224
    batch_size = 32
    epochs = 10

    # both generators rescale pixels to [0, 1]; only training data is flipped
    train_image_generator = ImageDataGenerator(rescale=1. / 255,
                                               horizontal_flip=True)
    validation_image_generator = ImageDataGenerator(rescale=1. / 255)

    # options shared by the training and the validation iterator
    flow_options = dict(batch_size=batch_size,
                        target_size=(im_height, im_width),
                        class_mode='categorical')

    train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,
                                                               shuffle=True,
                                                               **flow_options)
    total_train = train_data_gen.n

    # persist {index: class name} so the mapping can be read back from disk later
    index_to_class = {index: name for name, index in train_data_gen.class_indices.items()}
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json.dumps(index_to_class, indent=4))

    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,
                                                                  shuffle=False,
                                                                  **flow_options)
    total_val = val_data_gen.n
    print("using {} images for training, {} images for validation.".format(total_train,
                                                                           total_val))

    # sample_training_images, sample_training_labels = next(train_data_gen)  # label is one-hot coding
    #
    # # This function will plot images in the form of a grid with 1 row
    # # and 5 columns where images are placed in each column.
    # def plotImages(images_arr):
    #     fig, axes = plt.subplots(1, 5, figsize=(20, 20))
    #     axes = axes.flatten()
    #     for img, ax in zip(images_arr, axes):
    #         ax.imshow(img)
    #         ax.axis('off')
    #     plt.tight_layout()
    #     plt.show()
    #
    #
    # plotImages(sample_training_images[:5])

    model = AlexNet_v1(im_height=im_height, im_width=im_width, num_classes=5)
    # model = AlexNet_v2(class_num=5)
    # model.build((batch_size, 224, 224, 3))  # when using subclass model
    model.summary()

    # high-level Keras training API
    adam = tf.keras.optimizers.Adam(learning_rate=0.0005)
    cross_entropy = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
    model.compile(optimizer=adam, loss=cross_entropy, metrics=["accuracy"])

    # keep only the weights of the epoch with the lowest validation loss
    checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath='./save_weights/myAlex.h5',
                                                    save_best_only=True,
                                                    save_weights_only=True,
                                                    monitor='val_loss')

    # tensorflow2.1 recommends fit (instead of fit_generator)
    history = model.fit(x=train_data_gen,
                        steps_per_epoch=total_train // batch_size,
                        epochs=epochs,
                        validation_data=val_data_gen,
                        validation_steps=total_val // batch_size,
                        callbacks=[checkpoint])

    # collect the four curves first, then draw one figure per metric
    history_dict = history.history
    curves = {key: history_dict[key]
              for key in ("loss", "accuracy", "val_loss", "val_accuracy")}
    epoch_axis = range(epochs)
    for metric in ("loss", "accuracy"):
        plt.figure()
        plt.plot(epoch_axis, curves[metric], label='train_{}'.format(metric))
        plt.plot(epoch_axis, curves["val_" + metric], label='val_{}'.format(metric))
        plt.legend()
        plt.xlabel('epochs')
        plt.ylabel(metric)
    plt.show()

    # history = model.fit_generator(generator=train_data_gen,
    #                               steps_per_epoch=total_train // batch_size,
    #                               epochs=epochs,
    #                               validation_data=val_data_gen,
    #                               validation_steps=total_val // batch_size,
    #                               callbacks=callbacks)

    # # using keras low level api for training
    # loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
    # optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
    #
    # train_loss = tf.keras.metrics.Mean(name='train_loss')
    # train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')
    #
    # test_loss = tf.keras.metrics.Mean(name='test_loss')
    # test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')
    #
    #
    # @tf.function
    # def train_step(images, labels):
    #     with tf.GradientTape() as tape:
    #         predictions = model(images, training=True)
    #         loss = loss_object(labels, predictions)
    #     gradients = tape.gradient(loss, model.trainable_variables)
    #     optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    #
    #     train_loss(loss)
    #     train_accuracy(labels, predictions)
    #
    #
    # @tf.function
    # def test_step(images, labels):
    #     predictions = model(images, training=False)
    #     t_loss = loss_object(labels, predictions)
    #
    #     test_loss(t_loss)
    #     test_accuracy(labels, predictions)
    #
    #
    # best_test_loss = float('inf')
    # for epoch in range(1, epochs+1):
    #     train_loss.reset_states()        # clear history info
    #     train_accuracy.reset_states()    # clear history info
    #     test_loss.reset_states()         # clear history info
    #     test_accuracy.reset_states()     # clear history info
    #     for step in range(total_train // batch_size):
    #         images, labels = next(train_data_gen)
    #         train_step(images, labels)
    #
    #     for step in range(total_val // batch_size):
    #         test_images, test_labels = next(val_data_gen)
    #         test_step(test_images, test_labels)
    #
    #     template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    #     print(template.format(epoch,
    #                           train_loss.result(),
    #                           train_accuracy.result() * 100,
    #                           test_loss.result(),
    #                           test_accuracy.result() * 100))
    #     if test_loss.result() < best_test_loss:
    #        model.save_weights("./save_weights/myAlex.ckpt", save_format='tf')


# Start training only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test2_alexnet/trainGPU.py
================================================
import matplotlib.pyplot as plt
from model import AlexNet_v1, AlexNet_v2
import tensorflow as tf
import json
import os
import time
import glob
import random
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


def main():
    """Train AlexNet on the flower dataset with a ``tf.data`` pipeline and a custom loop.

    Reads ``*.jpg`` files from ``<cwd>/../../data_set/flower_data/{train,val}``
    (one sub-directory per class), writes the index -> class-name mapping to
    ``class_indices.json`` and saves the weights to ``./save_weights/myAlex.ckpt``
    every time the validation loss improves on the best value seen so far.
    """
    # let TensorFlow grow GPU memory on demand instead of reserving all of it
    gpus = tf.config.experimental.list_physical_devices("GPU")
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
        except RuntimeError as e:
            print(e)
            exit(-1)

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    train_dir = os.path.join(image_path, "train")
    validation_dir = os.path.join(image_path, "val")
    assert os.path.exists(train_dir), "cannot find {}".format(train_dir)
    assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir)

    # create directory for saving weights
    if not os.path.exists("save_weights"):
        os.makedirs("save_weights")

    im_height = 224
    im_width = 224
    batch_size = 32
    epochs = 10

    # class dict: every sub-directory of train_dir is one class.
    # os.listdir returns entries in arbitrary order, so sort them to get a
    # deterministic class -> index mapping.
    data_class = sorted(cla for cla in os.listdir(train_dir)
                        if os.path.isdir(os.path.join(train_dir, cla)))
    class_num = len(data_class)
    class_dict = dict((value, index) for index, value in enumerate(data_class))

    # reverse value and key of dict
    inverse_dict = dict((val, key) for key, val in class_dict.items())
    # write dict into json file
    json_str = json.dumps(inverse_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    # load train images list; the label is taken from the parent directory name
    train_image_list = glob.glob(train_dir+"/*/*.jpg")
    random.shuffle(train_image_list)
    train_num = len(train_image_list)
    assert train_num > 0, "cannot find any .jpg file in {}".format(train_dir)
    train_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in train_image_list]

    # load validation images list
    val_image_list = glob.glob(validation_dir+"/*/*.jpg")
    random.shuffle(val_image_list)
    val_num = len(val_image_list)
    assert val_num > 0, "cannot find any .jpg file in {}".format(validation_dir)
    val_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in val_image_list]

    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    def process_path(img_path, label):
        """Load one JPEG, scale it to float32 in [0, 1], resize it and one-hot the label."""
        label = tf.one_hot(label, depth=class_num)
        image = tf.io.read_file(img_path)
        # force 3 channels so that grayscale JPEGs can be batched with RGB ones
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize(image, [im_height, im_width])
        return image, label

    AUTOTUNE = tf.data.experimental.AUTOTUNE

    # load train dataset (repeated indefinitely; the epoch loop below stops it)
    train_dataset = tf.data.Dataset.from_tensor_slices((train_image_list, train_label_list))
    train_dataset = train_dataset.shuffle(buffer_size=train_num)\
                                 .map(process_path, num_parallel_calls=AUTOTUNE)\
                                 .repeat().batch(batch_size).prefetch(AUTOTUNE)

    # load validation dataset
    val_dataset = tf.data.Dataset.from_tensor_slices((val_image_list, val_label_list))
    val_dataset = val_dataset.map(process_path, num_parallel_calls=tf.data.experimental.AUTOTUNE)\
                             .repeat().batch(batch_size)

    # instantiate the model; the output size follows the classes found on disk so
    # that it always matches the one-hot depth used in process_path
    model = AlexNet_v1(im_height=im_height, im_width=im_width, num_classes=class_num)
    # model = AlexNet_v2(class_num=5)
    # model.build((batch_size, 224, 224, 3))  # when using subclass model
    model.summary()

    # using keras low level api for training
    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')

    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            predictions = model(images, training=True)
            loss = loss_object(labels, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(labels, predictions)

    @tf.function
    def test_step(images, labels):
        predictions = model(images, training=False)
        t_loss = loss_object(labels, predictions)

        test_loss(t_loss)
        test_accuracy(labels, predictions)

    best_test_loss = float('inf')
    train_step_num = train_num // batch_size
    val_step_num = val_num // batch_size
    for epoch in range(1, epochs+1):
        train_loss.reset_states()        # clear history info
        train_accuracy.reset_states()    # clear history info
        test_loss.reset_states()         # clear history info
        test_accuracy.reset_states()     # clear history info

        # the datasets repeat forever, so stop after one pass worth of batches
        t1 = time.perf_counter()
        for index, (images, labels) in enumerate(train_dataset):
            train_step(images, labels)
            if index+1 == train_step_num:
                break
        print(time.perf_counter()-t1)

        for index, (images, labels) in enumerate(val_dataset):
            test_step(images, labels)
            if index+1 == val_step_num:
                break

        template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
        print(template.format(epoch,
                              train_loss.result(),
                              train_accuracy.result() * 100,
                              test_loss.result(),
                              test_accuracy.result() * 100))

        # remember the best validation loss; without this update every epoch would
        # overwrite the checkpoint, including epochs that got worse
        current_test_loss = float(test_loss.result())
        if current_test_loss < best_test_loss:
            best_test_loss = current_test_loss
            model.save_weights("./save_weights/myAlex.ckpt", save_format='tf')

    # # using keras high level api for training
    # model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
    #               loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    #               metrics=["accuracy"])
    #
    # callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath='./save_weights/myAlex_{epoch}.h5',
    #                                                 save_best_only=True,
    #                                                 save_weights_only=True,
    #                                                 monitor='val_loss')]
    #
    # # tensorflow2.1 recommend to using fit
    # history = model.fit(x=train_dataset,
    #                     steps_per_epoch=train_num // batch_size,
    #                     epochs=epochs,
    #                     validation_data=val_dataset,
    #                     validation_steps=val_num // batch_size,
    #                     callbacks=callbacks)


# Start training only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test3_vgg/class_indices.json
================================================
{
    "0": "daisy",
    "1": "dandelion",
    "2": "roses",
    "3": "sunflowers",
    "4": "tulips"
}

================================================
FILE: tensorflow_classification/Test3_vgg/fine_train_vgg16.py
================================================
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
from model import vgg
import tensorflow as tf
import json
import os
import glob


def main():
    """Fine-tune VGG16 on the flower dataset starting from converted pretrained weights.

    Loads ``./pretrain_weights.ckpt``, freezes the sub-model named ``feature`` and
    trains the remaining layers on ``<cwd>/../../data_set/flower_data/{train,val}``.
    The index -> class-name mapping is written to ``class_indices.json`` and the
    weights of improving epochs are saved as ``./save_weights/myVGG_{epoch}.h5``.
    """
    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    train_dir = os.path.join(image_path, "train")
    validation_dir = os.path.join(image_path, "val")
    assert os.path.exists(train_dir), "cannot find {}".format(train_dir)
    assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir)

    # create directory for saving weights
    if not os.path.exists("save_weights"):
        os.makedirs("save_weights")

    im_height = 224
    im_width = 224
    batch_size = 32
    epochs = 10

    # per-channel means subtracted from the raw (0-255) pixel values
    _R_MEAN = 123.68
    _G_MEAN = 116.78
    _B_MEAN = 103.94

    def pre_function(img):
        """Subtract the per-channel mean from an image whose last axis is R, G, B."""
        img = img - [_R_MEAN, _G_MEAN, _B_MEAN]

        return img

    # data generator with data augmentation (no 1/255 rescaling: only mean subtraction)
    train_image_generator = ImageDataGenerator(horizontal_flip=True,
                                               preprocessing_function=pre_function)
    validation_image_generator = ImageDataGenerator(preprocessing_function=pre_function)

    train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,
                                                               batch_size=batch_size,
                                                               shuffle=True,
                                                               target_size=(im_height, im_width),
                                                               class_mode='categorical')
    total_train = train_data_gen.n

    # get class dict
    class_indices = train_data_gen.class_indices

    # transform value and key of dict
    inverse_dict = dict((val, key) for key, val in class_indices.items())
    # write dict into json file
    json_str = json.dumps(inverse_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,
                                                                  batch_size=batch_size,
                                                                  shuffle=False,
                                                                  target_size=(im_height, im_width),
                                                                  class_mode='categorical')
    total_val = val_data_gen.n
    print("using {} images for training, {} images for validation.".format(total_train,
                                                                           total_val))

    model = vgg("vgg16", 224, 224, 5)

    # a TF checkpoint consists of several files sharing this prefix, hence the glob
    pre_weights_path = './pretrain_weights.ckpt'
    assert len(glob.glob(pre_weights_path+"*")), "cannot find {}".format(pre_weights_path)
    model.load_weights(pre_weights_path)
    # freeze the convolutional backbone; only the layers after it are trained
    for layer_t in model.layers:
        if layer_t.name == 'feature':
            layer_t.trainable = False
            break

    model.summary()

    # using keras high level api for training
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
                  metrics=["accuracy"])

    # checkpoints carry the VGG prefix (the previous 'myAlex' prefix was a leftover
    # from the AlexNet script); one file is written per improving epoch
    callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath='./save_weights/myVGG_{epoch}.h5',
                                                    save_best_only=True,
                                                    save_weights_only=True,
                                                    monitor='val_loss')]

    # tensorflow2.1 recommend to using fit
    model.fit(x=train_data_gen,
              steps_per_epoch=total_train // batch_size,
              epochs=epochs,
              validation_data=val_data_gen,
              validation_steps=total_val // batch_size,
              callbacks=callbacks)


# Start fine-tuning only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test3_vgg/model.py
================================================
from tensorflow.keras import layers, Model, Sequential

# Keras initializer config (dict form) for convolution kernels: VarianceScaling
# with scale 2.0 over fan_out, sampled from a truncated normal distribution.
CONV_KERNEL_INITIALIZER = {
    'class_name': 'VarianceScaling',
    'config': {
        'scale': 2.0,
        'mode': 'fan_out',
        'distribution': 'truncated_normal'
    }
}

# Keras initializer config (dict form) for dense kernels: VarianceScaling with
# scale 1/3 over fan_out, sampled from a uniform distribution.
DENSE_KERNEL_INITIALIZER = {
    'class_name': 'VarianceScaling',
    'config': {
        'scale': 1. / 3.,
        'mode': 'fan_out',
        'distribution': 'uniform'
    }
}


def VGG(feature, im_height=224, im_width=224, num_classes=1000):
    """Assemble a VGG classifier around a feature extractor.

    Args:
        feature: callable (e.g. the Sequential returned by ``make_feature``) applied
            to the input image tensor.
        im_height: input image height in pixels.
        im_width: input image width in pixels.
        num_classes: number of units of the final dense layer.

    Returns:
        A Keras ``Model`` mapping a 3-channel image batch to softmax class scores.
    """
    # TensorFlow tensors use the NHWC (channels-last) layout
    image_in = layers.Input(shape=(im_height, im_width, 3), dtype="float32")
    net = feature(image_in)
    net = layers.Flatten()(net)

    # two hidden blocks: dropout followed by a 2048-unit ReLU dense layer.
    # Layers are created in the same order as before so the automatically
    # generated layer names (dense, dense_1, ...) do not change.
    for _ in range(2):
        net = layers.Dropout(rate=0.5)(net)
        net = layers.Dense(2048, activation='relu',
                           kernel_initializer=DENSE_KERNEL_INITIALIZER)(net)

    logits = layers.Dense(num_classes,
                          kernel_initializer=DENSE_KERNEL_INITIALIZER)(net)
    probabilities = layers.Softmax()(logits)
    return Model(inputs=image_in, outputs=probabilities)


def make_feature(cfg):
    """Build the convolutional feature extractor described by ``cfg``.

    Args:
        cfg: iterable whose entries are either ``"M"`` (a 2x2 max-pool with stride 2)
            or an integer (the filter count of a 3x3 same-padded ReLU convolution).

    Returns:
        A ``Sequential`` model named ``"feature"`` holding the layers in ``cfg`` order.
    """
    def build_layer(entry):
        # "M" marks a down-sampling step, anything else is a filter count
        if entry == "M":
            return layers.MaxPool2D(pool_size=2, strides=2)
        return layers.Conv2D(entry, kernel_size=3, padding="SAME", activation="relu",
                             kernel_initializer=CONV_KERNEL_INITIALIZER)

    return Sequential([build_layer(entry) for entry in cfg], name="feature")


# Layer layouts of the supported VGG variants, consumed by make_feature:
# an integer is the filter count of a 3x3 convolution, 'M' is a 2x2 max-pool.
cfgs = {
    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


def vgg(model_name="vgg16", im_height=224, im_width=224, num_classes=1000):
    """Create the VGG variant called ``model_name``.

    Args:
        model_name: key of ``cfgs`` ("vgg11", "vgg13", "vgg16" or "vgg19").
        im_height: input image height in pixels.
        im_width: input image width in pixels.
        num_classes: number of output classes.

    Returns:
        The Keras model built by ``VGG`` on top of ``make_feature``.

    Raises:
        AssertionError: if ``model_name`` is not a key of ``cfgs``.
    """
    assert model_name in cfgs, "not support model {}".format(model_name)
    backbone = make_feature(cfgs[model_name])
    return VGG(backbone, im_height=im_height, im_width=im_width, num_classes=num_classes)


================================================
FILE: tensorflow_classification/Test3_vgg/predict.py
================================================
import os
import json

from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

from model import vgg


def main():
    """Classify ``../tulip.jpg`` with the trained VGG16 and show the result.

    Reads the index -> class-name mapping from ``./class_indices.json`` and the
    weights from ``./save_weights/myVGG.h5``, prints the probability of every
    class and displays the image titled with the most likely class.
    """
    im_height = 224
    im_width = 224
    num_classes = 5

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    img = Image.open(img_path)
    # resize image to 224x224
    img = img.resize((im_width, im_height))
    plt.imshow(img)

    # scaling pixel value to (0-1)
    img = np.array(img) / 255.

    # Add the image to a batch where it's the only member.
    img = (np.expand_dims(img, 0))

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = vgg("vgg16", im_height=im_height, im_width=im_width, num_classes=num_classes)
    weights_path = "./save_weights/myVGG.h5"
    # check the weights file itself (this assertion used to test img_path, so a
    # missing weights file was never caught here)
    assert os.path.exists(weights_path), "file: '{}' dose not exist.".format(weights_path)
    model.load_weights(weights_path)

    # prediction: drop the batch axis to get one score per class
    result = np.squeeze(model.predict(img))
    predict_class = np.argmax(result)

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_class)],
                                                 result[predict_class])
    plt.title(print_res)
    # the json keys are the class indices stored as strings
    for i, prob in enumerate(result):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  prob))
    plt.show()


# Run the prediction only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test3_vgg/read_ckpt.py
================================================
import tensorflow as tf


def rename_var(ckpt_path, new_ckpt_path, num_classes=5):
    """Copy a VGG16 checkpoint to a new checkpoint using Keras-style variable names.

    Variables listed in the module-level ``except_list`` and every variable whose
    renamed name contains ``fc`` are dropped. The three dense layers are written as
    freshly he_uniform-initialised variables instead.

    Args:
        ckpt_path: path of the source checkpoint.
        new_ckpt_path: path the converted checkpoint is saved to.
        num_classes: output size of the new ``dense_2`` layer.
    """
    # (old, new) substring substitutions, applied to each variable name in this order
    substitutions = (
        ('vgg_16', 'feature'),
        ("weights", "kernel"),
        ("biases", "bias"),
        ("conv1/conv1_1", "conv2d"),
        ("conv1/conv1_2", "conv2d_1"),
        ("conv2/conv2_1", "conv2d_2"),
        ("conv2/conv2_2", "conv2d_3"),
        ("conv3/conv3_1", "conv2d_4"),
        ("conv3/conv3_2", "conv2d_5"),
        ("conv3/conv3_3", "conv2d_6"),
        ("conv4/conv4_1", "conv2d_7"),
        ("conv4/conv4_2", "conv2d_8"),
        ("conv4/conv4_3", "conv2d_9"),
        ("conv5/conv5_1", "conv2d_10"),
        ("conv5/conv5_2", "conv2d_11"),
        ("conv5/conv5_3", "conv2d_12"),
    )

    # name and shape of the dense-layer variables that are created from scratch
    fresh_dense_vars = (
        ("dense/kernel", [25088, 2048]),
        ("dense/bias", [2048]),
        ("dense_1/kernel", [2048, 2048]),
        ("dense_1/bias", [2048]),
        ("dense_2/kernel", [2048, num_classes]),
        ("dense_2/bias", [num_classes]),
    )

    with tf.Graph().as_default(), tf.compat.v1.Session().as_default() as sess:
        converted = []

        for var_name, _ in tf.train.list_variables(ckpt_path):
            if var_name in except_list:
                continue

            tensor = tf.train.load_variable(ckpt_path, var_name)
            keras_name = var_name
            for old, new in substitutions:
                keras_name = keras_name.replace(old, new)

            # the fully connected layers of the source checkpoint are not reused
            if 'fc' in keras_name:
                continue

            converted.append(tf.Variable(tensor, name=keras_name))

        # a new initializer object is created for every variable
        for dense_name, dense_shape in fresh_dense_vars:
            initial_value = tf.keras.initializers.he_uniform()(dense_shape)
            converted.append(tf.Variable(initial_value, name=dense_name))

        saver = tf.compat.v1.train.Saver(converted)
        sess.run(tf.compat.v1.global_variables_initializer())
        saver.save(sess, save_path=new_ckpt_path, write_meta_graph=False, write_state=False)


# Checkpoint variables that rename_var skips: bookkeeping entries and the final
# fc8 layer, which is replaced by a new dense_2 layer sized by num_classes.
except_list = ['global_step', 'vgg_16/mean_rgb', 'vgg_16/fc8/biases', 'vgg_16/fc8/weights']
# http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz
ckpt_path = './vgg_16.ckpt'
new_ckpt_path = './pretrain_weights.ckpt'
num_classes = 5
# NOTE: the conversion runs at module level (no __main__ guard), so importing this
# file triggers it as well.
rename_var(ckpt_path, new_ckpt_path, num_classes)


================================================
FILE: tensorflow_classification/Test3_vgg/train.py
================================================
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
from model import vgg
import tensorflow as tf
import json
import os


def main():
    """Train VGG16 from scratch on the flower dataset with the Keras ``fit`` API.

    Reads images from ``<cwd>/../../data_set/flower_data/{train,val}``, writes the
    index -> class-name mapping to ``class_indices.json``, stores the weights with
    the lowest validation loss in ``./save_weights/myVGG.h5`` and finally shows
    the loss and accuracy curves.
    """
    # the data set is expected two directories above the working directory
    project_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))
    flower_root = os.path.join(project_root, "data_set", "flower_data")
    train_dir = os.path.join(flower_root, "train")
    validation_dir = os.path.join(flower_root, "val")
    assert os.path.exists(train_dir), "cannot find {}".format(train_dir)
    assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir)

    # directory that receives the checkpoint
    if not os.path.exists("save_weights"):
        os.makedirs("save_weights")

    im_height, im_width = 224, 224
    batch_size = 32
    epochs = 10

    # both generators rescale pixels to [0, 1]; only training data is flipped
    train_image_generator = ImageDataGenerator(rescale=1. / 255,
                                               horizontal_flip=True)
    validation_image_generator = ImageDataGenerator(rescale=1. / 255)

    # options shared by the training and the validation iterator
    flow_options = dict(batch_size=batch_size,
                        target_size=(im_height, im_width),
                        class_mode='categorical')

    train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,
                                                               shuffle=True,
                                                               **flow_options)
    total_train = train_data_gen.n

    # persist {index: class name} so the mapping can be read back from disk later
    index_to_class = {index: name for name, index in train_data_gen.class_indices.items()}
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json.dumps(index_to_class, indent=4))

    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,
                                                                  shuffle=False,
                                                                  **flow_options)
    total_val = val_data_gen.n
    print("using {} images for training, {} images for validation.".format(total_train,
                                                                           total_val))

    model = vgg("vgg16", im_height, im_width, num_classes=5)
    model.summary()

    # high-level Keras training API
    adam = tf.keras.optimizers.Adam(learning_rate=0.0001)
    cross_entropy = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
    model.compile(optimizer=adam, loss=cross_entropy, metrics=["accuracy"])

    # keep only the weights of the epoch with the lowest validation loss
    checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath='./save_weights/myVGG.h5',
                                                    save_best_only=True,
                                                    save_weights_only=True,
                                                    monitor='val_loss')

    # tensorflow2.1 recommends fit (instead of fit_generator)
    history = model.fit(x=train_data_gen,
                        steps_per_epoch=total_train // batch_size,
                        epochs=epochs,
                        validation_data=val_data_gen,
                        validation_steps=total_val // batch_size,
                        callbacks=[checkpoint])

    # collect the four curves first, then draw one figure per metric
    history_dict = history.history
    curves = {key: history_dict[key]
              for key in ("loss", "accuracy", "val_loss", "val_accuracy")}
    epoch_axis = range(epochs)
    for metric in ("loss", "accuracy"):
        plt.figure()
        plt.plot(epoch_axis, curves[metric], label='train_{}'.format(metric))
        plt.plot(epoch_axis, curves["val_" + metric], label='val_{}'.format(metric))
        plt.legend()
        plt.xlabel('epochs')
        plt.ylabel(metric)
    plt.show()


# Start training only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test3_vgg/trainGPU.py
================================================
import matplotlib.pyplot as plt
from model import vgg
import tensorflow as tf
import json
import os
import time
import glob
import random
# CUDA device selection via environment variables: order devices by PCI bus id
# and make only device "0" visible to this process.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


def main():
    """Train VGG16 on the flower dataset with a tf.data pipeline and a custom loop.

    Images are read from ``<cwd>/../../data_set/flower_data/{train,val}``. The
    index -> class-name mapping is written to ``class_indices.json`` and the
    weights of the epoch with the lowest validation loss are stored in
    ``./save_weights/myVGG.ckpt``.
    """
    # allocate GPU memory on demand instead of grabbing everything up front
    gpus = tf.config.experimental.list_physical_devices("GPU")
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
        except RuntimeError as e:
            print(e)
            exit(-1)

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    train_dir = os.path.join(image_path, "train")
    validation_dir = os.path.join(image_path, "val")
    assert os.path.exists(train_dir), "cannot find {}".format(train_dir)
    assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir)

    # create directory for saving weights
    if not os.path.exists("save_weights"):
        os.makedirs("save_weights")

    im_height = 224
    im_width = 224
    batch_size = 32
    epochs = 10

    # class dict: every sub-directory of the training folder is one class
    data_class = [cla for cla in os.listdir(train_dir) if os.path.isdir(os.path.join(train_dir, cla))]
    class_num = len(data_class)
    class_dict = dict((value, index) for index, value in enumerate(data_class))

    # reverse value and key of dict
    inverse_dict = dict((val, key) for key, val in class_dict.items())
    # write dict into json file
    json_str = json.dumps(inverse_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    # load train images list; the label is the index of the parent directory name
    train_image_list = glob.glob(train_dir+"/*/*.jpg")
    random.shuffle(train_image_list)
    train_num = len(train_image_list)
    assert train_num > 0, "cannot find any .jpg file in {}".format(train_dir)
    train_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in train_image_list]

    # load validation images list
    val_image_list = glob.glob(validation_dir+"/*/*.jpg")
    random.shuffle(val_image_list)
    val_num = len(val_image_list)
    assert val_num > 0, "cannot find any .jpg file in {}".format(validation_dir)
    val_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in val_image_list]

    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    def process_path(img_path, label):
        """Load one JPEG, scale it to [0, 1] floats, resize it and one-hot the label."""
        label = tf.one_hot(label, depth=class_num)
        image = tf.io.read_file(img_path)
        # force 3 channels so grayscale JPEGs can be batched with RGB ones
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize(image, [im_height, im_width])
        return image, label

    AUTOTUNE = tf.data.experimental.AUTOTUNE

    # load train dataset
    train_dataset = tf.data.Dataset.from_tensor_slices((train_image_list, train_label_list))
    train_dataset = train_dataset.shuffle(buffer_size=train_num)\
                                 .map(process_path, num_parallel_calls=AUTOTUNE)\
                                 .repeat().batch(batch_size).prefetch(AUTOTUNE)

    # load validation dataset
    val_dataset = tf.data.Dataset.from_tensor_slices((val_image_list, val_label_list))
    val_dataset = val_dataset.map(process_path, num_parallel_calls=tf.data.experimental.AUTOTUNE)\
                             .repeat().batch(batch_size)

    # instantiate the model with the same geometry / class count the pipeline produces
    model = vgg("vgg16", im_height, im_width, class_num)
    model.summary()

    # using keras low level api for training
    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')

    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            predictions = model(images, training=True)
            loss = loss_object(labels, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(labels, predictions)

    @tf.function
    def test_step(images, labels):
        predictions = model(images, training=False)
        t_loss = loss_object(labels, predictions)

        test_loss(t_loss)
        test_accuracy(labels, predictions)

    best_test_loss = float('inf')
    train_step_num = train_num // batch_size
    val_step_num = val_num // batch_size
    for epoch in range(1, epochs+1):
        train_loss.reset_states()        # clear history info
        train_accuracy.reset_states()    # clear history info
        test_loss.reset_states()         # clear history info
        test_accuracy.reset_states()     # clear history info

        # both datasets repeat forever, so each pass is cut off after a fixed step count
        t1 = time.perf_counter()
        for index, (images, labels) in enumerate(train_dataset):
            train_step(images, labels)
            if index+1 == train_step_num:
                break
        print(time.perf_counter()-t1)

        for index, (images, labels) in enumerate(val_dataset):
            test_step(images, labels)
            if index+1 == val_step_num:
                break

        template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
        print(template.format(epoch,
                              train_loss.result(),
                              train_accuracy.result() * 100,
                              test_loss.result(),
                              test_accuracy.result() * 100))

        # only overwrite the checkpoint when the validation loss improved
        current_test_loss = float(test_loss.result())
        if current_test_loss < best_test_loss:
            best_test_loss = current_test_loss
            model.save_weights("./save_weights/myVGG.ckpt", save_format='tf')

    # # using keras high level api for training
    # model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
    #               loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    #               metrics=["accuracy"])
    #
    # callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath='./save_weights/myVGG_{epoch}.h5',
    #                                                 save_best_only=True,
    #                                                 save_weights_only=True,
    #                                                 monitor='val_loss')]
    #
    # # tensorflow2.1 recommend to using fit
    # history = model.fit(x=train_dataset,
    #                     steps_per_epoch=train_num // batch_size,
    #                     epochs=epochs,
    #                     validation_data=val_dataset,
    #                     validation_steps=val_num // batch_size,
    #                     callbacks=callbacks)


# start training only when this file is executed directly, not when imported
if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test4_goolenet/class_indices.json
================================================
{
    "0": "daisy",
    "1": "dandelion",
    "2": "roses",
    "3": "sunflowers",
    "4": "tulips"
}

================================================
FILE: tensorflow_classification/Test4_goolenet/model.py
================================================
from tensorflow.keras import layers, models, Model, Sequential


def GoogLeNet(im_height=224, im_width=224, class_num=1000, aux_logits=False):
    """Build GoogLeNet (Inception v1) as a Keras functional model.

    The shape comments in the body describe the tensor at that point for the
    default 224x224 input.

    Args:
        im_height: input image height in pixels.
        im_width: input image width in pixels.
        class_num: number of units of every classification head.
        aux_logits: when True, the two InceptionAux heads attached after
            inception_4a and inception_4d become additional model outputs.

    Returns:
        A Keras Model taking a (im_height, im_width, 3) float32 image batch.
        Its output is the main softmax tensor, or the list
        [aux1, aux2, main softmax] when aux_logits is True.
    """
    # tensors in TensorFlow are laid out channels-last (NHWC)
    input_image = layers.Input(shape=(im_height, im_width, 3), dtype="float32")
    # (None, 224, 224, 3)
    x = layers.Conv2D(64, kernel_size=7, strides=2, padding="SAME", activation="relu", name="conv2d_1")(input_image)
    # (None, 112, 112, 64)
    x = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME", name="maxpool_1")(x)
    # (None, 56, 56, 64)
    x = layers.Conv2D(64, kernel_size=1, activation="relu", name="conv2d_2")(x)
    # (None, 56, 56, 64)
    x = layers.Conv2D(192, kernel_size=3, padding="SAME", activation="relu", name="conv2d_3")(x)
    # (None, 56, 56, 192)
    x = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME", name="maxpool_2")(x)

    # (None, 28, 28, 192)
    x = Inception(64, 96, 128, 16, 32, 32, name="inception_3a")(x)
    # (None, 28, 28, 256)
    x = Inception(128, 128, 192, 32, 96, 64, name="inception_3b")(x)

    # (None, 28, 28, 480)
    x = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME", name="maxpool_3")(x)
    # (None, 14, 14, 480)
    x = Inception(192, 96, 208, 16, 48, 64, name="inception_4a")(x)
    # first auxiliary classifier branches off the inception_4a output
    if aux_logits:
        aux1 = InceptionAux(class_num, name="aux_1")(x)

    # (None, 14, 14, 512)
    x = Inception(160, 112, 224, 24, 64, 64, name="inception_4b")(x)
    # (None, 14, 14, 512)
    x = Inception(128, 128, 256, 24, 64, 64, name="inception_4c")(x)
    # (None, 14, 14, 512)
    x = Inception(112, 144, 288, 32, 64, 64, name="inception_4d")(x)
    # second auxiliary classifier branches off the inception_4d output
    if aux_logits:
        aux2 = InceptionAux(class_num, name="aux_2")(x)

    # (None, 14, 14, 528)
    x = Inception(256, 160, 320, 32, 128, 128, name="inception_4e")(x)
    # (None, 14, 14, 832)
    x = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME", name="maxpool_4")(x)

    # (None, 7, 7, 832)
    x = Inception(256, 160, 320, 32, 128, 128, name="inception_5a")(x)
    # (None, 7, 7, 832)
    x = Inception(384, 192, 384, 48, 128, 128, name="inception_5b")(x)
    # (None, 7, 7, 1024)
    x = layers.AvgPool2D(pool_size=7, strides=1, name="avgpool_1")(x)

    # (None, 1, 1, 1024)
    x = layers.Flatten(name="output_flatten")(x)
    # (None, 1024)
    x = layers.Dropout(rate=0.4, name="output_dropout")(x)
    x = layers.Dense(class_num, name="output_dense")(x)
    # (None, class_num)
    aux3 = layers.Softmax(name="aux_3")(x)

    # aux1 / aux2 only exist when aux_logits is True
    if aux_logits:
        model = models.Model(inputs=input_image, outputs=[aux1, aux2, aux3])
    else:
        model = models.Model(inputs=input_image, outputs=aux3)
    return model


class Inception(layers.Layer):
    """Inception block: four parallel branches joined by ``layers.concatenate``.

    Args:
        ch1x1: filters of the 1x1 convolution branch.
        ch3x3red: filters of the 1x1 reduction in front of the 3x3 convolution.
        ch3x3: filters of the 3x3 convolution.
        ch5x5red: filters of the 1x1 reduction in front of the 5x5 convolution.
        ch5x5: filters of the 5x5 convolution.
        pool_proj: filters of the 1x1 convolution after the max-pool branch.
        **kwargs: forwarded to ``layers.Layer`` (e.g. ``name``).
    """

    def __init__(self, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj, **kwargs):
        super().__init__(**kwargs)

        # branch 1: single 1x1 convolution
        self.branch1 = layers.Conv2D(ch1x1, kernel_size=1, activation="relu")

        # branch 2: 1x1 reduction followed by a size-preserving 3x3 convolution
        reduce_3x3 = layers.Conv2D(ch3x3red, kernel_size=1, activation="relu")
        conv_3x3 = layers.Conv2D(ch3x3, kernel_size=3, padding="SAME", activation="relu")
        self.branch2 = Sequential([reduce_3x3, conv_3x3])

        # branch 3: 1x1 reduction followed by a size-preserving 5x5 convolution
        reduce_5x5 = layers.Conv2D(ch5x5red, kernel_size=1, activation="relu")
        conv_5x5 = layers.Conv2D(ch5x5, kernel_size=5, padding="SAME", activation="relu")
        self.branch3 = Sequential([reduce_5x5, conv_5x5])

        # branch 4: stride-1 max pooling (the default stride would equal pool_size)
        # followed by a 1x1 projection
        pool_3x3 = layers.MaxPool2D(pool_size=3, strides=1, padding="SAME")
        projection = layers.Conv2D(pool_proj, kernel_size=1, activation="relu")
        self.branch4 = Sequential([pool_3x3, projection])

    def call(self, inputs, **kwargs):
        # every branch sees the same input; results are concatenated in branch order
        branch_outputs = [
            self.branch1(inputs),
            self.branch2(inputs),
            self.branch3(inputs),
            self.branch4(inputs),
        ]
        return layers.concatenate(branch_outputs)


class InceptionAux(layers.Layer):
    """Auxiliary classifier head: avg-pool -> 1x1 conv -> two dense layers -> softmax.

    Args:
        num_classes: number of output classes.
        **kwargs: forwarded to ``layers.Layer`` (e.g. ``name``).
    """

    def __init__(self, num_classes, **kwargs):
        super(InceptionAux, self).__init__(**kwargs)
        self.averagePool = layers.AvgPool2D(pool_size=5, strides=3)
        self.conv = layers.Conv2D(128, kernel_size=1, activation="relu")

        self.fc1 = layers.Dense(1024, activation="relu")
        self.fc2 = layers.Dense(num_classes)
        self.softmax = layers.Softmax()

        # Weight-free helper layers are created once here instead of on every
        # call(); they are declared last so the construction order of the
        # weighted layers above is unchanged.
        self.flatten = layers.Flatten()
        self.dropout1 = layers.Dropout(rate=0.5)
        self.dropout2 = layers.Dropout(rate=0.5)

    def call(self, inputs, training=None, **kwargs):
        """Return class probabilities for a feature map.

        Args:
            inputs: feature map tensor (channels-last).
            training: forwarded to the dropout layers; ``None`` lets Keras
                resolve the mode from the enclosing call.
        """
        # aux1: (N, 14, 14, 512), aux2: (N, 14, 14, 528)
        x = self.averagePool(inputs)
        # aux1: (N, 4, 4, 512), aux2: (N, 4, 4, 528)
        x = self.conv(x)
        # (N, 4, 4, 128)
        x = self.flatten(x)
        x = self.dropout1(x, training=training)
        # (N, 2048)
        x = self.fc1(x)
        x = self.dropout2(x, training=training)
        # (N, 1024)
        x = self.fc2(x)
        # (N, num_classes)
        x = self.softmax(x)

        return x


================================================
FILE: tensorflow_classification/Test4_goolenet/model_add_bn.py
================================================
from tensorflow.keras import layers, models, Model, Sequential


def InceptionV1(im_height=224, im_width=224, class_num=1000, aux_logits=False):
    """Build Inception v1 with batch normalisation as a Keras functional model.

    Every convolution is bias-free and followed by BatchNormalization and ReLU.
    The shape comments in the body describe the tensor at that point for the
    default 224x224 input.

    Args:
        im_height: input image height in pixels.
        im_width: input image width in pixels.
        class_num: number of units of every classification head.
        aux_logits: when True, the two InceptionAux heads attached after
            inception4a and inception4d become additional model outputs.

    Returns:
        A Keras Model taking a (im_height, im_width, 3) float32 image batch.
        Its output is the main softmax tensor, or the list
        [aux1, aux2, main softmax] when aux_logits is True.
    """
    # tensors in TensorFlow are laid out channels-last (NHWC)
    input_image = layers.Input(shape=(im_height, im_width, 3), dtype="float32")
    # (None, 224, 224, 3)
    x = layers.Conv2D(64, kernel_size=7, strides=2, padding="SAME", use_bias=False, name="conv1/conv")(input_image)
    x = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name="conv1/bn")(x)
    x = layers.ReLU()(x)
    # (None, 112, 112, 64)
    x = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME", name="maxpool_1")(x)
    # (None, 56, 56, 64)
    x = layers.Conv2D(64, kernel_size=1, use_bias=False, name="conv2/conv")(x)
    x = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name="conv2/bn")(x)
    x = layers.ReLU()(x)
    # (None, 56, 56, 64)
    x = layers.Conv2D(192, kernel_size=3, padding="SAME", use_bias=False, name="conv3/conv")(x)
    x = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name="conv3/bn")(x)
    x = layers.ReLU()(x)
    # (None, 56, 56, 192)
    x = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME", name="maxpool_2")(x)

    # (None, 28, 28, 192)
    x = Inception(64, 96, 128, 16, 32, 32, name="inception3a")(x)
    # (None, 28, 28, 256)
    x = Inception(128, 128, 192, 32, 96, 64, name="inception3b")(x)

    # (None, 28, 28, 480)
    x = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME", name="maxpool_3")(x)
    # (None, 14, 14, 480)
    x = Inception(192, 96, 208, 16, 48, 64, name="inception4a")(x)
    # first auxiliary classifier branches off the inception4a output
    if aux_logits:
        aux1 = InceptionAux(class_num, name="aux1")(x)

    # (None, 14, 14, 512)
    x = Inception(160, 112, 224, 24, 64, 64, name="inception4b")(x)
    # (None, 14, 14, 512)
    x = Inception(128, 128, 256, 24, 64, 64, name="inception4c")(x)
    # (None, 14, 14, 512)
    x = Inception(112, 144, 288, 32, 64, 64, name="inception4d")(x)
    # second auxiliary classifier branches off the inception4d output
    if aux_logits:
        aux2 = InceptionAux(class_num, name="aux2")(x)

    # (None, 14, 14, 528)
    x = Inception(256, 160, 320, 32, 128, 128, name="inception4e")(x)
    # (None, 14, 14, 832)
    # NOTE(review): pool_size is 2 here while the other max-pools use 3 -
    # presumably to mirror the PyTorch GoogLeNet whose weights read_pth.py
    # converts; confirm before changing.
    x = layers.MaxPool2D(pool_size=2, strides=2, padding="SAME", name="maxpool_4")(x)

    # (None, 7, 7, 832)
    x = Inception(256, 160, 320, 32, 128, 128, name="inception5a")(x)
    # (None, 7, 7, 832)
    x = Inception(384, 192, 384, 48, 128, 128, name="inception5b")(x)
    # (None, 7, 7, 1024)
    x = layers.AvgPool2D(pool_size=7, strides=1, name="avgpool_1")(x)

    # (None, 1, 1, 1024)
    x = layers.Flatten(name="output_flatten")(x)
    # (None, 1024)
    x = layers.Dropout(rate=0.4, name="output_dropout")(x)
    x = layers.Dense(class_num, name="fc")(x)
    # (None, class_num)
    aux3 = layers.Softmax()(x)

    # aux1 / aux2 only exist when aux_logits is True
    if aux_logits:
        model = models.Model(inputs=input_image, outputs=[aux1, aux2, aux3])
    else:
        model = models.Model(inputs=input_image, outputs=aux3)
    return model


class Inception(layers.Layer):
    """Inception block whose convolutions are bias-free conv -> BN -> ReLU stages.

    The four branch outputs are joined with ``layers.concatenate``.

    Args:
        ch1x1: filters of the 1x1 convolution branch.
        ch3x3red: filters of the 1x1 reduction in branch2.
        ch3x3: filters of the 3x3 convolution in branch2.
        ch5x5red: filters of the 1x1 reduction in branch3.
        ch5x5: filters of the second convolution in branch3.
        pool_proj: filters of the 1x1 convolution after the max-pool branch.
        **kwargs: forwarded to ``layers.Layer`` (e.g. ``name``).
    """

    def __init__(self, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj, **kwargs):
        super().__init__(**kwargs)

        # branch1: 1x1 conv stage
        stages_1 = [
            layers.Conv2D(ch1x1, kernel_size=1, use_bias=False, name="conv"),
            layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name="bn"),
            layers.ReLU(),
        ]
        self.branch1 = Sequential(stages_1, name="branch1")

        # branch2: 1x1 reduction stage, then a size-preserving 3x3 stage
        stages_2 = [
            layers.Conv2D(ch3x3red, kernel_size=1, use_bias=False, name="0/conv"),
            layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name="0/bn"),
            layers.ReLU(),
            layers.Conv2D(ch3x3, kernel_size=3, padding="SAME", use_bias=False, name="1/conv"),
            layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name="1/bn"),
            layers.ReLU(),
        ]
        self.branch2 = Sequential(stages_2, name="branch2")

        # branch3: 1x1 reduction stage, then a size-preserving stage.
        # NOTE(review): the second kernel is 3x3 although the argument is called
        # ch5x5 - presumably to stay compatible with the PyTorch weights that
        # read_pth.py converts; confirm before changing.
        stages_3 = [
            layers.Conv2D(ch5x5red, kernel_size=1, use_bias=False, name="0/conv"),
            layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name="0/bn"),
            layers.ReLU(),
            layers.Conv2D(ch5x5, kernel_size=3, padding="SAME", use_bias=False, name="1/conv"),
            layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name="1/bn"),
            layers.ReLU(),
        ]
        self.branch3 = Sequential(stages_3, name="branch3")

        # branch4: stride-1 max pooling (the default stride would equal pool_size),
        # then a 1x1 projection stage
        stages_4 = [
            layers.MaxPool2D(pool_size=3, strides=1, padding="SAME"),
            layers.Conv2D(pool_proj, kernel_size=1, use_bias=False, name="1/conv"),
            layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name="1/bn"),
            layers.ReLU(),
        ]
        self.branch4 = Sequential(stages_4, name="branch4")

    def call(self, inputs, **kwargs):
        # every branch sees the same input; results are concatenated in branch order
        branch_outputs = [
            self.branch1(inputs),
            self.branch2(inputs),
            self.branch3(inputs),
            self.branch4(inputs),
        ]
        return layers.concatenate(branch_outputs)


class InceptionAux(layers.Layer):
    """Auxiliary classifier head: avg-pool -> conv/BN/ReLU -> two dense layers -> softmax.

    Args:
        num_classes: number of output classes.
        **kwargs: forwarded to ``layers.Layer`` (e.g. ``name``).
    """

    def __init__(self, num_classes, **kwargs):
        super(InceptionAux, self).__init__(**kwargs)
        self.averagePool = layers.AvgPool2D(pool_size=5, strides=3)
        self.conv = layers.Conv2D(128, kernel_size=1, use_bias=False, name="conv/conv")
        self.bn1 = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name="conv/bn")
        self.rule1 = layers.ReLU()

        self.fc1 = layers.Dense(1024, activation="relu", name="fc1")
        self.fc2 = layers.Dense(num_classes, name="fc2")
        self.softmax = layers.Softmax()

        # Weight-free helper layers are created once here instead of on every
        # call(); they are declared last so the construction order of the
        # weighted layers above is unchanged.
        self.flatten = layers.Flatten()
        self.dropout1 = layers.Dropout(rate=0.5)
        self.dropout2 = layers.Dropout(rate=0.5)

    def call(self, inputs, training=None, **kwargs):
        """Return class probabilities for a feature map.

        Args:
            inputs: feature map tensor (channels-last).
            training: forwarded to batch normalisation and dropout; ``None``
                lets Keras resolve the mode from the enclosing call.
        """
        # aux1: (N, 14, 14, 512), aux2: (N, 14, 14, 528)
        x = self.averagePool(inputs)
        # aux1: (N, 4, 4, 512), aux2: (N, 4, 4, 528)
        x = self.conv(x)
        x = self.bn1(x, training=training)
        x = self.rule1(x)
        # (N, 4, 4, 128)
        x = self.flatten(x)
        x = self.dropout1(x, training=training)
        # (N, 2048)
        x = self.fc1(x)
        x = self.dropout2(x, training=training)
        # (N, 1024)
        x = self.fc2(x)
        # (N, num_classes)
        x = self.softmax(x)

        return x


================================================
FILE: tensorflow_classification/Test4_goolenet/predict.py
================================================
import os
import glob
import json

from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

from model import GoogLeNet


def main():
    """Classify ``../tulip.jpg`` with the trained GoogLeNet and show the result.

    Loads the index -> class-name mapping from ``./class_indices.json`` and the
    weights from ``./save_weights/myGoogLeNet.ckpt``, prints the probability of
    every class and displays the image titled with the top prediction.
    """
    im_height = 224
    im_width = 224

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    # Force 3 channels: the network expects (H, W, 3), but grayscale, palette or
    # RGBA files would otherwise yield a different channel count. The context
    # manager closes the underlying file handle once the pixels are copied.
    with Image.open(img_path) as raw_img:
        img = raw_img.convert("RGB")
    # resize image to 224x224
    img = img.resize((im_width, im_height))
    plt.imshow(img)

    # scaling pixel value and normalize to [-1, 1]
    img = ((np.array(img) / 255.) - 0.5) / 0.5

    # Add the image to a batch where it's the only member.
    img = (np.expand_dims(img, 0))

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # inference only needs the main head, so the auxiliary classifiers are left out
    model = GoogLeNet(class_num=5, aux_logits=False)
    model.summary()
    # model.load_weights("./save_weights/myGoogLenet.h5", by_name=True)  # h5 format
    weights_path = "./save_weights/myGoogLeNet.ckpt"
    # a TF checkpoint is several files sharing this prefix, hence the glob
    assert len(glob.glob(weights_path + "*")), "cannot find {}".format(weights_path)
    model.load_weights(weights_path)

    result = np.squeeze(model.predict(img))
    predict_class = np.argmax(result)

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_class)],
                                                 result[predict_class])
    plt.title(print_res)
    for i, prob in enumerate(result):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  prob))
    plt.show()


# run the prediction only when this file is executed directly, not when imported
if __name__ == "__main__":
    main()


================================================
FILE: tensorflow_classification/Test4_goolenet/read_pth.py
================================================
import torch
import numpy as np
import tensorflow as tf


def rename_var(pth_path, new_ckpt_path, num_classes):
    """Convert a PyTorch state dict into a name-based TensorFlow checkpoint.

    Keys listed in the module-level ``except_list`` are skipped; the
    ``aux1/fc2``, ``aux2/fc2`` and ``fc`` heads are instead created freshly
    with ``num_classes`` outputs.

    Args:
        pth_path: path of the PyTorch ``.pth`` state dict.
        new_ckpt_path: path prefix of the TensorFlow checkpoint to write.
        num_classes: number of outputs of the re-initialised classifier heads.
    """
    # map to CPU so the conversion also works on hosts without CUDA and
    # `.numpy()` below never sees a GPU tensor
    pytorch_dict = torch.load(pth_path, map_location="cpu")

    # the session is used as a context manager so it is closed when done
    with tf.Graph().as_default(), tf.compat.v1.Session() as sess:
        new_var_list = []

        for key, value in pytorch_dict.items():
            if key in except_list:
                continue

            value = value.detach().numpy()

            # "a.b.c" (PyTorch) -> "a/b/c" (TensorFlow variable scope style)
            new_name = key.replace(".", "/")

            # reorder the conv kernel axes from PyTorch (out, in, h, w)
            # to TensorFlow (h, w, in, out)
            if 'conv/weight' in new_name:
                new_name = new_name.replace("weight", "kernel")
                value = np.transpose(value, (2, 3, 1, 0)).astype(np.float32)
            elif 'bn' in new_name:
                # PyTorch-only bookkeeping counter, no TensorFlow counterpart
                if "num_batches_tracked" in new_name:
                    continue

                new_name = new_name.replace("weight", "gamma")
                new_name = new_name.replace("bias", "beta")
                new_name = new_name.replace("running_mean", "moving_mean")
                new_name = new_name.replace("running_var", "moving_variance")

                value = np.transpose(value).astype(np.float32)
            elif 'fc1' in new_name:
                # dense weights: reverse the axis order of the PyTorch matrix
                new_name = new_name.replace("weight", "kernel")
                value = np.transpose(value).astype(np.float32)

            new_var_list.append(tf.Variable(value, name=new_name))

        # the excluded heads are re-created with random initial values
        initializer = tf.keras.initializers.he_uniform()
        for head in ("aux1/fc2", "aux2/fc2", "fc"):
            new_var_list.append(tf.Variable(initializer([1024, num_classes]), name=head + "/kernel"))
            new_var_list.append(tf.Variable(initializer([num_classes]), name=head + "/bias"))

        saver = tf.compat.v1.train.Saver(new_var_list)
        sess.run(tf.compat.v1.global_variables_initializer())
        saver.save(sess, save_path=new_ckpt_path, write_meta_graph=False, write_state=False)


# This script is only meant to be used with model_add_bn.py.
# State-dict keys that rename_var skips (it creates those heads freshly instead).
except_list = ['aux1.fc2.weight', 'aux1.fc2.bias', 'aux2.fc2.weight', 'aux2.fc2.bias', 'fc.weight', 'fc.bias']
# pretrained weights: https://download.pytorch.org/models/googlenet-1378be20.pth
pth_path = './googlenet-1378be20.pth'
new_ckpt_path = './pretrain_weights.ckpt'
num_classes = 5
# NOTE: the conversion runs at module level, i.e. also when this file is imported
rename_var(pth_path, new_ckpt_path, num_classes)


================================================
FILE: tensorflow_classification/Test4_goolenet/train.py
================================================
import os
import sys
import json

import tensorflow as tf
from tqdm import tqdm
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from model import GoogLeNet


def main():
    """Train GoogLeNet with auxiliary heads on the flower dataset.

    Images are read from ``<cwd>/../../data_set/flower_data/{train,val}``. The
    index -> class-name mapping is written to ``class_indices.json`` and the
    weights of the epoch with the highest validation accuracy are stored in
    ``./save_weights/myGoogLeNet.ckpt``.
    """
    # locate the dataset two directories above the working directory
    flower_root = os.path.join(os.path.abspath(os.path.join(os.getcwd(), "../..")),
                               "data_set", "flower_data")
    train_dir = os.path.join(flower_root, "train")
    validation_dir = os.path.join(flower_root, "val")
    assert os.path.exists(train_dir), "cannot find {}".format(train_dir)
    assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir)

    # make sure the checkpoint directory exists
    if not os.path.exists("save_weights"):
        os.makedirs("save_weights")

    im_height, im_width = 224, 224
    batch_size = 32
    epochs = 30

    def pre_function(img):
        # scale pixel values to [0, 1] and then shift them to [-1, 1]
        return (img / 255. - 0.5) * 2.0

    # training images are randomly flipped, validation images only normalised
    train_image_generator = ImageDataGenerator(preprocessing_function=pre_function,
                                               horizontal_flip=True)
    validation_image_generator = ImageDataGenerator(preprocessing_function=pre_function)

    train_data_gen = train_image_generator.flow_from_directory(
        directory=train_dir,
        batch_size=batch_size,
        shuffle=True,
        target_size=(im_height, im_width),
        class_mode='categorical')
    total_train = train_data_gen.n

    # store the index -> class-name mapping as json
    index_to_class = {index: name for name, index in train_data_gen.class_indices.items()}
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json.dumps(index_to_class, indent=4))

    val_data_gen = validation_image_generator.flow_from_directory(
        directory=validation_dir,
        batch_size=batch_size,
        shuffle=False,
        target_size=(im_height, im_width),
        class_mode='categorical')
    total_val = val_data_gen.n
    print("using {} images for training, {} images for validation.".format(total_train,
                                                                           total_val))

    model = GoogLeNet(im_height=im_height, im_width=im_width, class_num=5, aux_logits=True)
    # model.build((batch_size, 224, 224, 3))  # when using subclass model
    model.summary()

    # custom training loop with the low level keras api
    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0003)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.CategoricalAccuracy(name='val_accuracy')

    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            aux1, aux2, output = model(images, training=True)
            aux1_loss = loss_object(labels, aux1)
            aux2_loss = loss_object(labels, aux2)
            main_loss = loss_object(labels, output)
            # the auxiliary heads contribute with a weight of 0.3 each
            loss = aux1_loss * 0.3 + aux2_loss * 0.3 + main_loss
        variables = model.trainable_variables
        gradients = tape.gradient(loss, variables)
        optimizer.apply_gradients(zip(gradients, variables))

        train_loss(loss)
        train_accuracy(labels, output)

    @tf.function
    def val_step(images, labels):
        # only the main head is evaluated
        _, _, output = model(images, training=False)
        val_loss(loss_object(labels, output))
        val_accuracy(labels, output)

    train_steps = total_train // batch_size
    val_steps = total_val // batch_size
    best_val_acc = 0.
    for epoch in range(epochs):
        # start every epoch with fresh metric state
        for metric in (train_loss, train_accuracy, val_loss, val_accuracy):
            metric.reset_states()

        # train
        train_bar = tqdm(range(train_steps), file=sys.stdout)
        for _ in train_bar:
            batch_images, batch_labels = next(train_data_gen)
            train_step(batch_images, batch_labels)

            # show running loss / accuracy in the progress bar
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(
                epoch + 1, epochs, train_loss.result(), train_accuracy.result())

        # validate
        val_bar = tqdm(range(val_steps), file=sys.stdout)
        for _ in val_bar:
            batch_images, batch_labels = next(val_data_gen)
            val_step(batch_images, batch_labels)

            # show running loss / accuracy in the progress bar
            val_bar.desc = "valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(
                epoch + 1, epochs, val_loss.result(), val_accuracy.result())

        # keep only the weights of the best epoch so far
        epoch_val_acc = val_accuracy.result()
        if epoch_val_acc > best_val_acc:
            best_val_acc = epoch_val_acc
            model.save_weights("./save_weights/myGoogLeNet.ckpt")


# start training only when this file is executed directly, not when imported
if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test4_goolenet/trainGPU.py
================================================
import matplotlib.pyplot as plt
from model import GoogLeNet
import tensorflow as tf
import json
import os
import time
import glob
import random
# CUDA device selection via environment variables: order devices by PCI bus id
# and make only device "0" visible to this process.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


def main():
    """Train GoogLeNet on the flower data set with a tf.data pipeline and a custom loop.

    Images are read from ``../../data_set/flower_data/{train,val}`` relative to
    the current working directory. The index-to-class mapping is written to
    ``class_indices.json`` and the weights with the lowest validation loss are
    saved to ``./save_weights/myGoogLeNet.ckpt``.
    """
    gpus = tf.config.experimental.list_physical_devices("GPU")
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
        except RuntimeError as e:
            print(e)
            exit(-1)

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    train_dir = os.path.join(image_path, "train")
    validation_dir = os.path.join(image_path, "val")
    assert os.path.exists(train_dir), "cannot find {}".format(train_dir)
    assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir)

    # create directory for saving weights
    if not os.path.exists("save_weights"):
        os.makedirs("save_weights")

    im_height = 224
    im_width = 224
    batch_size = 32
    epochs = 30

    # class dict; sorted so the class -> index mapping does not depend on the
    # arbitrary order returned by os.listdir
    data_class = sorted(cla for cla in os.listdir(train_dir)
                        if os.path.isdir(os.path.join(train_dir, cla)))
    class_num = len(data_class)
    class_dict = dict((value, index) for index, value in enumerate(data_class))

    # reverse value and key of dict
    inverse_dict = dict((val, key) for key, val in class_dict.items())
    # write dict into json file
    json_str = json.dumps(inverse_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    # load train images list; the label is taken from the parent folder name
    train_image_list = glob.glob(train_dir+"/*/*.jpg")
    random.shuffle(train_image_list)
    train_num = len(train_image_list)
    assert train_num > 0, "cannot find any .jpg file in {}".format(train_dir)
    train_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in train_image_list]

    # load validation images list
    val_image_list = glob.glob(validation_dir+"/*/*.jpg")
    random.shuffle(val_image_list)
    val_num = len(val_image_list)
    assert val_num > 0, "cannot find any .jpg file in {}".format(validation_dir)
    val_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in val_image_list]

    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    def process_train_img(img_path, label):
        """Load one training image: decode, resize, random flip, map [0, 1] -> [-1, 1]."""
        label = tf.one_hot(label, depth=class_num)
        image = tf.io.read_file(img_path)
        image = tf.image.decode_jpeg(image)
        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize(image, [im_height, im_width])
        image = tf.image.random_flip_left_right(image)
        image = (image - 0.5) / 0.5
        return image, label

    def process_val_img(img_path, label):
        """Load one validation image: same as training but without augmentation."""
        label = tf.one_hot(label, depth=class_num)
        image = tf.io.read_file(img_path)
        image = tf.image.decode_jpeg(image)
        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize(image, [im_height, im_width])
        image = (image - 0.5) / 0.5
        return image, label

    AUTOTUNE = tf.data.experimental.AUTOTUNE

    # load train dataset
    train_dataset = tf.data.Dataset.from_tensor_slices((train_image_list, train_label_list))
    train_dataset = train_dataset.shuffle(buffer_size=train_num)\
                                 .map(process_train_img, num_parallel_calls=AUTOTUNE)\
                                 .repeat().batch(batch_size).prefetch(AUTOTUNE)

    # load validation dataset
    val_dataset = tf.data.Dataset.from_tensor_slices((val_image_list, val_label_list))
    val_dataset = val_dataset.map(process_val_img, num_parallel_calls=AUTOTUNE)\
                             .repeat().batch(batch_size)

    # instantiate the model with the sizes computed above instead of literals
    model = GoogLeNet(im_height=im_height, im_width=im_width, class_num=class_num, aux_logits=True)
    model.summary()

    # using keras low level api for training
    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0003)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')

    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            aux1, aux2, output = model(images, training=True)
            loss1 = loss_object(labels, aux1)
            loss2 = loss_object(labels, aux2)
            loss3 = loss_object(labels, output)
            # the two auxiliary classifier losses are down-weighted by 0.3
            loss = loss1 * 0.3 + loss2 * 0.3 + loss3
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(labels, output)

    @tf.function
    def test_step(images, labels):
        # only the main output is evaluated; auxiliary outputs are ignored
        _, _, output = model(images, training=False)
        t_loss = loss_object(labels, output)

        test_loss(t_loss)
        test_accuracy(labels, output)

    best_test_loss = float('inf')
    train_step_num = train_num // batch_size
    val_step_num = val_num // batch_size
    for epoch in range(1, epochs+1):
        train_loss.reset_states()        # clear history info
        train_accuracy.reset_states()    # clear history info
        test_loss.reset_states()         # clear history info
        test_accuracy.reset_states()     # clear history info

        # the datasets repeat forever, so bound every epoch with take();
        # unlike an enumerate/break loop this also terminates for 0 steps
        t1 = time.perf_counter()
        for images, labels in train_dataset.take(train_step_num):
            train_step(images, labels)
        print(time.perf_counter()-t1)

        for images, labels in val_dataset.take(val_step_num):
            test_step(images, labels)

        template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
        print(template.format(epoch,
                              train_loss.result(),
                              train_accuracy.result() * 100,
                              test_loss.result(),
                              test_accuracy.result() * 100))

        # only save best weights: remember the best loss so later, worse
        # epochs do not overwrite the checkpoint
        current_test_loss = float(test_loss.result())
        if current_test_loss < best_test_loss:
            best_test_loss = current_test_loss
            model.save_weights("./save_weights/myGoogLeNet.ckpt", save_format='tf')

# run the training entry point only when this file is executed as a script
if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test4_goolenet/train_add_bn.py
================================================
import os
import sys
import json
import glob

import numpy as np
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from model_add_bn import InceptionV1


def main():
    """Fine-tune InceptionV1 (from ``model_add_bn``) on the flower data set.

    Images are read from ``../../data_set/flower_data/{train,val}`` relative to
    the current working directory. Pre-trained weights are loaded from
    ``./pretrain_weights.ckpt``, the index-to-class mapping is written to
    ``class_indices.json`` and the weights with the best validation accuracy
    are saved to ``./save_weights/myInceptionV1.ckpt``.
    """
    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    train_dir = os.path.join(image_path, "train")
    validation_dir = os.path.join(image_path, "val")
    assert os.path.exists(train_dir), "cannot find {}".format(train_dir)
    assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir)

    # create directory for saving weights
    if not os.path.exists("save_weights"):
        os.makedirs("save_weights")

    im_height = 224
    im_width = 224
    batch_size = 16
    epochs = 30

    def pre_function(img: np.ndarray):
        """Divide by 255, then standardise each channel with a fixed mean/std."""
        # assumes img holds 0-255 pixel values in RGB channel order — TODO confirm
        img = img / 255.
        img = img - [0.485, 0.456, 0.406]
        img = img / [0.229, 0.224, 0.225]

        return img

    # data generator with data augmentation
    train_image_generator = ImageDataGenerator(preprocessing_function=pre_function,
                                               horizontal_flip=True)
    validation_image_generator = ImageDataGenerator(preprocessing_function=pre_function)

    train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,
                                                               batch_size=batch_size,
                                                               shuffle=True,
                                                               target_size=(im_height, im_width),
                                                               class_mode='categorical')
    total_train = train_data_gen.n

    # get class dict
    class_indices = train_data_gen.class_indices

    # transform value and key of dict
    inverse_dict = dict((val, key) for key, val in class_indices.items())
    # write dict into json file
    json_str = json.dumps(inverse_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,
                                                                  batch_size=batch_size,
                                                                  shuffle=False,
                                                                  target_size=(im_height, im_width),
                                                                  class_mode='categorical')
    total_val = val_data_gen.n
    print("using {} images for training, {} images for validation.".format(total_train,
                                                                           total_val))

    model = InceptionV1(im_height=im_height, im_width=im_width, class_num=5, aux_logits=True)
    # model.build((batch_size, 224, 224, 3))  # when using subclass model

    pre_weights_path = './pretrain_weights.ckpt'
    # a checkpoint is stored as several files sharing this prefix
    assert len(glob.glob(pre_weights_path+"*")), "cannot find {}".format(pre_weights_path)
    model.load_weights(pre_weights_path)
    model.summary()

    # using keras low level api for training
    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.CategoricalAccuracy(name='val_accuracy')

    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            aux1, aux2, output = model(images, training=True)
            loss1 = loss_object(labels, aux1)
            loss2 = loss_object(labels, aux2)
            loss3 = loss_object(labels, output)
            # the two auxiliary classifier losses are down-weighted by 0.3
            loss = loss1 * 0.3 + loss2 * 0.3 + loss3
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(labels, output)

    @tf.function
    def val_step(images, labels):
        # only the main output is evaluated; auxiliary outputs are ignored
        _, _, output = model(images, training=False)
        loss = loss_object(labels, output)

        val_loss(loss)
        val_accuracy(labels, output)

    train_steps = total_train // batch_size
    # Ceil division: the validation generator is never reset, so stopping
    # before its final (smaller) batch would leave it out of phase and make
    # every epoch evaluate a different subset of the validation images.
    val_steps = (total_val + batch_size - 1) // batch_size

    best_val_acc = 0.
    for epoch in range(epochs):
        train_loss.reset_states()  # clear history info
        train_accuracy.reset_states()  # clear history info
        val_loss.reset_states()  # clear history info
        val_accuracy.reset_states()  # clear history info

        # train
        train_bar = tqdm(range(train_steps), file=sys.stdout)
        for step in train_bar:
            images, labels = next(train_data_gen)
            train_step(images, labels)

            # print train process
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                                 epochs,
                                                                                 train_loss.result(),
                                                                                 train_accuracy.result())

        # validate
        val_bar = tqdm(range(val_steps), file=sys.stdout)
        for step in val_bar:
            val_images, val_labels = next(val_data_gen)
            val_step(val_images, val_labels)

            # print val process
            val_bar.desc = "valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                               epochs,
                                                                               val_loss.result(),
                                                                               val_accuracy.result())

        # only save best weights
        if val_accuracy.result() > best_val_acc:
            best_val_acc = val_accuracy.result()
            model.save_weights("./save_weights/myInceptionV1.ckpt")


# run the training entry point only when this file is executed as a script
if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test5_resnet/batch_predict.py
================================================
import os
import json
import glob

import tensorflow as tf
import numpy as np
from PIL import Image

from model import resnet50


def main():
    """Classify every ``.jpg`` image in ``imgs_root`` in batches with a fine-tuned ResNet-50.

    Reads the index-to-class mapping from ``./class_indices.json``, loads the
    weights from ``./save_weights/resNet_50.ckpt`` and prints the predicted
    class and its probability for each image.
    """
    im_height = 224
    im_width = 224
    num_classes = 5

    _R_MEAN = 123.68
    _G_MEAN = 116.78
    _B_MEAN = 103.94

    # load images
    # folder whose images will be traversed and predicted
    imgs_root = "/data/imgs"
    assert os.path.exists(imgs_root), f"file: '{imgs_root}' dose not exist."
    # collect the paths of all jpg images directly inside that folder
    img_path_list = [os.path.join(imgs_root, i) for i in os.listdir(imgs_root) if i.endswith(".jpg")]

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), f"file: '{json_path}' dose not exist."

    with open(json_path, "r") as json_file:
        class_indict = json.load(json_file)

    # create model: frozen ResNet-50 feature extractor plus a small classifier head
    feature = resnet50(num_classes=num_classes, include_top=False)
    feature.trainable = False
    model = tf.keras.Sequential([feature,
                                 tf.keras.layers.GlobalAvgPool2D(),
                                 tf.keras.layers.Dropout(rate=0.5),
                                 tf.keras.layers.Dense(1024, activation="relu"),
                                 tf.keras.layers.Dropout(rate=0.5),
                                 tf.keras.layers.Dense(num_classes),
                                 tf.keras.layers.Softmax()])

    # load weights
    weights_path = './save_weights/resNet_50.ckpt'
    # a checkpoint is stored as several files sharing this prefix
    assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path)
    model.load_weights(weights_path)

    batch_size = 8  # number of images packed into one batch per prediction
    # step through the list in strides of batch_size so the final, smaller
    # batch is predicted too instead of being silently skipped
    for start in range(0, len(img_path_list), batch_size):
        batch_paths = img_path_list[start: start + batch_size]
        img_list = []
        for img_path in batch_paths:
            assert os.path.exists(img_path), f"file: '{img_path}' dose not exist."
            # force 3 channels so the per-channel mean subtraction below is valid
            img = Image.open(img_path).convert("RGB")
            # resize image to 224x224
            img = img.resize((im_width, im_height))

            # subtract the per-channel RGB means (pixel values are not rescaled)
            img = np.array(img).astype(np.float32)
            img = img - [_R_MEAN, _G_MEAN, _B_MEAN]
            img_list.append(img)

        # batch images
        # pack all images in img_list into one batch
        batch_img = np.stack(img_list, axis=0)

        # prediction
        result = model.predict(batch_img)
        predict_classes = np.argmax(result, axis=1)

        for img_path, class_index, probs in zip(batch_paths, predict_classes, result):
            print_res = "image: {}  class: {}   prob: {:.3}".format(img_path,
                                                                    class_indict[str(class_index)],
                                                                    probs[class_index])
            print(print_res)


# run the batch prediction only when this file is executed as a script
if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test5_resnet/class_indices.json
================================================
{
    "0": "daisy",
    "1": "dandelion",
    "2": "roses",
    "3": "sunflowers",
    "4": "tulips"
}

================================================
FILE: tensorflow_classification/Test5_resnet/model.py
================================================
from tensorflow.keras import layers, Model, Sequential


class BasicBlock(layers.Layer):
    """Residual block made of two 3x3 convolutions, each followed by batch norm.

    Args:
        out_channel: number of filters of both convolutions.
        strides: stride of the first convolution (the second always uses 1).
        downsample: optional layer applied to the input to produce the
            shortcut branch; when ``None`` the input itself is the shortcut.
        **kwargs: forwarded to ``layers.Layer`` (e.g. ``name``).
    """
    # multiplier read by _make_layer to size the shortcut convolution
    expansion = 1

    def __init__(self, out_channel, strides=1, downsample=None, **kwargs):
        super(BasicBlock, self).__init__(**kwargs)
        self.conv1 = layers.Conv2D(out_channel, kernel_size=3, strides=strides,
                                   padding="SAME", use_bias=False)
        self.bn1 = layers.BatchNormalization(momentum=0.9, epsilon=1e-5)
        # -----------------------------------------
        self.conv2 = layers.Conv2D(out_channel, kernel_size=3, strides=1,
                                   padding="SAME", use_bias=False)
        self.bn2 = layers.BatchNormalization(momentum=0.9, epsilon=1e-5)
        # -----------------------------------------
        self.downsample = downsample
        self.relu = layers.ReLU()
        self.add = layers.Add()

    def call(self, inputs, training=False):
        """Return ``relu(shortcut(inputs) + bn2(conv2(relu(bn1(conv1(inputs))))))``."""
        identity = inputs
        if self.downsample is not None:
            # NOTE(review): `training` is not passed explicitly to the shortcut;
            # presumably relies on Keras propagating it to nested layers — confirm
            identity = self.downsample(inputs)

        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = self.relu(x)

        x = self.conv2(x)
        x = self.bn2(x, training=training)

        # residual connection; the activation is applied after the addition
        x = self.add([identity, x])
        x = self.relu(x)

        return x


class Bottleneck(layers.Layer):
    """
    Residual bottleneck block: 1x1 conv -> 3x3 conv -> 1x1 conv, each followed by batch norm.

    Note: in the original paper, on the main branch of the dashed-line residual
    structure, the first 1x1 convolution has stride 2 and the second (3x3)
    convolution has stride 1. The official PyTorch implementation instead uses
    stride 1 for the first 1x1 convolution and stride 2 for the 3x3 convolution,
    which improves top-1 accuracy by roughly 0.5%.
    See ResNet v1.5 https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch
    """
    # the last 1x1 convolution outputs out_channel * expansion filters
    expansion = 4

    def __init__(self, out_channel, strides=1, downsample=None, **kwargs):
        super(Bottleneck, self).__init__(**kwargs)
        self.conv1 = layers.Conv2D(out_channel, kernel_size=1, use_bias=False, name="conv1")
        self.bn1 = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name="conv1/BatchNorm")
        # -----------------------------------------
        # the stride is applied on the 3x3 convolution (see the class docstring)
        self.conv2 = layers.Conv2D(out_channel, kernel_size=3, use_bias=False,
                                   strides=strides, padding="SAME", name="conv2")
        self.bn2 = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name="conv2/BatchNorm")
        # -----------------------------------------
        self.conv3 = layers.Conv2D(out_channel * self.expansion, kernel_size=1, use_bias=False, name="conv3")
        self.bn3 = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name="conv3/BatchNorm")
        # -----------------------------------------
        self.relu = layers.ReLU()
        self.downsample = downsample
        self.add = layers.Add()

    def call(self, inputs, training=False):
        """Run the three conv/BN stages and add the (optionally projected) shortcut."""
        identity = inputs
        if self.downsample is not None:
            # NOTE(review): `training` is not passed explicitly to the shortcut;
            # presumably relies on Keras propagating it to nested layers — confirm
            identity = self.downsample(inputs)

        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = self.relu(x)

        x = self.conv2(x)
        x = self.bn2(x, training=training)
        x = self.relu(x)

        x = self.conv3(x)
        x = self.bn3(x, training=training)

        # residual connection; the activation is applied after the addition
        x = self.add([x, identity])
        x = self.relu(x)

        return x


def _make_layer(block, in_channel, channel, block_num, name, strides=1):
    """Stack ``block_num`` residual units of type ``block`` into one named stage.

    The first unit receives ``strides`` and, when the stride is not 1 or the
    incoming channel count differs from ``channel * block.expansion``, a
    1x1-conv + batch-norm projection as its shortcut. The remaining units use
    the block's defaults.
    """
    out_channel = channel * block.expansion

    shortcut = None
    if not (strides == 1 and in_channel == out_channel):
        projection = layers.Conv2D(out_channel, kernel_size=1, strides=strides,
                                   use_bias=False, name="conv1")
        projection_bn = layers.BatchNormalization(momentum=0.9, epsilon=1.001e-5, name="BatchNorm")
        shortcut = Sequential([projection, projection_bn], name="shortcut")

    # units are numbered from 1; only the first one changes stride / channels
    units = [block(channel, downsample=shortcut, strides=strides, name="unit_1")]
    units.extend(block(channel, name="unit_" + str(unit_id))
                 for unit_id in range(2, block_num + 1))

    return Sequential(units, name=name)


def _resnet(block, blocks_num, im_width=224, im_height=224, num_classes=1000, include_top=True):
    """Assemble a functional-API ResNet from ``block`` units.

    ``blocks_num`` gives the number of units in each of the four stages. With
    ``include_top`` the model ends in global average pooling, a dense layer
    with ``num_classes`` outputs and a softmax; otherwise it returns the
    feature map of the last stage.
    """
    # tensors in tensorflow are channels-last (NHWC)
    # (None, 224, 224, 3)
    input_image = layers.Input(shape=(im_height, im_width, 3), dtype="float32")

    # stem: 7x7 stride-2 convolution, batch norm, relu, 3x3 stride-2 max pooling
    x = layers.Conv2D(filters=64, kernel_size=7, strides=2,
                      padding="SAME", use_bias=False, name="conv1")(input_image)
    x = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name="conv1/BatchNorm")(x)
    x = layers.ReLU()(x)
    x = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME")(x)

    # (channel, stride) of the four residual stages, in order
    stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
    for stage_index, (channel, stride) in enumerate(stage_specs):
        stage = _make_layer(block, x.shape[-1], channel, blocks_num[stage_index],
                            strides=stride, name="block" + str(stage_index + 1))
        x = stage(x)

    if not include_top:
        return Model(inputs=input_image, outputs=x)

    x = layers.GlobalAvgPool2D()(x)  # pool + flatten
    x = layers.Dense(num_classes, name="logits")(x)
    predict = layers.Softmax()(x)

    return Model(inputs=input_image, outputs=predict)


def resnet34(im_width=224, im_height=224, num_classes=1000, include_top=True):
    """Build a ResNet-34: BasicBlock units arranged as 3, 4, 6 and 3 per stage."""
    return _resnet(BasicBlock, [3, 4, 6, 3],
                   im_width=im_width,
                   im_height=im_height,
                   num_classes=num_classes,
                   include_top=include_top)


def resnet50(im_width=224, im_height=224, num_classes=1000, include_top=True):
    """Build a ResNet-50: Bottleneck units arranged as 3, 4, 6 and 3 per stage."""
    return _resnet(Bottleneck, [3, 4, 6, 3],
                   im_width=im_width,
                   im_height=im_height,
                   num_classes=num_classes,
                   include_top=include_top)


def resnet101(im_width=224, im_height=224, num_classes=1000, include_top=True):
    """Build a ResNet-101: Bottleneck units arranged as 3, 4, 23 and 3 per stage."""
    return _resnet(Bottleneck, [3, 4, 23, 3],
                   im_width=im_width,
                   im_height=im_height,
                   num_classes=num_classes,
                   include_top=include_top)


================================================
FILE: tensorflow_classification/Test5_resnet/predict.py
================================================
import os
import json
import glob

import tensorflow as tf
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

from model import resnet50


def main():
    """Predict the class of a single image with a fine-tuned ResNet-50 and plot the result.

    Reads ``../tulip.jpg``, the index-to-class mapping from
    ``./class_indices.json`` and the weights from
    ``./save_weights/resNet_50.ckpt``; prints the probability of every class
    and shows the image titled with the top prediction.
    """
    im_height = 224
    im_width = 224
    num_classes = 5

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    # force 3 channels so the per-channel mean subtraction below is valid for
    # grayscale / RGBA / palette inputs as well
    img = Image.open(img_path).convert("RGB")
    # resize image to 224x224
    img = img.resize((im_width, im_height))
    plt.imshow(img)

    # subtract the per-channel RGB means (pixel values are not rescaled)
    _R_MEAN = 123.68
    _G_MEAN = 116.78
    _B_MEAN = 103.94
    img = np.array(img).astype(np.float32)
    img = img - [_R_MEAN, _G_MEAN, _B_MEAN]

    # Add the image to a batch where it's the only member.
    img = (np.expand_dims(img, 0))

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model: frozen ResNet-50 feature extractor plus a small classifier head
    feature = resnet50(num_classes=num_classes, include_top=False)
    feature.trainable = False
    model = tf.keras.Sequential([feature,
                                 tf.keras.layers.GlobalAvgPool2D(),
                                 tf.keras.layers.Dropout(rate=0.5),
                                 tf.keras.layers.Dense(1024, activation="relu"),
                                 tf.keras.layers.Dropout(rate=0.5),
                                 tf.keras.layers.Dense(num_classes),
                                 tf.keras.layers.Softmax()])

    # load weights
    weights_path = './save_weights/resNet_50.ckpt'
    # a checkpoint is stored as several files sharing this prefix
    assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path)
    model.load_weights(weights_path)

    # prediction
    result = np.squeeze(model.predict(img))
    predict_class = np.argmax(result)

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_class)],
                                                 result[predict_class])
    plt.title(print_res)
    for class_index, prob in enumerate(result):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(class_index)],
                                                  prob))
    plt.show()


# run the prediction only when this file is executed as a script
if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test5_resnet/read_ckpt.py
================================================
"""
The already-converted weights can be downloaded directly from:
link: https://pan.baidu.com/s/1tLe9ahTMIwQAX7do_S59Zg  password: u199
"""
import tensorflow as tf


def rename_var(ckpt_path, new_ckpt_path, num_classes, except_list):
    """Copy a checkpoint to ``new_ckpt_path`` with its variables renamed.

    Every variable of ``ckpt_path`` whose name is not in ``except_list`` is
    re-created under a new name: the ``resnet_v1_50/`` and ``bottleneck_v1/``
    parts are removed, ``weights`` becomes ``kernel`` (``shortcut/weights``
    becomes ``shortcut/conv1/kernel``) and ``biases`` becomes ``bias``.
    A freshly initialised ``logits/kernel`` of shape ``[2048, num_classes]``
    and ``logits/bias`` of shape ``[num_classes]`` are added.

    Args:
        ckpt_path: path of the checkpoint to read.
        new_ckpt_path: path the converted checkpoint is written to.
        num_classes: number of outputs of the new ``logits`` layer.
        except_list: names of variables that are not copied.
    """
    # entering the Session (rather than Session().as_default()) makes it the
    # default session and also closes it when the block exits
    with tf.Graph().as_default(), tf.compat.v1.Session() as sess:
        new_var_list = []

        for var_name, _ in tf.train.list_variables(ckpt_path):
            print(var_name)
            if var_name in except_list:
                continue
            var = tf.train.load_variable(ckpt_path, var_name)
            new_var_name = var_name.replace('resnet_v1_50/', "")
            new_var_name = new_var_name.replace("bottleneck_v1/", "")
            # must run before the generic "weights" rule below
            new_var_name = new_var_name.replace("shortcut/weights", "shortcut/conv1/kernel")
            new_var_name = new_var_name.replace("weights", "kernel")
            new_var_name = new_var_name.replace("biases", "bias")
            re_var = tf.Variable(var, name=new_var_name)
            new_var_list.append(re_var)

        # new classification head, randomly initialised for num_classes outputs
        re_var = tf.Variable(tf.keras.initializers.he_uniform()([2048, num_classes]), name="logits/kernel")
        new_var_list.append(re_var)
        re_var = tf.Variable(tf.keras.initializers.he_uniform()([num_classes]), name="logits/bias")
        new_var_list.append(re_var)
        saver = tf.compat.v1.train.Saver(new_var_list)
        sess.run(tf.compat.v1.global_variables_initializer())
        saver.save(sess, save_path=new_ckpt_path, write_meta_graph=False, write_state=False)


def main():
    """Convert ``./resnet_v1_50.ckpt`` into ``./pretrain_weights.ckpt`` for 5 classes."""
    # variables that are dropped instead of copied: the step counter, the
    # mean_rgb variable and the original logits layer
    skipped_variables = [
        'global_step',
        'resnet_v1_50/mean_rgb',
        'resnet_v1_50/logits/biases',
        'resnet_v1_50/logits/weights',
    ]
    rename_var('./resnet_v1_50.ckpt',
               './pretrain_weights.ckpt',
               5,
               skipped_variables)


# run the checkpoint conversion only when this file is executed as a script
if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test5_resnet/read_h5.py
================================================
import h5py

# Print every top-level group of the weights file and the arrays it references.
# The file is opened in a `with` block so the handle is always closed.
with h5py.File('./save_weights/resNet_1.h5', 'r') as f:
    for root_name, g in f.items():
        print(root_name)
        # each attribute of the group is treated as a list of dataset names
        for _, weights_dirs in g.attrs.items():
            for i in weights_dirs:
                # names may be stored as bytes or already decoded to str,
                # depending on the h5py version; decode only when needed
                weight_name = i.decode("utf-8") if isinstance(i, bytes) else str(i)
                name = root_name + "/" + weight_name
                data = f[name]
                # `Dataset.value` was removed in h5py 3.0; `[()]` reads the whole dataset
                print(data[()])


================================================
FILE: tensorflow_classification/Test5_resnet/subclassed_model.py
================================================
from tensorflow.keras import layers, Model, Sequential


class BasicBlock(layers.Layer):
    """Residual block made of two 3x3 convolutions, each followed by batch norm.

    Args:
        out_channel: number of filters of both convolutions.
        strides: stride of the first convolution (the second always uses 1).
        downsample: optional layer applied to the input to produce the
            shortcut branch; when ``None`` the input itself is the shortcut.
        **kwargs: forwarded to ``layers.Layer`` (e.g. ``name``).
    """
    # multiplier read by ResNet._make_layer to size the shortcut convolution
    expansion = 1

    def __init__(self, out_channel, strides=1, downsample=None, **kwargs):
        super(BasicBlock, self).__init__(**kwargs)
        self.conv1 = layers.Conv2D(out_channel, kernel_size=3, strides=strides,
                                   padding="SAME", use_bias=False)
        self.bn1 = layers.BatchNormalization(momentum=0.9, epsilon=1e-5)
        # -----------------------------------------
        self.conv2 = layers.Conv2D(out_channel, kernel_size=3, strides=1,
                                   padding="SAME", use_bias=False)
        self.bn2 = layers.BatchNormalization(momentum=0.9, epsilon=1e-5)
        # -----------------------------------------
        self.downsample = downsample
        self.relu = layers.ReLU()
        self.add = layers.Add()

    def call(self, inputs, training=False, **kwargs):
        """Return ``relu(shortcut(inputs) + bn2(conv2(relu(bn1(conv1(inputs))))))``."""
        identity = inputs
        if self.downsample is not None:
            # NOTE(review): `training` is not passed explicitly to the shortcut;
            # presumably relies on Keras propagating it to nested layers — confirm
            identity = self.downsample(inputs)

        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = self.relu(x)

        x = self.conv2(x)
        x = self.bn2(x, training=training)

        # residual connection; the activation is applied after the addition
        x = self.add([identity, x])
        x = self.relu(x)

        return x


class Bottleneck(layers.Layer):
    """
    Residual bottleneck block: 1x1 conv -> 3x3 conv -> 1x1 conv, each followed by batch norm.

    Note: in the original paper, on the main branch of the dashed-line residual
    structure, the first 1x1 convolution has stride 2 and the second (3x3)
    convolution has stride 1. The official PyTorch implementation instead uses
    stride 1 for the first 1x1 convolution and stride 2 for the 3x3 convolution,
    which improves top-1 accuracy by roughly 0.5%.
    See ResNet v1.5 https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch
    """
    # the last 1x1 convolution outputs out_channel * expansion filters
    expansion = 4

    def __init__(self, out_channel, strides=1, downsample=None, **kwargs):
        super(Bottleneck, self).__init__(**kwargs)
        self.conv1 = layers.Conv2D(out_channel, kernel_size=1, use_bias=False, name="conv1")
        self.bn1 = layers.BatchNormalization(momentum=0.9, epsilon=1.001e-5, name="conv1/BatchNorm")
        # -----------------------------------------
        # the stride is applied on the 3x3 convolution (see the class docstring)
        self.conv2 = layers.Conv2D(out_channel, kernel_size=3, use_bias=False,
                                   strides=strides, padding="SAME", name="conv2")
        self.bn2 = layers.BatchNormalization(momentum=0.9, epsilon=1.001e-5, name="conv2/BatchNorm")
        # -----------------------------------------
        self.conv3 = layers.Conv2D(out_channel * self.expansion, kernel_size=1, use_bias=False, name="conv3")
        self.bn3 = layers.BatchNormalization(momentum=0.9, epsilon=1.001e-5, name="conv3/BatchNorm")
        # -----------------------------------------
        self.relu = layers.ReLU()
        self.downsample = downsample
        self.add = layers.Add()

    def call(self, inputs, training=False, **kwargs):
        """Run the three conv/BN stages and add the (optionally projected) shortcut."""
        identity = inputs
        if self.downsample is not None:
            # NOTE(review): `training` is not passed explicitly to the shortcut;
            # presumably relies on Keras propagating it to nested layers — confirm
            identity = self.downsample(inputs)

        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = self.relu(x)

        x = self.conv2(x)
        x = self.bn2(x, training=training)
        x = self.relu(x)

        x = self.conv3(x)
        x = self.bn3(x, training=training)

        # residual connection; the activation is applied after the addition
        x = self.add([x, identity])
        x = self.relu(x)

        return x


class ResNet(Model):
    """Subclassed-Keras ResNet built from ``block`` units in four stages.

    Args:
        block: residual block class (``BasicBlock`` or ``Bottleneck``).
        blocks_num: number of units in each of the four stages.
        num_classes: size of the final dense layer (only with ``include_top``).
        include_top: when true, append global average pooling, the dense
            ``logits`` layer and a softmax; otherwise ``call`` returns the
            output of the last stage.
        **kwargs: forwarded to ``Model``.
    """

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True, **kwargs):
        super(ResNet, self).__init__(**kwargs)
        self.include_top = include_top
        # stem: 7x7 stride-2 convolution, batch norm, relu, 3x3 stride-2 max pooling
        self.conv1 = layers.Conv2D(filters=64, kernel_size=7, strides=2, padding="SAME",
                                   use_bias=False, name="conv1")
        self.bn1 = layers.BatchNormalization(momentum=0.9, epsilon=1.001e-5, name="conv1/BatchNorm")
        self.relu1 = layers.ReLU(name="relu1")
        self.maxpool1 = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME", name="maxpool1")

        # only block1 is flagged as the first stage; stages 2-4 halve the resolution
        self.block1 = self._make_layer(block, True, 64, blocks_num[0], name="block1")
        self.block2 = self._make_layer(block, False, 128, blocks_num[1], strides=2, name="block2")
        self.block3 = self._make_layer(block, False, 256, blocks_num[2], strides=2, name="block3")
        self.block4 = self._make_layer(block, False, 512, blocks_num[3], strides=2, name="block4")

        if self.include_top:
            self.avgpool = layers.GlobalAvgPool2D(name="avgpool1")
            self.fc = layers.Dense(num_classes, name="logits")
            self.softmax = layers.Softmax()

    def call(self, inputs, training=False, **kwargs):
        """Run the stem and the four stages, plus the classifier head if enabled."""
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = self.relu1(x)
        x = self.maxpool1(x)

        x = self.block1(x, training=training)
        x = self.block2(x, training=training)
        x = self.block3(x, training=training)
        x = self.block4(x, training=training)

        if self.include_top:
            x = self.avgpool(x)
            x = self.fc(x)
            x = self.softmax(x)

        return x

    def _make_layer(self, block, first_block, channel, block_num, name=None, strides=1):
        """Stack ``block_num`` units of ``block`` into one named ``Sequential`` stage.

        The first unit receives ``strides`` and, when ``strides != 1`` or
        ``first_block`` is true, a 1x1-conv + batch-norm projection shortcut.
        """
        downsample = None
        # NOTE(review): with first_block=True the projection is created for
        # block1 regardless of the block type (also for BasicBlock) — confirm
        # this is intended
        if strides != 1 or first_block is True:
            downsample = Sequential([
                layers.Conv2D(channel * block.expansion, kernel_size=1, strides=strides,
                              use_bias=False, name="conv1"),
                layers.BatchNormalization(momentum=0.9, epsilon=1.001e-5, name="BatchNorm")
            ], name="shortcut")

        layers_list = []
        layers_list.append(block(channel, downsample=downsample, strides=strides, name="unit_1"))

        # remaining units keep the block's default stride and have no projection
        for index in range(1, block_num):
            layers_list.append(block(channel, name="unit_" + str(index + 1)))

        return Sequential(layers_list, name=name)


def resnet34(num_classes=1000, include_top=True):
    """Build a ResNet from ``BasicBlock`` units with stage depths [3, 4, 6, 3].

    Args:
        num_classes: forwarded to ``ResNet`` as the classifier size.
        include_top: forwarded to ``ResNet``; controls whether the head is built.
    """
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths, num_classes, include_top)


def resnet101(num_classes=1000, include_top=True):
    """Build a ResNet from ``Bottleneck`` units with stage depths [3, 4, 23, 3].

    Args:
        num_classes: forwarded to ``ResNet`` as the classifier size.
        include_top: forwarded to ``ResNet``; controls whether the head is built.
    """
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths, num_classes, include_top)


================================================
FILE: tensorflow_classification/Test5_resnet/train.py
================================================
import os
import sys
import glob
import json

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tqdm import tqdm

from model import resnet50


def main():
    """Train a classifier head on top of a frozen, pre-trained ResNet-50.

    Steps performed:
      * locate ``data_set/flower_data/{train,val}`` two directory levels
        above the current working directory;
      * build Keras image generators that subtract per-channel RGB means;
      * write the index -> class-name mapping to ``class_indices.json``;
      * load ``./pretrain_weights.ckpt`` into the backbone and freeze it;
      * run a custom training loop and save the weights to
        ``./save_weights/resNet_50.ckpt`` whenever validation accuracy improves.

    Raises:
        AssertionError: if a data directory or the pre-trained weights are missing.
    """
    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    train_dir = os.path.join(image_path, "train")
    validation_dir = os.path.join(image_path, "val")
    assert os.path.exists(train_dir), "cannot find {}".format(train_dir)
    assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir)

    # make sure the checkpoint directory exists before the first save
    os.makedirs("save_weights", exist_ok=True)

    im_height = 224
    im_width = 224
    batch_size = 16
    epochs = 20

    _R_MEAN = 123.68
    _G_MEAN = 116.78
    _B_MEAN = 103.94

    def pre_function(img):
        # subtract the per-channel RGB means from the image
        return img - [_R_MEAN, _G_MEAN, _B_MEAN]

    # data generator with data augmentation
    train_image_generator = ImageDataGenerator(horizontal_flip=True,
                                               preprocessing_function=pre_function)

    validation_image_generator = ImageDataGenerator(preprocessing_function=pre_function)

    train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,
                                                               batch_size=batch_size,
                                                               shuffle=True,
                                                               target_size=(im_height, im_width),
                                                               class_mode='categorical')
    total_train = train_data_gen.n

    # get class dict
    class_indices = train_data_gen.class_indices
    # size the classifier from the data instead of a hard-coded constant
    num_classes = len(class_indices)

    # transform value and key of dict
    inverse_dict = {val: key for key, val in class_indices.items()}
    # write dict into json file
    json_str = json.dumps(inverse_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,
                                                                  batch_size=batch_size,
                                                                  shuffle=False,
                                                                  target_size=(im_height, im_width),
                                                                  class_mode='categorical')
    total_val = val_data_gen.n
    print("using {} images for training, {} images for validation.".format(total_train,
                                                                           total_val))

    # Ceil division: one epoch must consume exactly one full pass of each
    # generator, including the final (smaller) batch.  With floor division the
    # endlessly cycling generators drift, so every epoch - validation
    # included - would be computed on a different subset of the images.
    train_steps = (total_train + batch_size - 1) // batch_size
    val_steps = (total_val + batch_size - 1) // batch_size

    feature = resnet50(num_classes=num_classes, include_top=False)
    # feature.build((None, 224, 224, 3))  # when using subclass model

    # download the already-converted pre-trained weights
    # download weights link: https://pan.baidu.com/s/1tLe9ahTMIwQAX7do_S59Zg  password: u199
    pre_weights_path = './pretrain_weights.ckpt'
    assert len(glob.glob(pre_weights_path+"*")), "cannot find {}".format(pre_weights_path)
    feature.load_weights(pre_weights_path)
    feature.trainable = False  # only the new head below is trained
    feature.summary()

    model = tf.keras.Sequential([feature,
                                 tf.keras.layers.GlobalAvgPool2D(),
                                 tf.keras.layers.Dropout(rate=0.5),
                                 tf.keras.layers.Dense(1024, activation="relu"),
                                 tf.keras.layers.Dropout(rate=0.5),
                                 tf.keras.layers.Dense(num_classes),
                                 tf.keras.layers.Softmax()])
    # model.build((None, 224, 224, 3))
    model.summary()

    # using keras low level api for training
    # the model ends in Softmax, hence from_logits=False
    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0002)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.CategoricalAccuracy(name='val_accuracy')

    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            output = model(images, training=True)
            loss = loss_object(labels, output)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(labels, output)

    @tf.function
    def val_step(images, labels):
        output = model(images, training=False)
        loss = loss_object(labels, output)

        val_loss(loss)
        val_accuracy(labels, output)

    best_val_acc = 0.
    for epoch in range(epochs):
        train_loss.reset_states()  # clear history info
        train_accuracy.reset_states()  # clear history info
        val_loss.reset_states()  # clear history info
        val_accuracy.reset_states()  # clear history info

        # train
        train_bar = tqdm(range(train_steps), file=sys.stdout)
        for _ in train_bar:
            images, labels = next(train_data_gen)
            train_step(images, labels)

            # print train process
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                                 epochs,
                                                                                 train_loss.result(),
                                                                                 train_accuracy.result())

        # validate
        val_bar = tqdm(range(val_steps), file=sys.stdout)
        for _ in val_bar:
            test_images, test_labels = next(val_data_gen)
            val_step(test_images, test_labels)

            # print val process
            val_bar.desc = "valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                               epochs,
                                                                               val_loss.result(),
                                                                               val_accuracy.result())

        # only save best weights
        if val_accuracy.result() > best_val_acc:
            best_val_acc = val_accuracy.result()
            model.save_weights("./save_weights/resNet_50.ckpt", save_format="tf")


# Run training only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test5_resnet/trainGPU.py
================================================
import matplotlib.pyplot as plt
from model import resnet50
import tensorflow as tf
import json
import os
import time
import glob
import random
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


def main():
    """Train a classifier head on a frozen ResNet-50 using a ``tf.data`` pipeline.

    Steps performed:
      * enable memory growth on every visible GPU;
      * locate ``data_set/flower_data/{train,val}`` two directory levels
        above the current working directory and list their ``.jpg`` files;
      * write the index -> class-name mapping to ``class_indices.json``;
      * load ``./pretrain_weights.ckpt`` into the backbone and freeze it;
      * run a custom training loop and save the weights to
        ``./save_weights/myResNet.ckpt`` whenever the validation loss improves.

    Raises:
        AssertionError: if a data directory, the images or the pre-trained
            weights cannot be found.
    """
    gpus = tf.config.experimental.list_physical_devices("GPU")
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
        except RuntimeError as e:
            print(e)
            exit(-1)

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    train_dir = os.path.join(image_path, "train")
    validation_dir = os.path.join(image_path, "val")
    assert os.path.exists(train_dir), "cannot find {}".format(train_dir)
    assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir)

    # create directory for saving weights
    os.makedirs("save_weights", exist_ok=True)

    im_height = 224
    im_width = 224

    _R_MEAN = 123.68
    _G_MEAN = 116.78
    _B_MEAN = 103.94

    batch_size = 32
    epochs = 30

    # class dict; sorted because os.listdir returns entries in arbitrary
    # order, which would make the class indices differ between runs/machines
    data_class = sorted(cla for cla in os.listdir(train_dir)
                        if os.path.isdir(os.path.join(train_dir, cla)))
    class_num = len(data_class)
    class_dict = {value: index for index, value in enumerate(data_class)}

    # reverse value and key of dict
    inverse_dict = {val: key for key, val in class_dict.items()}
    # write dict into json file
    json_str = json.dumps(inverse_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    # load train images list
    random.seed(0)
    train_image_list = glob.glob(train_dir+"/*/*.jpg")
    random.shuffle(train_image_list)
    train_num = len(train_image_list)
    assert train_num > 0, "cannot find any .jpg file in {}".format(train_dir)
    # the parent directory name of each image is its class name
    train_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in train_image_list]

    # load validation images list
    val_image_list = glob.glob(validation_dir+"/*/*.jpg")
    random.shuffle(val_image_list)
    val_num = len(val_image_list)
    assert val_num > 0, "cannot find any .jpg file in {}".format(validation_dir)
    val_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in val_image_list]

    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    def load_image(img_path):
        # channels=3 forces RGB so grayscale JPEGs still batch with the rest
        image = tf.io.read_file(img_path)
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.cast(image, tf.float32)
        return tf.image.resize(image, [im_height, im_width])

    def process_train_img(img_path, label):
        label = tf.one_hot(label, depth=class_num)
        image = load_image(img_path)
        image = tf.image.random_flip_left_right(image)
        image = image - [_R_MEAN, _G_MEAN, _B_MEAN]
        return image, label

    def process_val_img(img_path, label):
        label = tf.one_hot(label, depth=class_num)
        image = load_image(img_path)
        image = image - [_R_MEAN, _G_MEAN, _B_MEAN]
        return image, label

    AUTOTUNE = tf.data.experimental.AUTOTUNE

    # load train dataset
    train_dataset = tf.data.Dataset.from_tensor_slices((train_image_list, train_label_list))
    train_dataset = train_dataset.shuffle(buffer_size=train_num)\
                                 .map(process_train_img, num_parallel_calls=AUTOTUNE)\
                                 .repeat().batch(batch_size).prefetch(AUTOTUNE)

    # load validation dataset; not repeated, so one iteration is exactly one
    # pass over every validation image (including the final smaller batch)
    val_dataset = tf.data.Dataset.from_tensor_slices((val_image_list, val_label_list))
    val_dataset = val_dataset.map(process_val_img, num_parallel_calls=AUTOTUNE)\
                             .batch(batch_size)

    # instantiate the model
    feature = resnet50(num_classes=class_num, include_top=False)
    pre_weights_path = './pretrain_weights.ckpt'
    assert len(glob.glob(pre_weights_path + "*")), "cannot find {}".format(pre_weights_path)
    feature.load_weights(pre_weights_path)
    feature.trainable = False  # only the new head below is trained

    model = tf.keras.Sequential([feature,
                                 tf.keras.layers.GlobalAvgPool2D(),
                                 tf.keras.layers.Dropout(rate=0.5),
                                 tf.keras.layers.Dense(1024, activation="relu"),
                                 tf.keras.layers.Dropout(rate=0.5),
                                 tf.keras.layers.Dense(class_num),
                                 tf.keras.layers.Softmax()])

    model.summary()

    # using keras low level api for training
    # the model ends in Softmax, hence from_logits=False
    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')

    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            output = model(images, training=True)
            loss = loss_object(labels, output)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(labels, output)

    @tf.function
    def test_step(images, labels):
        output = model(images, training=False)
        t_loss = loss_object(labels, output)

        test_loss(t_loss)
        test_accuracy(labels, output)

    best_test_loss = float('inf')
    train_step_num = train_num // batch_size
    for epoch in range(1, epochs+1):
        train_loss.reset_states()        # clear history info
        train_accuracy.reset_states()    # clear history info
        test_loss.reset_states()         # clear history info
        test_accuracy.reset_states()     # clear history info

        t1 = time.perf_counter()
        # take() bounds the endlessly repeated dataset; unlike a manual
        # "break on step count" it also terminates when train_step_num is 0
        for images, labels in train_dataset.take(train_step_num):
            train_step(images, labels)
        print(time.perf_counter()-t1)

        for images, labels in val_dataset:
            test_step(images, labels)

        template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
        print(template.format(epoch,
                              train_loss.result(),
                              train_accuracy.result() * 100,
                              test_loss.result(),
                              test_accuracy.result() * 100))
        # only save when the validation loss improves; the best value must be
        # recorded, otherwise every epoch would overwrite the checkpoint
        if test_loss.result() < best_test_loss:
            best_test_loss = test_loss.result()
            model.save_weights("./save_weights/myResNet.ckpt", save_format='tf')


# Run training only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test6_mobilenet/model_v2.py
================================================
from tensorflow.keras import layers, Model, Sequential


def _make_divisible(ch, divisor=8, min_ch=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    """
    if min_ch is None:
        min_ch = divisor
    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_ch < 0.9 * ch:
        new_ch += divisor
    return new_ch


class ConvBNReLU(layers.Layer):
    """Bias-free 'SAME' convolution followed by batch normalisation and ReLU6."""

    def __init__(self, out_channel, kernel_size=3, stride=1, **kwargs):
        """Create the three sub-layers.

        Args:
            out_channel: number of convolution filters.
            kernel_size: convolution kernel size.
            stride: convolution stride.
            **kwargs: forwarded to ``layers.Layer`` (e.g. ``name``).
        """
        super().__init__(**kwargs)
        self.conv = layers.Conv2D(
            filters=out_channel,
            kernel_size=kernel_size,
            strides=stride,
            padding='SAME',
            use_bias=False,
            name='Conv2d',
        )
        self.bn = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name='BatchNorm')
        # ReLU capped at 6
        self.activation = layers.ReLU(max_value=6.0)

    def call(self, inputs, training=False):
        """Apply conv -> batch norm (honouring ``training``) -> ReLU6."""
        normalised = self.bn(self.conv(inputs), training=training)
        return self.activation(normalised)


class InvertedResidual(layers.Layer):
    """Inverted residual unit: optional 1x1 expand, 3x3 depthwise, linear 1x1 project."""

    def __init__(self, in_channel, out_channel, stride, expand_ratio, **kwargs):
        """Build the main branch.

        Args:
            in_channel: channel count of the incoming tensor.
            out_channel: number of filters of the projection convolution.
            stride: stride of the depthwise convolution.
            expand_ratio: multiplier for the hidden channel count; the
                expansion convolution is omitted when it equals 1.
            **kwargs: forwarded to ``layers.Layer`` (e.g. ``name``).
        """
        super().__init__(**kwargs)
        self.hidden_channel = in_channel * expand_ratio
        # the identity shortcut is only valid when the tensor shape is kept
        self.use_shortcut = stride == 1 and in_channel == out_channel

        branch = []
        if expand_ratio != 1:
            # 1x1 pointwise conv
            expand = ConvBNReLU(out_channel=self.hidden_channel, kernel_size=1, name='expand')
            branch.append(expand)

        # 3x3 depthwise conv
        depthwise = layers.DepthwiseConv2D(kernel_size=3, padding='SAME', strides=stride,
                                           use_bias=False, name='depthwise')
        depthwise_bn = layers.BatchNormalization(momentum=0.9, epsilon=1e-5,
                                                 name='depthwise/BatchNorm')
        depthwise_act = layers.ReLU(max_value=6.0)
        # 1x1 pointwise conv(linear): no activation after its batch norm
        project = layers.Conv2D(filters=out_channel, kernel_size=1, strides=1,
                                padding='SAME', use_bias=False, name='project')
        project_bn = layers.BatchNormalization(momentum=0.9, epsilon=1e-5,
                                               name='project/BatchNorm')
        branch += [depthwise, depthwise_bn, depthwise_act, project, project_bn]

        self.main_branch = Sequential(branch, name='expanded_conv')

    def call(self, inputs, training=False, **kwargs):
        """Run the main branch and add the input when the shortcut applies."""
        if not self.use_shortcut:
            return self.main_branch(inputs, training=training)
        return inputs + self.main_branch(inputs, training=training)


def MobileNetV2(im_height=224,
                im_width=224,
                num_classes=1000,
                alpha=1.0,
                round_nearest=8,
                include_top=True):
    """Build MobileNetV2 as a Keras functional ``Model``.

    Args:
        im_height: input image height.
        im_width: input image width.
        num_classes: units of the final ``Dense`` layer (used with ``include_top``).
        alpha: width multiplier applied to every channel count.
        round_nearest: channel counts are rounded to a multiple of this value.
        include_top: when exactly ``True`` append global pooling, dropout and
            a ``Dense`` layer (no softmax); otherwise the model outputs the
            feature map of the last convolution.

    Returns:
        The assembled ``Model``.
    """
    stem_channel = _make_divisible(32 * alpha, round_nearest)
    head_channel = _make_divisible(1280 * alpha, round_nearest)

    # (expand ratio t, output channels c, repeats n, first stride s)
    stage_settings = [
        [1, 16, 1, 1],
        [6, 24, 2, 2],
        [6, 32, 3, 2],
        [6, 64, 4, 2],
        [6, 96, 3, 1],
        [6, 160, 3, 2],
        [6, 320, 1, 1],
    ]

    input_image = layers.Input(shape=(im_height, im_width, 3), dtype='float32')
    # conv1
    x = ConvBNReLU(stem_channel, stride=2, name='Conv')(input_image)

    # building inverted residual blocks
    for t, c, n, s in stage_settings:
        stage_channel = _make_divisible(c * alpha, round_nearest)
        for unit in range(n):
            # only the first unit of a stage may downsample
            unit_stride = 1 if unit else s
            x = InvertedResidual(x.shape[-1],
                                 stage_channel,
                                 unit_stride,
                                 expand_ratio=t)(x)

    # building last several layers
    x = ConvBNReLU(head_channel, kernel_size=1, name='Conv_1')(x)

    if include_top is not True:
        output = x
    else:
        # building classifier
        x = layers.GlobalAveragePooling2D()(x)  # pool + flatten
        x = layers.Dropout(0.2)(x)
        output = layers.Dense(num_classes, name='Logits')(x)

    return Model(inputs=input_image, outputs=output)


================================================
FILE: tensorflow_classification/Test6_mobilenet/model_v3.py
================================================
from typing import Union
from functools import partial
from tensorflow.keras import layers, Model


def _make_divisible(ch, divisor=8, min_ch=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    """
    if min_ch is None:
        min_ch = divisor
    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_ch < 0.9 * ch:
        new_ch += divisor
    return new_ch


def correct_pad(input_size: Union[int, tuple], kernel_size: Union[int, tuple]):
    """Returns a tuple for zero-padding for 2D convolution with downsampling.

    Arguments:
      input_size: Input tensor size: one integer used for both spatial
        dimensions, or a (height, width) pair. An entry may be ``None``
        (unknown size); it is then treated like an even size.
      kernel_size: An integer or tuple/list of 2 integers.

    Returns:
      A tuple ``((top, bottom), (left, right))``.
    """

    if isinstance(input_size, int):
        input_size = (input_size, input_size)

    # Any scalar is duplicated; a tuple/list is used as given, as documented.
    if not isinstance(kernel_size, (tuple, list)):
        kernel_size = (kernel_size, kernel_size)

    # Even sizes need one pixel less padding on the leading edge.
    adjust = tuple(1 if size is None else 1 - size % 2
                   for size in (input_size[0], input_size[1]))
    correct = (kernel_size[0] // 2, kernel_size[1] // 2)
    return ((correct[0] - adjust[0], correct[0]),
            (correct[1] - adjust[1], correct[1]))


class HardSigmoid(layers.Layer):
    """Hard sigmoid activation computed as ``relu6(x + 3) * (1 / 6)``."""

    def __init__(self, **kwargs):
        """Create the ReLU6 sub-layer; ``kwargs`` go to ``layers.Layer``."""
        super().__init__(**kwargs)
        self.relu6 = layers.ReLU(6.)

    def call(self, inputs, **kwargs):
        """Shift by 3, clip with ReLU6 and rescale by one sixth."""
        clipped = self.relu6(inputs + 3)
        return clipped * (1. / 6)


class HardSwish(layers.Layer):
    """Hard swish activation computed as ``hard_sigmoid(x) * x``."""

    def __init__(self, **kwargs):
        """Create the hard-sigmoid sub-layer; ``kwargs`` go to ``layers.Layer``."""
        super().__init__(**kwargs)
        self.hard_sigmoid = HardSigmoid()

    def call(self, inputs, **kwargs):
        """Gate the input with its own hard-sigmoid."""
        gate = self.hard_sigmoid(inputs)
        return gate * inputs


def _se_block(inputs, filters, prefix, se_ratio=1 / 4.):
    """Apply a squeeze-and-excitation gate to ``inputs``.

    Args:
        inputs: feature map to be re-weighted.
        filters: channel count used for the reshape and for the second
            1x1 convolution.
        prefix: layer-name prefix of the enclosing block.
        se_ratio: reduction ratio of the first 1x1 convolution.

    Returns:
        ``inputs`` multiplied element-wise by the computed gate.
    """
    scope = prefix + 'squeeze_excite/'

    # [batch, height, width, channel] -> [batch, channel]
    squeezed = layers.GlobalAveragePooling2D(name=scope + 'AvgPool')(inputs)

    # Target shape. Tuple of integers, does not include the samples dimension (batch size).
    # [batch, channel] -> [batch, 1, 1, channel]
    squeezed = layers.Reshape((1, 1, filters))(squeezed)

    # fc1: reduce the channel count
    reduced_c = _make_divisible(filters * se_ratio)
    gate = layers.Conv2D(filters=reduced_c,
                         kernel_size=1,
                         padding='same',
                         name=scope + 'Conv')(squeezed)
    gate = layers.ReLU(name=scope + 'Relu')(gate)

    # fc2: restore the channel count
    gate = layers.Conv2D(filters=filters,
                         kernel_size=1,
                         padding='same',
                         name=scope + 'Conv_1')(gate)
    gate = HardSigmoid(name=scope + 'HardSigmoid')(gate)

    return layers.Multiply(name=scope + 'Mul')([inputs, gate])


def _inverted_res_block(x,
                        input_c: int,      # input channel
                        kernel_size: int,  # kernel size
                        exp_c: int,        # expanded channel
                        out_c: int,        # out channel
                        use_se: bool,      # whether using SE
                        activation: str,   # RE or HS
                        stride: int,
                        block_id: int,
                        alpha: float = 1.0):
    """Append one MobileNetV3 inverted residual block to ``x``.

    Args:
        x: input tensor.
        input_c: nominal input channels (before ``alpha`` scaling); only used
            to decide whether the residual connection is added.
        kernel_size: depthwise kernel size.
        exp_c: nominal expanded channels (before ``alpha`` scaling).
        out_c: nominal output channels (before ``alpha`` scaling).
        use_se: insert a squeeze-and-excitation block after the depthwise stage.
        activation: ``"RE"`` selects ``layers.ReLU``, anything else ``HardSwish``.
        stride: depthwise stride; 2 triggers explicit zero padding.
        block_id: block index used in layer names; a falsy id (block 0)
            skips the expansion convolution.
        alpha: width multiplier.

    Returns:
        The output tensor of the block.
    """
    batch_norm = partial(layers.BatchNormalization, epsilon=0.001, momentum=0.99)

    # apply the width multiplier to all three channel counts
    input_c, exp_c, out_c = (_make_divisible(c * alpha) for c in (input_c, exp_c, out_c))

    act_cls = HardSwish if activation != "RE" else layers.ReLU
    act_name = act_cls.__name__

    residual = x
    if block_id:
        prefix = 'expanded_conv_{}/'.format(block_id)
        # expand channel
        x = layers.Conv2D(filters=exp_c,
                          kernel_size=1,
                          padding='same',
                          use_bias=False,
                          name=prefix + 'expand')(x)
        x = batch_norm(name=prefix + 'expand/BatchNorm')(x)
        x = act_cls(name=prefix + 'expand/' + act_name)(x)
    else:
        prefix = 'expanded_conv/'

    if stride == 2:
        # pad explicitly; the depthwise conv below then uses 'valid' padding
        spatial = (x.shape[1], x.shape[2])  # height, width
        pad = correct_pad(spatial, kernel_size)
        x = layers.ZeroPadding2D(padding=pad, name=prefix + 'depthwise/pad')(x)

    x = layers.DepthwiseConv2D(kernel_size=kernel_size,
                               strides=stride,
                               padding='same' if stride == 1 else 'valid',
                               use_bias=False,
                               name=prefix + 'depthwise')(x)
    x = batch_norm(name=prefix + 'depthwise/BatchNorm')(x)
    x = act_cls(name=prefix + 'depthwise/' + act_name)(x)

    if use_se:
        x = _se_block(x, filters=exp_c, prefix=prefix)

    # linear projection: batch norm only, no activation
    x = layers.Conv2D(filters=out_c,
                      kernel_size=1,
                      padding='same',
                      use_bias=False,
                      name=prefix + 'project')(x)
    x = batch_norm(name=prefix + 'project/BatchNorm')(x)

    if stride != 1 or input_c != out_c:
        return x
    return layers.Add(name=prefix + 'Add')([residual, x])


def mobilenet_v3_large(input_shape=(224, 224, 3),
                       num_classes=1000,
                       alpha=1.0,
                       include_top=True):
    """
    Build MobileNetV3-Large as a Keras functional model.

    download weights url:
    link: https://pan.baidu.com/s/13uJznKeqHkjUp72G_gxe8Q  password: 8quu

    Args:
        input_shape: shape of one input image (without the batch axis).
        num_classes: filters of the final 1x1 convolution (used with ``include_top``).
        alpha: width multiplier applied to the channel counts.
        include_top: when exactly ``True`` append the pooling / 1x1-conv
            classifier ending in softmax; otherwise the model outputs the
            activation of ``Conv_1``.

    Returns:
        The assembled ``Model``.
    """
    bn = partial(layers.BatchNormalization, epsilon=0.001, momentum=0.99)
    img_input = layers.Input(shape=input_shape)

    # The stem width follows the width multiplier so that it equals the
    # alpha-scaled input channel count the first block assumes; a fixed 16
    # produced mismatching shapes whenever _make_divisible(16 * alpha) != 16.
    x = layers.Conv2D(filters=_make_divisible(16 * alpha),
                      kernel_size=3,
                      strides=(2, 2),
                      padding='same',
                      use_bias=False,
                      name="Conv")(img_input)
    x = bn(name="Conv/BatchNorm")(x)
    x = HardSwish(name="Conv/HardSwish")(x)

    inverted_cnf = partial(_inverted_res_block, alpha=alpha)
    # input_c, k_size, expand_c, out_c, use_se, activation, stride, block_id
    block_settings = [
        (16, 3, 16, 16, False, "RE", 1, 0),
        (16, 3, 64, 24, False, "RE", 2, 1),
        (24, 3, 72, 24, False, "RE", 1, 2),
        (24, 5, 72, 40, True, "RE", 2, 3),
        (40, 5, 120, 40, True, "RE", 1, 4),
        (40, 5, 120, 40, True, "RE", 1, 5),
        (40, 3, 240, 80, False, "HS", 2, 6),
        (80, 3, 200, 80, False, "HS", 1, 7),
        (80, 3, 184, 80, False, "HS", 1, 8),
        (80, 3, 184, 80, False, "HS", 1, 9),
        (80, 3, 480, 112, True, "HS", 1, 10),
        (112, 3, 672, 112, True, "HS", 1, 11),
        (112, 5, 672, 160, True, "HS", 2, 12),
        (160, 5, 960, 160, True, "HS", 1, 13),
        (160, 5, 960, 160, True, "HS", 1, 14),
    ]
    for setting in block_settings:
        x = inverted_cnf(x, *setting)

    last_c = _make_divisible(160 * 6 * alpha)
    last_point_c = _make_divisible(1280 * alpha)

    x = layers.Conv2D(filters=last_c,
                      kernel_size=1,
                      padding='same',
                      use_bias=False,
                      name="Conv_1")(x)
    x = bn(name="Conv_1/BatchNorm")(x)
    x = HardSwish(name="Conv_1/HardSwish")(x)

    if include_top is True:
        x = layers.GlobalAveragePooling2D()(x)
        # restore the spatial axes so the classifier can use 1x1 convolutions
        x = layers.Reshape((1, 1, last_c))(x)

        # fc1
        x = layers.Conv2D(filters=last_point_c,
                          kernel_size=1,
                          padding='same',
                          name="Conv_2")(x)
        x = HardSwish(name="Conv_2/HardSwish")(x)

        # fc2
        x = layers.Conv2D(filters=num_classes,
                          kernel_size=1,
                          padding='same',
                          name='Logits/Conv2d_1c_1x1')(x)
        x = layers.Flatten()(x)
        x = layers.Softmax(name="Predictions")(x)

    model = Model(img_input, x, name="MobilenetV3large")

    return model


def mobilenet_v3_small(input_shape=(224, 224, 3),
                       num_classes=1000,
                       alpha=1.0,
                       include_top=True):
    """
    Build MobileNetV3-Small as a Keras functional model.

    download weights url:
    link: https://pan.baidu.com/s/1vrQ_6HdDTHL1UUAN6nSEcw  password: rrf0

    Args:
        input_shape: shape of one input image (without the batch axis).
        num_classes: filters of the final 1x1 convolution (used with ``include_top``).
        alpha: width multiplier applied to the channel counts.
        include_top: when exactly ``True`` append the pooling / 1x1-conv
            classifier ending in softmax; otherwise the model outputs the
            activation of ``Conv_1``.

    Returns:
        The assembled ``Model``.
    """
    bn = partial(layers.BatchNormalization, epsilon=0.001, momentum=0.99)
    img_input = layers.Input(shape=input_shape)

    # The stem width follows the width multiplier so that it equals the
    # alpha-scaled channel count the first block (and its SE reshape) assumes;
    # a fixed 16 produced mismatching shapes whenever
    # _make_divisible(16 * alpha) != 16.
    x = layers.Conv2D(filters=_make_divisible(16 * alpha),
                      kernel_size=3,
                      strides=(2, 2),
                      padding='same',
                      use_bias=False,
                      name="Conv")(img_input)
    x = bn(name="Conv/BatchNorm")(x)
    x = HardSwish(name="Conv/HardSwish")(x)

    inverted_cnf = partial(_inverted_res_block, alpha=alpha)
    # input_c, k_size, expand_c, out_c, use_se, activation, stride, block_id
    block_settings = [
        (16, 3, 16, 16, True, "RE", 2, 0),
        (16, 3, 72, 24, False, "RE", 2, 1),
        (24, 3, 88, 24, False, "RE", 1, 2),
        (24, 5, 96, 40, True, "HS", 2, 3),
        (40, 5, 240, 40, True, "HS", 1, 4),
        (40, 5, 240, 40, True, "HS", 1, 5),
        (40, 5, 120, 48, True, "HS", 1, 6),
        (48, 5, 144, 48, True, "HS", 1, 7),
        (48, 5, 288, 96, True, "HS", 2, 8),
        (96, 5, 576, 96, True, "HS", 1, 9),
        (96, 5, 576, 96, True, "HS", 1, 10),
    ]
    for setting in block_settings:
        x = inverted_cnf(x, *setting)

    last_c = _make_divisible(96 * 6 * alpha)
    last_point_c = _make_divisible(1024 * alpha)

    x = layers.Conv2D(filters=last_c,
                      kernel_size=1,
                      padding='same',
                      use_bias=False,
                      name="Conv_1")(x)
    x = bn(name="Conv_1/BatchNorm")(x)
    x = HardSwish(name="Conv_1/HardSwish")(x)

    if include_top is True:
        x = layers.GlobalAveragePooling2D()(x)
        # restore the spatial axes so the classifier can use 1x1 convolutions
        x = layers.Reshape((1, 1, last_c))(x)

        # fc1
        x = layers.Conv2D(filters=last_point_c,
                          kernel_size=1,
                          padding='same',
                          name="Conv_2")(x)
        x = HardSwish(name="Conv_2/HardSwish")(x)

        # fc2
        x = layers.Conv2D(filters=num_classes,
                          kernel_size=1,
                          padding='same',
                          name='Logits/Conv2d_1c_1x1')(x)
        x = layers.Flatten()(x)
        x = layers.Softmax(name="Predictions")(x)

    # name the model after the variant actually built
    model = Model(img_input, x, name="MobilenetV3small")

    return model


================================================
FILE: tensorflow_classification/Test6_mobilenet/predict.py
================================================
import os
import json
import glob
import numpy as np

from PIL import Image
import matplotlib.pyplot as plt
import tensorflow as tf

from model_v2 import MobileNetV2


def main():
    """Classify ``../tulip.jpg`` with the fine-tuned MobileNetV2 and plot the result.

    The image is resized to 224x224, scaled to (-1, 1) and fed through a
    MobileNetV2 backbone with a pooling / dropout / dense / softmax head whose
    weights are loaded from ``./save_weights/resMobileNetV2.ckpt``.  Class names
    come from ``./class_indices.json``.  Every class probability is printed
    and the top prediction is shown as the plot title.

    Raises:
        AssertionError: if the image, the json file or the weights are missing.
    """
    im_height = 224
    im_width = 224
    num_classes = 5

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    # force three channels: the network input is (height, width, 3), so a
    # grayscale, palette or RGBA file would otherwise yield the wrong shape
    img = Image.open(img_path).convert("RGB")
    # resize image to 224x224
    img = img.resize((im_width, im_height))
    plt.imshow(img)

    # scaling pixel value to (-1,1)
    img = np.array(img).astype(np.float32)
    img = ((img / 255.) - 0.5) * 2.0

    # Add the image to a batch where it's the only member.
    img = (np.expand_dims(img, 0))

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model; build it for the same size the image was resized to
    feature = MobileNetV2(im_height=im_height, im_width=im_width, include_top=False)
    model = tf.keras.Sequential([feature,
                                 tf.keras.layers.GlobalAvgPool2D(),
                                 tf.keras.layers.Dropout(rate=0.5),
                                 tf.keras.layers.Dense(num_classes),
                                 tf.keras.layers.Softmax()])
    weights_path = './save_weights/resMobileNetV2.ckpt'
    assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path)
    model.load_weights(weights_path)

    # drop the batch axis: one probability per class
    result = np.squeeze(model.predict(img))
    predict_class = np.argmax(result)

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_class)],
                                                 result[predict_class])
    plt.title(print_res)
    for index, prob in enumerate(result):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(index)],
                                                  prob))
    plt.show()


# Run the prediction demo only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test6_mobilenet/read_ckpt.py
================================================
"""
It is recommended to download and use the already-converted weights directly.
link: https://pan.baidu.com/s/1YgFoIKHqooMrTQg_IqI2hA  password: 2qht
"""
import tensorflow as tf


def rename_var(ckpt_path, new_ckpt_path, num_classes, except_list):
    """Rewrite a MobileNetV2 checkpoint under new variable names.

    Every variable stored in ``ckpt_path`` is loaded, renamed by the string
    rules below and re-created in a fresh graph. A newly initialised
    ``Logits/kernel`` / ``Logits/bias`` pair sized for ``num_classes`` is
    appended, and the whole set is saved to ``new_ckpt_path``.

    :param ckpt_path: path prefix of the checkpoint to read
    :param new_ckpt_path: path prefix of the checkpoint to write
    :param num_classes: number of outputs of the new ``Logits`` layer
    :param except_list: exact variable names that must not be copied
    """
    with tf.Graph().as_default(), tf.compat.v1.Session().as_default() as sess:
        var_list = tf.train.list_variables(ckpt_path)
        new_var_list = []

        for var_name, shape in var_list:
            # print(var_name)
            if var_name in except_list:
                continue
            # skip variables that are not model weights
            # (presumably optimizer slots and moving averages -- judged by name only)
            if "RMSProp" in var_name or "Exponential" in var_name:
                continue
            var = tf.train.load_variable(ckpt_path, var_name)

            # NOTE: the order of these replacements matters; the generic
            # "weights" -> "kernel" rule must run after the specific ones.
            new_var_name = var_name.replace('MobilenetV2/', "")
            new_var_name = new_var_name.replace("/expand/weights", "/expand/Conv2d/weights")
            new_var_name = new_var_name.replace("Conv/weights", "Conv/Conv2d/kernel")
            new_var_name = new_var_name.replace("Conv_1/weights", "Conv_1/Conv2d/kernel")
            new_var_name = new_var_name.replace("weights", "kernel")
            new_var_name = new_var_name.replace("biases", "bias")

            # "expanded_conv[_N]/..." becomes "inverted_residual[_N]/expanded_conv/..."
            first_word = new_var_name.split('/')[0]
            if "expanded_conv" in first_word:
                last_word = first_word.split('expanded_conv')[-1]
                if len(last_word) > 0:
                    new_word = "inverted_residual" + last_word + "/expanded_conv/"
                else:
                    new_word = "inverted_residual/expanded_conv/"
                new_var_name = new_word + new_var_name.split('/', maxsplit=1)[-1]
            print(new_var_name)
            re_var = tf.Variable(var, name=new_var_name)
            new_var_list.append(re_var)

        # freshly initialised classifier head for the new number of classes
        re_var = tf.Variable(tf.keras.initializers.he_uniform()([1280, num_classes]), name="Logits/kernel")
        new_var_list.append(re_var)
        re_var = tf.Variable(tf.keras.initializers.he_uniform()([num_classes]), name="Logits/bias")
        new_var_list.append(re_var)

        saver = tf.compat.v1.train.Saver(new_var_list)
        sess.run(tf.compat.v1.global_variables_initializer())
        saver.save(sess, save_path=new_ckpt_path, write_meta_graph=False, write_state=False)


def main():
    """Convert the downloaded MobileNetV2 checkpoint for a 5-class head."""
    # variables that are not copied: the step counter and the original classifier
    skipped_vars = ['global_step', 'MobilenetV2/Logits/Conv2d_1c_1x1/biases', 'MobilenetV2/Logits/Conv2d_1c_1x1/weights']
    source_ckpt = './pretrain_model/mobilenet_v2_1.0_224.ckpt'
    target_ckpt = './pretrain_weights.ckpt'
    rename_var(source_ckpt, target_ckpt, 5, skipped_vars)


if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test6_mobilenet/trainGPU_mobilenet_v2.py
================================================
from model_v2 import MobileNetV2
import tensorflow as tf
import json
import os
import time
import glob
import random
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


def main():
    """Fine-tune MobileNetV2 on the flower dataset using a tf.data pipeline.

    Reads images from ``<cwd>/../../data_set/flower_data/{train,val}``, writes
    the index -> class-name mapping to ``class_indices.json``, loads
    ``./pretrain_weights.ckpt``, freezes every layer except the last one and
    trains with a custom loop. The weights with the lowest validation loss so
    far are written to ``./save_weights/myMobileNet.ckpt``.
    """
    gpus = tf.config.experimental.list_physical_devices("GPU")
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
        except RuntimeError as e:
            print(e)
            exit(-1)

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    train_dir = os.path.join(image_path, "train")
    validation_dir = os.path.join(image_path, "val")
    assert os.path.exists(train_dir), "cannot find {}".format(train_dir)
    assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir)

    # create directory for saving weights
    if not os.path.exists("save_weights"):
        os.makedirs("save_weights")

    im_height = 224
    im_width = 224
    batch_size = 32
    epochs = 30

    # class dict: one sub-directory of train_dir per class
    data_class = [cla for cla in os.listdir(train_dir) if os.path.isdir(os.path.join(train_dir, cla))]
    class_num = len(data_class)
    class_dict = dict((value, index) for index, value in enumerate(data_class))

    # reverse value and key of dict
    inverse_dict = dict((val, key) for key, val in class_dict.items())
    # write dict into json file
    json_str = json.dumps(inverse_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    # load train images list; the label is taken from the parent directory name
    train_image_list = glob.glob(train_dir+"/*/*.jpg")
    random.shuffle(train_image_list)
    train_num = len(train_image_list)
    assert train_num > 0, "cannot find any .jpg file in {}".format(train_dir)
    train_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in train_image_list]

    # load validation images list
    val_image_list = glob.glob(validation_dir+"/*/*.jpg")
    random.shuffle(val_image_list)
    val_num = len(val_image_list)
    assert val_num > 0, "cannot find any .jpg file in {}".format(validation_dir)
    val_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in val_image_list]

    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    def process_train_img(img_path, label):
        label = tf.one_hot(label, depth=class_num)
        image = tf.io.read_file(img_path)
        # force 3 channels so grayscale files still batch with RGB ones
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize(image, [im_height, im_width])
        image = tf.image.random_flip_left_right(image)
        # rescale from [0, 1] to [-1, 1]
        image = (image - 0.5) * 2.0
        return image, label

    def process_val_img(img_path, label):
        label = tf.one_hot(label, depth=class_num)
        image = tf.io.read_file(img_path)
        # force 3 channels so grayscale files still batch with RGB ones
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize(image, [im_height, im_width])
        # rescale from [0, 1] to [-1, 1]
        image = (image - 0.5) * 2.0
        return image, label

    AUTOTUNE = tf.data.experimental.AUTOTUNE

    # load train dataset
    train_dataset = tf.data.Dataset.from_tensor_slices((train_image_list, train_label_list))
    train_dataset = train_dataset.shuffle(buffer_size=train_num)\
                                 .map(process_train_img, num_parallel_calls=AUTOTUNE)\
                                 .repeat().batch(batch_size).prefetch(AUTOTUNE)

    # load validation dataset
    val_dataset = tf.data.Dataset.from_tensor_slices((val_image_list, val_label_list))
    val_dataset = val_dataset.map(process_val_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)\
                             .repeat().batch(batch_size)

    # instantiate the model
    # NOTE(review): num_classes is hard-coded to 5 while labels use class_num -- they must agree
    model = MobileNetV2(num_classes=5)
    pre_weights_path = './pretrain_weights.ckpt'
    assert len(glob.glob(pre_weights_path + "*")), "cannot find {}".format(pre_weights_path)
    model.load_weights(pre_weights_path)
    # freeze everything except the last layer
    for layer_t in model.layers[:-1]:
        layer_t.trainable = False

    model.summary()

    # using keras low level api for training
    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')

    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            output = model(images, training=True)
            loss = loss_object(labels, output)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(labels, output)

    @tf.function
    def test_step(images, labels):
        output = model(images, training=False)
        t_loss = loss_object(labels, output)

        test_loss(t_loss)
        test_accuracy(labels, output)

    best_test_loss = float('inf')
    train_step_num = train_num // batch_size
    val_step_num = val_num // batch_size
    for epoch in range(1, epochs+1):
        train_loss.reset_states()        # clear history info
        train_accuracy.reset_states()    # clear history info
        test_loss.reset_states()         # clear history info
        test_accuracy.reset_states()     # clear history info

        # both datasets repeat forever, so each epoch is cut off by step count
        t1 = time.perf_counter()
        for index, (images, labels) in enumerate(train_dataset):
            train_step(images, labels)
            if index+1 == train_step_num:
                break
        print(time.perf_counter()-t1)

        for index, (images, labels) in enumerate(val_dataset):
            test_step(images, labels)
            if index+1 == val_step_num:
                break

        template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
        print(template.format(epoch,
                              train_loss.result(),
                              train_accuracy.result() * 100,
                              test_loss.result(),
                              test_accuracy.result() * 100))

        # only overwrite the checkpoint when the validation loss improved;
        # the running best must be updated or every epoch would be saved
        current_test_loss = float(test_loss.result())
        if current_test_loss < best_test_loss:
            best_test_loss = current_test_loss
            model.save_weights("./save_weights/myMobileNet.ckpt", save_format='tf')


if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test6_mobilenet/train_mobilenet_v2.py
================================================
import os
import sys
import glob
import json

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tqdm import tqdm

from model_v2 import MobileNetV2


def main():
    """Fine-tune a MobileNetV2 feature extractor on the flower dataset.

    Images are read with ``ImageDataGenerator`` from
    ``<cwd>/../../data_set/flower_data/{train,val}``. The index -> class-name
    mapping is written to ``class_indices.json``. The backbone is loaded from
    ``./pretrain_weights.ckpt`` and frozen; a new pooling/dropout/dense/softmax
    head is trained. The weights with the best validation accuracy are saved
    to ``./save_weights/resMobileNetV2.ckpt``.
    """
    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    train_dir = os.path.join(image_path, "train")
    validation_dir = os.path.join(image_path, "val")
    assert os.path.exists(train_dir), "cannot find {}".format(train_dir)
    assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir)

    # make sure the checkpoint directory used at the end of each epoch exists
    os.makedirs("./save_weights", exist_ok=True)

    im_height = 224
    im_width = 224
    batch_size = 16
    epochs = 20
    num_classes = 5

    def pre_function(img):
        # scale pixel values from [0, 255] to [-1, 1]
        img = img / 255.
        img = (img - 0.5) * 2.0
        return img

    # data generator with data augmentation
    train_image_generator = ImageDataGenerator(horizontal_flip=True,
                                               preprocessing_function=pre_function)

    validation_image_generator = ImageDataGenerator(preprocessing_function=pre_function)

    train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,
                                                               batch_size=batch_size,
                                                               shuffle=True,
                                                               target_size=(im_height, im_width),
                                                               class_mode='categorical')
    total_train = train_data_gen.n

    # get class dict
    class_indices = train_data_gen.class_indices

    # transform value and key of dict
    inverse_dict = dict((val, key) for key, val in class_indices.items())
    # write dict into json file
    json_str = json.dumps(inverse_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,
                                                                  batch_size=batch_size,
                                                                  shuffle=False,
                                                                  target_size=(im_height, im_width),
                                                                  class_mode='categorical')
    total_val = val_data_gen.n
    print("using {} images for training, {} images for validation.".format(total_train,
                                                                           total_val))

    # The generators cycle forever. Running ceil(total_val / batch_size) steps
    # consumes the validation set exactly once per epoch; with floor division
    # the leftover partial batch would spill into the next epoch and every
    # epoch would be scored on a different window of images.
    val_steps = (total_val + batch_size - 1) // batch_size

    # create model except fc layer
    feature = MobileNetV2(include_top=False)
    # download weights: https://pan.baidu.com/s/1YgFoIKHqooMrTQg_IqI2hA  password: 2qht
    pre_weights_path = './pretrain_weights.ckpt'
    assert len(glob.glob(pre_weights_path+"*")), "cannot find {}".format(pre_weights_path)
    feature.load_weights(pre_weights_path)
    feature.trainable = False
    feature.summary()

    # add last fc layer
    model = tf.keras.Sequential([feature,
                                 tf.keras.layers.GlobalAvgPool2D(),
                                 tf.keras.layers.Dropout(rate=0.5),
                                 tf.keras.layers.Dense(num_classes),
                                 tf.keras.layers.Softmax()])
    model.summary()

    # using keras low level api for training
    # the model ends in Softmax, so the loss receives probabilities
    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.CategoricalAccuracy(name='val_accuracy')

    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            output = model(images, training=True)
            loss = loss_object(labels, output)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(labels, output)

    @tf.function
    def val_step(images, labels):
        output = model(images, training=False)
        loss = loss_object(labels, output)

        val_loss(loss)
        val_accuracy(labels, output)

    best_val_acc = 0.
    for epoch in range(epochs):
        train_loss.reset_states()  # clear history info
        train_accuracy.reset_states()  # clear history info
        val_loss.reset_states()  # clear history info
        val_accuracy.reset_states()  # clear history info

        # train
        train_bar = tqdm(range(total_train // batch_size), file=sys.stdout)
        for step in train_bar:
            images, labels = next(train_data_gen)
            train_step(images, labels)

            # print train process
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                                 epochs,
                                                                                 train_loss.result(),
                                                                                 train_accuracy.result())

        # validate on the complete validation set
        val_bar = tqdm(range(val_steps), file=sys.stdout)
        for step in val_bar:
            val_images, val_labels = next(val_data_gen)
            val_step(val_images, val_labels)

            # print val process
            val_bar.desc = "valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                               epochs,
                                                                               val_loss.result(),
                                                                               val_accuracy.result())

        # only save best weights
        if val_accuracy.result() > best_val_acc:
            best_val_acc = val_accuracy.result()
            model.save_weights("./save_weights/resMobileNetV2.ckpt", save_format="tf")


if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test6_mobilenet/train_mobilenet_v3.py
================================================
import os
import sys

import tensorflow as tf
from tqdm import tqdm

from model_v3 import mobilenet_v3_large
from utils import generate_ds

# Compare (major, minor) numerically: as plain strings "2.10.0" sorts below "2.4.0".
assert tuple(int(part) for part in tf.version.VERSION.split(".")[:2]) >= (2, 4), \
    "version of tf must greater/equal than 2.4.0"


def main():
    """Train MobileNetV3-Large on the flower dataset and keep the best weights.

    The dataset under ``/data/flower_photos`` is split by ``generate_ds``.
    Pretrained weights are loaded by layer name and the model is trained with a
    custom loop. Whenever validation accuracy improves the weights are written
    to ``./save_weights/resMobileNetV3.ckpt``.
    """
    data_root = "/data/flower_photos"  # dataset root directory

    weights_dir = "./save_weights"
    if not os.path.exists(weights_dir):
        os.makedirs(weights_dir)

    # training configuration
    im_height, im_width = 224, 224
    batch_size = 16
    epochs = 20
    num_classes = 5
    freeze_layer = False

    # training / validation pipelines
    train_ds, val_ds = generate_ds(data_root, im_height, im_width, batch_size)

    # build the network
    model = mobilenet_v3_large(input_shape=(im_height, im_width, 3),
                               num_classes=num_classes,
                               include_top=True)

    # pretrained weights are matched by layer name; mismatching shapes are skipped
    pre_weights_path = './weights_mobilenet_v3_large_224_1.0_float.h5'
    assert os.path.exists(pre_weights_path), "cannot find {}".format(pre_weights_path)
    model.load_weights(pre_weights_path, by_name=True, skip_mismatch=True)

    if freeze_layer is True:
        # keep only the two last layers trainable
        kept_trainable = ["Conv_2", "Logits/Conv2d_1c_1x1"]
        for layer in model.layers:
            if layer.name in kept_trainable:
                print("training: " + layer.name)
            else:
                layer.trainable = False

    model.summary()

    # loss, optimizer and running metrics for the custom loop
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

    @tf.function
    def train_step(batch_images, batch_labels):
        with tf.GradientTape() as tape:
            predictions = model(batch_images, training=True)
            batch_loss = loss_object(batch_labels, predictions)
        grads = tape.gradient(batch_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        train_loss(batch_loss)
        train_accuracy(batch_labels, predictions)

    @tf.function
    def val_step(batch_images, batch_labels):
        predictions = model(batch_images, training=False)
        batch_loss = loss_object(batch_labels, predictions)

        val_loss(batch_loss)
        val_accuracy(batch_labels, predictions)

    epoch_metrics = (train_loss, train_accuracy, val_loss, val_accuracy)
    best_val_acc = 0.
    for epoch in range(epochs):
        # start every epoch with empty metric state
        for metric in epoch_metrics:
            metric.reset_states()

        # training pass
        train_bar = tqdm(train_ds, file=sys.stdout)
        for images, labels in train_bar:
            train_step(images, labels)

            # show running loss / accuracy in the progress bar
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                                 epochs,
                                                                                 train_loss.result(),
                                                                                 train_accuracy.result())

        # validation pass
        val_bar = tqdm(val_ds, file=sys.stdout)
        for images, labels in val_bar:
            val_step(images, labels)

            # show running loss / accuracy in the progress bar
            val_bar.desc = "valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                               epochs,
                                                                               val_loss.result(),
                                                                               val_accuracy.result())

        # checkpoint only when validation accuracy improved
        epoch_val_acc = val_accuracy.result()
        if epoch_val_acc > best_val_acc:
            best_val_acc = epoch_val_acc
            model.save_weights("./save_weights/resMobileNetV3.ckpt", save_format="tf")


if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test6_mobilenet/trans_v3_weights.py
================================================
import re
import tensorflow as tf
from model_v3 import mobilenet_v3_large


def change_word(word: str):
    """Translate a checkpoint variable name into the Keras-style name.

    The ``MobilenetV3/`` scope is removed. Names containing ``weights`` get it
    replaced by ``kernel``; otherwise names containing both ``Conv`` and
    ``biases`` get ``biases`` replaced by ``bias``. Anything else is returned
    with only the scope removed.
    """
    name = word.replace("MobilenetV3/", "")

    if "weights" in name:
        return name.replace("weights", "kernel")

    if "Conv" in name and "biases" in name:
        return name.replace("biases", "bias")

    return name


def rename_var(ckpt_path, m_info):
    """Load checkpoint values in the order given by ``m_info``.

    :param ckpt_path: path prefix of the checkpoint to read
    :param m_info: iterable of ``(name, shape)`` pairs describing the target weights
    :return: list of loaded values, one per entry of ``m_info``, in the same order
    :raises AssertionError: if a name is missing from the checkpoint or a shape differs
    """
    with tf.Graph().as_default(), tf.compat.v1.Session().as_default() as sess:
        # variables matching this pattern are not looked up
        skip = re.compile("ExponentialMovingAverage|Momentum|global_step")

        # converted name -> [name in checkpoint, shape in checkpoint]
        var_dict = {}
        for ckpt_name, ckpt_shape in tf.train.list_variables(ckpt_path):
            if skip.search(ckpt_name) is not None:
                continue
            var_dict[change_word(ckpt_name)] = [ckpt_name, ckpt_shape]

        # validate everything first so nothing is loaded on a mismatch
        for key, expected_shape in m_info:
            assert key in var_dict, "{} not in var_dict".format(key)
            assert expected_shape == var_dict[key][1], "shape {} not equal {}".format(expected_shape, var_dict[key][1])

        return [tf.train.load_variable(ckpt_path, var_dict[key][0]) for key, _ in m_info]


def main():
    """Copy the MobileNetV3-Large checkpoint values into the Keras model and save them as .h5."""
    # https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large_224_1.0_float.tgz
    ckpt_path = './v3-large_224_1.0_float/pristine/model.ckpt-540000'
    save_path = './pre_mobilev3.h5'

    model = mobilenet_v3_large(input_shape=(224, 224, 3), num_classes=1001, include_top=True)

    # (name without the ":0" suffix, shape) for every model weight, in model order
    m_info = []
    for weight in model.weights:
        m_info.append((weight.name.replace(":0", ""), list(weight.shape)))

    model.set_weights(rename_var(ckpt_path, m_info))
    model.save_weights(save_path)


if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test6_mobilenet/utils.py
================================================
import os
import json
import random

import tensorflow as tf
import matplotlib.pyplot as plt


def read_split_data(root: str, val_rate: float = 0.2):
    """Split an image-folder dataset into training and validation lists.

    Each sub-directory of ``root`` is one class. The index -> class-name
    mapping is written to ``class_indices.json`` in the current working
    directory. For every class, ``int(len(images) * val_rate)`` images are
    sampled for validation and the rest are used for training.

    :param root: dataset root directory containing one folder per class
    :param val_rate: fraction of each class that goes to the validation set
    :return: (train_images_path, train_images_label, val_images_path, val_images_label)
    :raises AssertionError: if ``root`` does not exist
    """
    random.seed(0)  # fixed seed so the sampling is repeatable for a given file listing
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # one sub-directory per class
    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]
    # sort so the class -> index mapping is stable
    flower_class.sort()
    # class name -> numeric index
    class_indices = dict((k, v) for v, k in enumerate(flower_class))
    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of each training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of each validation image
    every_class_num = []  # number of samples per class
    supported = [".jpg", ".JPG", ".jpeg", ".JPEG"]  # accepted file extensions
    # walk through the files of every class folder
    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # keep only files with a supported extension
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        # index of this class
        image_class = class_indices[cla]
        # record the number of samples of this class
        every_class_num.append(len(images))
        # sample the validation images; a set keeps the membership test below O(1)
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:  # sampled for validation
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:  # everything else is used for training
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.\n{} for training, {} for validation".format(sum(every_class_num),
                                                                                            len(train_images_path),
                                                                                            len(val_images_path)
                                                                                            ))

    plot_image = False
    if plot_image:
        # bar chart of the number of images per class
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # replace the x ticks 0,1,2,... with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # write the count above every bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        # x axis label
        plt.xlabel('image class')
        # y axis label
        plt.ylabel('number of images')
        # chart title
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label


def generate_ds(data_root: str,
                im_height: int,
                im_width: int,
                batch_size: int,
                val_rate: float = 0.1):
    """
    Read and split the dataset, then build the training and validation pipelines.

    :param data_root: dataset root directory
    :param im_height: height of the images fed to the network
    :param im_width:  width of the images fed to the network
    :param batch_size: batch size used for both datasets
    :param val_rate:  fraction of the data assigned to the validation set
    :return: (train_ds, val_ds), batched datasets yielding (image, label)
    """
    train_img_path, train_img_label, val_img_path, val_img_label = read_split_data(data_root, val_rate=val_rate)
    AUTOTUNE = tf.data.experimental.AUTOTUNE

    def load_image(img_path, label):
        # deterministic part of the preprocessing, shared by both splits
        image = tf.io.read_file(img_path)
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize_with_crop_or_pad(image, im_height, im_width)
        image = (image - 0.5) / 0.5
        return image, label

    def random_flip(image, label):
        # random augmentation, applied to the training split only
        return tf.image.random_flip_left_right(image), label

    # Configure dataset for performance
    def configure_for_performance(ds,
                                  shuffle_size: int,
                                  shuffle: bool = False,
                                  augment=None):
        ds = ds.cache()  # keep the decoded images in memory after the first pass
        if shuffle:
            ds = ds.shuffle(buffer_size=shuffle_size)  # reshuffle the sample order
        if augment is not None:
            # Random ops must run after cache(): a cached dataset replays the
            # same elements every epoch, which would freeze the augmentation.
            ds = ds.map(augment, num_parallel_calls=AUTOTUNE)
        ds = ds.batch(batch_size)                      # group into batches
        ds = ds.prefetch(buffer_size=AUTOTUNE)         # prepare the next batch while training
        return ds

    train_ds = tf.data.Dataset.from_tensor_slices((tf.constant(train_img_path),
                                                   tf.constant(train_img_label)))
    total_train = len(train_img_path)

    # Use Dataset.map to create a dataset of image, label pairs
    train_ds = train_ds.map(load_image, num_parallel_calls=AUTOTUNE)
    train_ds = configure_for_performance(train_ds, total_train, shuffle=True, augment=random_flip)

    val_ds = tf.data.Dataset.from_tensor_slices((tf.constant(val_img_path),
                                                 tf.constant(val_img_label)))
    total_val = len(val_img_path)
    # Use Dataset.map to create a dataset of image, label pairs
    val_ds = val_ds.map(load_image, num_parallel_calls=AUTOTUNE)
    val_ds = configure_for_performance(val_ds, total_val)

    return train_ds, val_ds


================================================
FILE: tensorflow_classification/Test7_shuffleNet/model.py
================================================
import tensorflow as tf
from tensorflow.keras import layers, Model


class ConvBNReLU(layers.Layer):
    """Bias-free Conv2D followed by BatchNormalization and ReLU."""

    def __init__(self,
                 filters: int = 1,
                 kernel_size: int = 1,
                 strides: int = 1,
                 padding: str = 'same',
                 **kwargs):
        super().__init__(**kwargs)

        # L2 weight decay on the convolution kernel
        weight_decay = tf.keras.regularizers.l2(4e-5)
        conv_config = dict(filters=filters,
                           kernel_size=kernel_size,
                           strides=strides,
                           padding=padding,
                           use_bias=False,
                           kernel_regularizer=weight_decay,
                           name="conv1")

        self.conv = layers.Conv2D(**conv_config)
        self.bn = layers.BatchNormalization(momentum=0.9, name="bn")
        self.relu = layers.ReLU()

    def call(self, inputs, training=None, **kwargs):
        # `training` is forwarded to the batch-norm layer only
        normalized = self.bn(self.conv(inputs), training=training)
        return self.relu(normalized)


class DWConvBN(layers.Layer):
    """Bias-free depthwise convolution followed by BatchNormalization (no activation)."""

    def __init__(self,
                 kernel_size: int = 3,
                 strides: int = 1,
                 padding: str = 'same',
                 **kwargs):
        super().__init__(**kwargs)

        # L2 weight decay on the depthwise kernel
        weight_decay = tf.keras.regularizers.l2(4e-5)
        dw_config = dict(kernel_size=kernel_size,
                         strides=strides,
                         padding=padding,
                         use_bias=False,
                         kernel_regularizer=weight_decay,
                         name="dw1")

        self.dw_conv = layers.DepthwiseConv2D(**dw_config)
        self.bn = layers.BatchNormalization(momentum=0.9, name="bn")

    def call(self, inputs, training=None, **kwargs):
        # `training` is forwarded to the batch-norm layer only
        return self.bn(self.dw_conv(inputs), training=training)


class ChannelShuffle(layers.Layer):
    """Interleave the channels of ``groups`` channel groups.

    The channel axis is reshaped to ``(groups, channels_per_group)``, the two
    axes are swapped, and the result is flattened back to ``num_channels``.

    :param shape: full input shape ``(batch, height, width, channels)``; the
        batch entry is ignored
    :param groups: number of channel groups; must divide the channel count
    """

    def __init__(self, shape, groups: int = 2, **kwargs):
        super(ChannelShuffle, self).__init__(**kwargs)
        _, height, width, num_channels = shape
        # the channels must split evenly into `groups`, whatever its value
        assert num_channels % groups == 0
        channel_per_group = num_channels // groups

        # Tuple of integers, does not include the samples dimension (batch size).
        self.reshape1 = layers.Reshape((height, width, groups, channel_per_group))
        self.reshape2 = layers.Reshape((height, width, num_channels))

    def call(self, inputs, **kwargs):
        x = self.reshape1(inputs)
        # swap the group axis with the per-group channel axis
        x = tf.transpose(x, perm=[0, 1, 2, 4, 3])
        x = self.reshape2(x)
        return x


class ChannelSplit(layers.Layer):
    """Split the input along the last (channel) axis.

    :param num_splits: passed to ``tf.split`` as ``num_or_size_splits``
    """

    def __init__(self, num_splits: int = 2, **kwargs):
        super(ChannelSplit, self).__init__(**kwargs)
        self.num_splits = num_splits

    def call(self, inputs, **kwargs):
        # Return every piece as a tuple so any `num_splits` works; with the
        # default of 2 this is the same (b1, b2) pair as before.
        return tuple(tf.split(inputs,
                              num_or_size_splits=self.num_splits,
                              axis=-1))


def shuffle_block_s1(inputs, output_c: int, stride: int, prefix: str):
    """Stride-1 shuffle block: split, transform one half, concatenate, shuffle.

    :param inputs: input feature map
    :param output_c: number of output channels, must be even
    :param stride: must be 1
    :param prefix: name prefix for every layer created here
    :raises ValueError: if ``stride`` is not 1
    """
    if stride != 1:
        raise ValueError("illegal stride value.")

    assert output_c % 2 == 0
    half_c = output_c // 2

    def scoped(suffix):
        # layer name inside this block
        return prefix + suffix

    shortcut, branch = ChannelSplit(name=scoped("/split"))(inputs)

    # main branch: 1x1 conv -> 3x3 depthwise conv -> 1x1 conv
    branch = ConvBNReLU(filters=half_c, name=scoped("/b2_conv1"))(branch)
    branch = DWConvBN(kernel_size=3, strides=stride, name=scoped("/b2_dw1"))(branch)
    branch = ConvBNReLU(filters=half_c, name=scoped("/b2_conv2"))(branch)

    merged = layers.Concatenate(name=scoped("/concat"))([shortcut, branch])
    return ChannelShuffle(merged.shape, name=scoped("/channelshuffle"))(merged)


def shuffle_block_s2(inputs, output_c: int, stride: int, prefix: str):
    """Build a stride-2 (down-sampling) ShuffleNetV2 unit.

    Both branches consume the full input: the shortcut branch applies
    3x3 depthwise conv -> 1x1 conv, the main branch applies
    1x1 conv -> 3x3 depthwise conv -> 1x1 conv. Their outputs are
    concatenated and channel-shuffled.

    Args:
        inputs: input tensor.
        output_c: number of output channels, must be even.
        stride: must be 2.
        prefix: name prefix for every layer created here.

    Raises:
        ValueError: if `stride` is not 2.
    """
    if stride != 2:
        raise ValueError("illegal stride value.")
    assert output_c % 2 == 0

    half_c = output_c // 2

    # shortcut branch
    shortcut = inputs
    for layer in (DWConvBN(kernel_size=3, strides=stride, name=prefix + "/b1_dw1"),
                  ConvBNReLU(filters=half_c, name=prefix + "/b1_conv1")):
        shortcut = layer(shortcut)

    # main branch
    main = inputs
    for layer in (ConvBNReLU(filters=half_c, name=prefix + "/b2_conv1"),
                  DWConvBN(kernel_size=3, strides=stride, name=prefix + "/b2_dw1"),
                  ConvBNReLU(filters=half_c, name=prefix + "/b2_conv2")):
        main = layer(main)

    merged = layers.Concatenate(name=prefix + "/concat")([shortcut, main])
    return ChannelShuffle(merged.shape, name=prefix + "/channelshuffle")(merged)


def shufflenet_v2(num_classes: int,
                  input_shape: tuple,
                  stages_repeats: list,
                  stages_out_channels: list,
                  model_name: str = "ShuffleNetV2_1.0"):
    """Build a ShuffleNetV2 classification model with the Keras functional API.

    Args:
        num_classes: number of units of the final dense layer.
        input_shape: input image shape, without the batch dimension.
        stages_repeats: number of units in stage2, stage3 and stage4
            (exactly 3 entries).
        stages_out_channels: output channels of conv1, stage2, stage3, stage4
            and conv5 (exactly 5 entries).
        model_name: name given to the returned model. The default keeps the
            name that used to be hard-coded, so existing callers are unaffected.

    Returns:
        A `Model` whose output is a softmax over `num_classes`.

    Raises:
        ValueError: if `stages_repeats` or `stages_out_channels` has the
            wrong length.
    """
    # Validate the configuration before any layer is created.
    if len(stages_repeats) != 3:
        raise ValueError("expected stages_repeats as list of 3 positive ints")
    if len(stages_out_channels) != 5:
        raise ValueError("expected stages_out_channels as list of 5 positive ints")

    img_input = layers.Input(shape=input_shape)

    x = ConvBNReLU(filters=stages_out_channels[0],
                   kernel_size=3,
                   strides=2,
                   name="conv1")(img_input)

    x = layers.MaxPooling2D(pool_size=(3, 3),
                            strides=2,
                            padding='same',
                            name="maxpool")(x)

    stage_name = ["stage{}".format(i) for i in [2, 3, 4]]
    # stages_out_channels[1:] holds 4 values; zip stops after the 3 stages,
    # leaving the last value for conv5.
    for name, repeats, output_channels in zip(stage_name,
                                              stages_repeats,
                                              stages_out_channels[1:]):
        for i in range(repeats):
            unit_prefix = name + "_{}".format(i)
            if i == 0:
                # the first unit of every stage down-samples
                x = shuffle_block_s2(x, output_c=output_channels, stride=2, prefix=unit_prefix)
            else:
                x = shuffle_block_s1(x, output_c=output_channels, stride=1, prefix=unit_prefix)

    x = ConvBNReLU(filters=stages_out_channels[-1], name="conv5")(x)

    x = layers.GlobalAveragePooling2D(name="globalpool")(x)

    x = layers.Dense(units=num_classes, name="fc")(x)
    x = layers.Softmax()(x)

    model = Model(img_input, x, name=model_name)

    return model


def shufflenet_v2_x1_0(num_classes=1000, input_shape=(224, 224, 3)):
    """Build ShuffleNetV2 with stage repeats [4, 8, 4] and channels [24, 116, 232, 464, 1024]."""
    # weights link: https://pan.baidu.com/s/1M2mp98Si9eT9qT436DcdOw  password: mhts
    repeats = [4, 8, 4]
    channels = [24, 116, 232, 464, 1024]
    return shufflenet_v2(num_classes=num_classes,
                         input_shape=input_shape,
                         stages_repeats=repeats,
                         stages_out_channels=channels)


def shufflenet_v2_x0_5(num_classes=1000, input_shape=(224, 224, 3)):
    """Build ShuffleNetV2 with stage repeats [4, 8, 4] and channels [24, 48, 96, 192, 1024]."""
    repeats = [4, 8, 4]
    channels = [24, 48, 96, 192, 1024]
    return shufflenet_v2(num_classes=num_classes,
                         input_shape=input_shape,
                         stages_repeats=repeats,
                         stages_out_channels=channels)


def shufflenet_v2_x2_0(num_classes=1000, input_shape=(224, 224, 3)):
    """Build ShuffleNetV2 with stage repeats [4, 8, 4] and channels [24, 244, 488, 976, 2048]."""
    repeats = [4, 8, 4]
    channels = [24, 244, 488, 976, 2048]
    return shufflenet_v2(num_classes=num_classes,
                         input_shape=input_shape,
                         stages_repeats=repeats,
                         stages_out_channels=channels)


================================================
FILE: tensorflow_classification/Test7_shuffleNet/predict.py
================================================
import os
import json
import glob
import numpy as np

from PIL import Image
import matplotlib.pyplot as plt

from model import shufflenet_v2_x1_0


def main():
    """Classify a single image with trained ShuffleNetV2 weights.

    Loads "../tulip.jpg", resizes and normalises it, restores the weights
    saved under "./save_weights/shufflenetv2.ckpt", prints the probability
    of every class and shows the image titled with the top prediction.
    """
    im_height = 224
    im_width = 224
    num_classes = 5

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    # Force 3 channels: grayscale, palette or RGBA files would otherwise not
    # broadcast against the 3-element mean/std or match the model input.
    img = Image.open(img_path).convert("RGB")
    # resize image to 224x224
    img = img.resize((im_width, im_height))
    plt.imshow(img)

    # scale pixel values to [0, 1], then standardise each channel with mean/std
    img = np.array(img).astype(np.float32)
    img = (img / 255. - mean) / std

    # Add the image to a batch where it's the only member.
    img = (np.expand_dims(img, 0))

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = shufflenet_v2_x1_0(num_classes=num_classes)

    weights_path = './save_weights/shufflenetv2.ckpt'
    # the checkpoint is stored as several files sharing this prefix
    assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path)
    model.load_weights(weights_path)

    result = np.squeeze(model.predict(img))
    predict_class = np.argmax(result)

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_class)],
                                                 result[predict_class])
    plt.title(print_res)
    for i, prob in enumerate(result):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  prob))
    plt.show()


if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test7_shuffleNet/train.py
================================================
import os
import sys
import math
import datetime

import tensorflow as tf
from tqdm import tqdm

from model import shufflenet_v2_x1_0
from utils import generate_ds

# Compare (major, minor) numerically. A plain string comparison is
# lexicographic, so "2.10.0" >= "2.4.0" would evaluate to False.
assert tuple(int(part) for part in tf.version.VERSION.split(".")[:2]) >= (2, 4), \
    "version of tf must greater/equal than 2.4.0"


def main():
    """Fine-tune ShuffleNetV2 x1.0 on the flower dataset with a custom loop.

    Loads pre-trained weights by layer name, trains with SGD, logs loss,
    accuracy and learning rate to TensorBoard and keeps only the weights of
    the epoch with the best validation accuracy.
    """
    data_root = "/data/flower_photos"  # get data root path

    # exist_ok avoids the separate exists-then-create check
    os.makedirs("./save_weights", exist_ok=True)

    im_height = 224
    im_width = 224
    batch_size = 16
    epochs = 30
    num_classes = 5

    log_dir = "./logs/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    train_writer = tf.summary.create_file_writer(os.path.join(log_dir, "train"))
    val_writer = tf.summary.create_file_writer(os.path.join(log_dir, "val"))

    # data generator with data augmentation
    train_ds, val_ds = generate_ds(data_root, im_height, im_width, batch_size)

    # create model
    model = shufflenet_v2_x1_0(input_shape=(im_height, im_width, 3),
                               num_classes=num_classes)

    # load weights
    # x1.0 weights link: https://pan.baidu.com/s/1M2mp98Si9eT9qT436DcdOw  password: mhts
    pre_weights_path = './shufflenetv2_x1_0.h5'
    assert os.path.exists(pre_weights_path), "cannot find {}".format(pre_weights_path)
    # skip_mismatch lets layers whose shapes differ (e.g. the classifier) be skipped
    model.load_weights(pre_weights_path, by_name=True, skip_mismatch=True)

    model.summary()

    # custom learning rate curve
    def scheduler(now_epoch):
        initial_lr = 0.1
        end_lr_rate = 0.1  # end_lr = initial_lr * end_lr_rate
        rate = ((1 + math.cos(now_epoch * math.pi / epochs)) / 2) * (1 - end_lr_rate) + end_lr_rate  # cosine
        new_lr = rate * initial_lr

        # writing lr into tensorboard; use the function's own argument as the
        # step instead of relying on the enclosing loop variable
        with train_writer.as_default():
            tf.summary.scalar('learning rate', data=new_lr, step=now_epoch)

        return new_lr

    # using keras low level api for training
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

    @tf.function
    def train_step(train_images, train_labels):
        with tf.GradientTape() as tape:
            output = model(train_images, training=True)
            loss = loss_object(train_labels, output)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(train_labels, output)

    @tf.function
    def val_step(val_images, val_labels):
        output = model(val_images, training=False)
        loss = loss_object(val_labels, output)

        val_loss(loss)
        val_accuracy(val_labels, output)

    best_val_acc = 0.
    for epoch in range(epochs):
        train_loss.reset_states()  # clear history info
        train_accuracy.reset_states()  # clear history info
        val_loss.reset_states()  # clear history info
        val_accuracy.reset_states()  # clear history info

        # train
        train_bar = tqdm(train_ds, file=sys.stdout)
        for images, labels in train_bar:
            train_step(images, labels)

            # print train process
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                                 epochs,
                                                                                 train_loss.result(),
                                                                                 train_accuracy.result())

        # update learning rate
        # NOTE(review): this runs after the epoch's training steps, so the
        # value computed for `epoch` is first used while training epoch + 1
        # -- confirm the one-epoch lag is intended.
        optimizer.learning_rate = scheduler(epoch)

        # validate
        val_bar = tqdm(val_ds, file=sys.stdout)
        for images, labels in val_bar:
            val_step(images, labels)

            # print val process
            val_bar.desc = "valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                               epochs,
                                                                               val_loss.result(),
                                                                               val_accuracy.result())
        # writing training loss and acc
        with train_writer.as_default():
            tf.summary.scalar("loss", train_loss.result(), epoch)
            tf.summary.scalar("accuracy", train_accuracy.result(), epoch)

        # writing validation loss and acc
        with val_writer.as_default():
            tf.summary.scalar("loss", val_loss.result(), epoch)
            tf.summary.scalar("accuracy", val_accuracy.result(), epoch)

        # only save best weights
        if val_accuracy.result() > best_val_acc:
            best_val_acc = val_accuracy.result()
            model.save_weights("./save_weights/shufflenetv2.ckpt", save_format="tf")


if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test7_shuffleNet/trans_weights.py
================================================
import torch
import numpy as np

from model import shufflenet_v2_x1_0


def main():
    """Convert a PyTorch ShuffleNetV2 x1.0 state dict into Keras h5 weights.

    Reads './shufflenetv2_x1.pth', renames every entry to the matching Keras
    variable name, transposes convolution / dense kernels to the Keras
    layout, checks names and shapes against a freshly built model and writes
    'shufflenetv2_x1_0.h5'. Keys that have no Keras counterpart are printed.
    """
    m = shufflenet_v2_x1_0()
    m_info = [(i.name.replace(":0", ""), list(i.shape))
              for i in m.weights]

    # torch (out, in, kh, kw) -> keras (kh, kw, in, out)
    conv_perm = (2, 3, 1, 0)
    # torch depthwise (channels, 1, kh, kw) -> keras (kh, kw, channels, 1)
    dw_perm = (2, 3, 0, 1)

    # stem / head entries that are transposed as ordinary conv kernels
    conv_kernel_names = {"conv1.0.weight": "conv1/conv1/kernel",
                         "conv5.0.weight": "conv5/conv1/kernel"}
    # stem / head entries that are copied without transposition
    plain_names = {"conv1.1.weight": "conv1/bn/gamma",
                   "conv1.1.bias": "conv1/bn/beta",
                   "conv1.1.running_mean": "conv1/bn/moving_mean",
                   "conv1.1.running_var": "conv1/bn/moving_variance",
                   "conv5.1.weight": "conv5/bn/gamma",
                   "conv5.1.bias": "conv5/bn/beta",
                   "conv5.1.running_mean": "conv5/bn/moving_mean",
                   "conv5.1.running_var": "conv5/bn/moving_variance",
                   "fc.bias": "fc/bias"}
    # per-unit entries; the key is the text following ".branch" in the torch
    # name (built once instead of on every loop iteration)
    torch_name2tf_name = {"1.0.weight": "b1_dw1/dw1/depthwise_kernel",
                          "1.1.weight": "b1_dw1/bn/gamma",
                          "1.1.bias": "b1_dw1/bn/beta",
                          "1.1.running_mean": "b1_dw1/bn/moving_mean",
                          "1.1.running_var": "b1_dw1/bn/moving_variance",
                          "1.2.weight": "b1_conv1/conv1/kernel",
                          "1.3.weight": "b1_conv1/bn/gamma",
                          "1.3.bias": "b1_conv1/bn/beta",
                          "1.3.running_mean": "b1_conv1/bn/moving_mean",
                          "1.3.running_var": "b1_conv1/bn/moving_variance",
                          "2.0.weight": "b2_conv1/conv1/kernel",
                          "2.1.weight": "b2_conv1/bn/gamma",
                          "2.1.bias": "b2_conv1/bn/beta",
                          "2.1.running_mean": "b2_conv1/bn/moving_mean",
                          "2.1.running_var": "b2_conv1/bn/moving_variance",
                          "2.3.weight": "b2_dw1/dw1/depthwise_kernel",
                          "2.4.weight": "b2_dw1/bn/gamma",
                          "2.4.bias": "b2_dw1/bn/beta",
                          "2.4.running_mean": "b2_dw1/bn/moving_mean",
                          "2.4.running_var": "b2_dw1/bn/moving_variance",
                          "2.5.weight": "b2_conv2/conv1/kernel",
                          "2.6.weight": "b2_conv2/bn/gamma",
                          "2.6.bias": "b2_conv2/bn/beta",
                          "2.6.running_mean": "b2_conv2/bn/moving_mean",
                          "2.6.running_var": "b2_conv2/bn/moving_variance"}

    weights_path = './shufflenetv2_x1.pth'
    # map_location="cpu" lets a checkpoint saved from a GPU be loaded on a
    # machine without CUDA, and guarantees .numpy() below is valid.
    weights_dict = torch.load(weights_path, map_location="cpu")
    new_weights_dict = dict()
    for key, tensor in weights_dict.items():
        # work with numpy arrays throughout so every stored value has the
        # same type
        value = tensor.detach().cpu().numpy()

        if key in conv_kernel_names:
            new_weights_dict[conv_kernel_names[key]] = np.transpose(value, conv_perm).astype(np.float32)
        elif key in plain_names:
            new_weights_dict[plain_names[key]] = value
        elif "stage" in key:
            stage_part, _, branch_part = key.partition(".branch")
            tf_name_postfix = torch_name2tf_name.get(branch_part)
            if tf_name_postfix is None:
                # e.g. bookkeeping buffers with no Keras counterpart: report
                # them like any other unmapped key instead of raising KeyError
                print(key)
                continue

            num_stage, num_block = stage_part.replace("stage", "").split(".")
            tf_name = "stage{}_{}/".format(num_stage, num_block) + tf_name_postfix

            if len(value.shape) > 1:  # conv or dwconv
                perm = dw_perm if "dw" in tf_name else conv_perm
                value = np.transpose(value, perm).astype(np.float32)

            new_weights_dict[tf_name] = value
        elif "fc.weight" == key:
            # torch (out, in) -> keras (in, out)
            new_weights_dict["fc/kernel"] = np.transpose(value, (1, 0)).astype(np.float32)
        else:
            print(key)

    assert len(m_info) == len(new_weights_dict)

    # collect the converted weights in the order the model expects
    weights_list = []
    for name, shape in m_info:
        assert name in new_weights_dict, "not found key:'{}'".format(name)
        assert tuple(shape) == new_weights_dict[name].shape, \
            "tf shape:'{}', trans shape:'{}'".format(shape,
                                                     new_weights_dict[name].shape)
        weights_list.append(new_weights_dict[name])

    m.set_weights(weights_list)
    m.save_weights("shufflenetv2_x1_0.h5", save_format="h5")


if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test7_shuffleNet/utils.py
================================================
import os
import json
import random

import tensorflow as tf
import matplotlib.pyplot as plt


def read_split_data(root: str, val_rate: float = 0.2, plot_image: bool = False):
    """Scan a folder-per-class dataset and split it into train / val lists.

    Every sub-directory of `root` is one class. The index -> class-name
    mapping is written to 'class_indices.json' in the current working
    directory.

    Args:
        root: dataset root directory.
        val_rate: fraction of each class sampled into the validation set.
        plot_image: when True, show a bar chart of the per-class image counts.

    Returns:
        (train_images_path, train_images_label,
         val_images_path, val_images_label) as four lists.
    """
    random.seed(0)  # make the random split reproducible
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # walk the folders, one folder per class
    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]
    # sort so that class indices are stable
    flower_class.sort()
    # build class name -> numeric index, and dump the inverse mapping
    class_indices = dict((k, v) for v, k in enumerate(flower_class))
    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of every training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of every validation image
    every_class_num = []  # number of samples per class
    supported = [".jpg", ".JPG", ".jpeg", ".JPEG"]  # accepted file suffixes
    # walk the files of every class folder
    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # keep only files with a supported suffix
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        # os.listdir order is arbitrary; sort so the seeded sample below picks
        # the same files on every platform
        images.sort()
        # class index of this folder
        image_class = class_indices[cla]
        # record the number of samples of this class
        every_class_num.append(len(images))
        # randomly sample the validation images; a set gives O(1) membership
        # tests in the loop below
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:  # sampled -> validation set
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:  # otherwise -> training set
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.\n{} for training, {} for validation".format(sum(every_class_num),
                                                                                            len(train_images_path),
                                                                                            len(val_images_path)
                                                                                            ))

    if plot_image:
        # bar chart of the number of images per class
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # replace the x ticks 0,1,2,... with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # add the count above every bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        # x axis label
        plt.xlabel('image class')
        # y axis label
        plt.ylabel('number of images')
        # chart title
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label


def generate_ds(data_root: str,
                im_height: int,
                im_width: int,
                batch_size: int,
                val_rate: float = 0.1):
    """
    Read and split the dataset, then build the training and validation
    `tf.data` pipelines.

    :param data_root: dataset root directory
    :param im_height: height of the images fed to the network
    :param im_width:  width of the images fed to the network
    :param batch_size: batch size of both datasets
    :param val_rate:  fraction of the data assigned to the validation set
    :return: (train_ds, val_ds), each yielding batches of (image, label)
    """
    train_img_path, train_img_label, val_img_path, val_img_label = read_split_data(data_root, val_rate=val_rate)
    AUTOTUNE = tf.data.experimental.AUTOTUNE

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    def load_image(img_path, label):
        # deterministic part: read, decode, convert to float32 and
        # centrally crop / zero-pad to the target size
        image = tf.io.read_file(img_path)
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize_with_crop_or_pad(image, im_height, im_width)
        return image, label

    def augment(image, label):
        # random part: must run after cache() so it is re-drawn every epoch
        return tf.image.random_flip_left_right(image), label

    def normalize(image, label):
        return (image - mean) / std, label

    def build_dataset(img_paths, img_labels, training: bool):
        ds = tf.data.Dataset.from_tensor_slices((tf.constant(img_paths),
                                                 tf.constant(img_labels)))
        # Use Dataset.map to create a dataset of image, label pairs
        ds = ds.map(load_image, num_parallel_calls=AUTOTUNE)
        # Cache only the deterministic result. Caching after the random flip
        # would store one fixed flip per image and replay it in every later
        # epoch, which disables the augmentation.
        ds = ds.cache()
        if training:
            ds = ds.shuffle(buffer_size=len(img_paths))  # shuffle the samples
            ds = ds.map(augment, num_parallel_calls=AUTOTUNE)
        ds = ds.map(normalize, num_parallel_calls=AUTOTUNE)
        ds = ds.batch(batch_size)                # set the batch size
        ds = ds.prefetch(buffer_size=AUTOTUNE)   # prepare the next step while training
        return ds

    train_ds = build_dataset(train_img_path, train_img_label, training=True)
    val_ds = build_dataset(val_img_path, val_img_label, training=False)

    return train_ds, val_ds


================================================
FILE: tensorflow_classification/Test9_efficientNet/model.py
================================================
import math
from typing import Union

from tensorflow.keras import layers, Model


# Initializer spec (class_name / config dict) passed as `kernel_initializer`
# or `depthwise_initializer` to the convolution layers built below.
CONV_KERNEL_INITIALIZER = {
    'class_name': 'VarianceScaling',
    'config': {
        'scale': 2.0,
        'mode': 'fan_out',
        'distribution': 'truncated_normal'
    }
}

# Initializer spec passed as `kernel_initializer` to the final Dense
# ("predictions") layer.
DENSE_KERNEL_INITIALIZER = {
    'class_name': 'VarianceScaling',
    'config': {
        'scale': 1. / 3.,
        'mode': 'fan_out',
        'distribution': 'uniform'
    }
}


def correct_pad(input_size: Union[int, tuple], kernel_size: Union[int, tuple]):
    """Returns a tuple for zero-padding for 2D convolution with downsampling.

    Arguments:
      input_size: Input spatial size, an integer or a (height, width) pair.
        Unknown (`None`) dimensions are treated as even.
      kernel_size: An integer or tuple/list of 2 integers.

    Returns:
      A tuple ((pad_top, pad_bottom), (pad_left, pad_right)).
    """

    if isinstance(input_size, int):
        input_size = (input_size, input_size)

    # Only wrap scalars; a (kh, kw) pair is used as given, as the docstring
    # promises. Wrapping unconditionally made tuple kernel sizes crash.
    if isinstance(kernel_size, int):
        kernel_size = (kernel_size, kernel_size)

    # one pixel less padding on the leading side for even sizes; a None
    # dimension cannot be tested for parity and is treated as even
    adjust = tuple(1 if size is None else 1 - size % 2 for size in input_size[:2])
    correct = (kernel_size[0] // 2, kernel_size[1] // 2)
    return ((correct[0] - adjust[0], correct[0]),
            (correct[1] - adjust[1], correct[1]))


def block(inputs,
          activation: str = "swish",
          drop_rate: float = 0.,
          name: str = "",
          input_channel: int = 32,
          output_channel: int = 16,
          kernel_size: int = 3,
          strides: int = 1,
          expand_ratio: int = 1,
          use_se: bool = True,
          se_ratio: float = 0.25):
    """An inverted residual block.

      Arguments:
          inputs: input tensor (channels-last, as assumed by the SE reshape).
          activation: activation function.
          drop_rate: float between 0 and 1, fraction of the input units to drop.
          name: string, block label used as prefix of every layer name.
          input_channel: integer, the number of input filters.
          output_channel: integer, the number of output filters.
          kernel_size: integer, the dimension of the convolution window.
          strides: integer, the stride of the convolution.
          expand_ratio: integer, scaling coefficient for the input filters.
          use_se: whether to use se
          se_ratio: float between 0 and 1, fraction to squeeze the input filters.

      Returns:
          output tensor for the block.
      """
    # Expansion phase
    filters = input_channel * expand_ratio
    if expand_ratio != 1:
        x = layers.Conv2D(filters=filters,
                          kernel_size=1,
                          padding="same",
                          use_bias=False,
                          kernel_initializer=CONV_KERNEL_INITIALIZER,
                          name=name + "expand_conv")(inputs)
        x = layers.BatchNormalization(name=name + "expand_bn")(x)
        x = layers.Activation(activation, name=name + "expand_activation")(x)
    else:
        x = inputs

    # Depthwise Convolution
    if strides == 2:
        # The padding depends on the parity of the feature map's spatial size,
        # not on the channel count. Passing `filters` here always selected the
        # "even" padding, which is wrong for odd-sized feature maps.
        # Unknown (None) dimensions are mapped to 0 so they count as even.
        height, width = x.shape[1], x.shape[2]
        spatial_size = (0 if height is None else height,
                        0 if width is None else width)
        x = layers.ZeroPadding2D(padding=correct_pad(spatial_size, kernel_size),
                                 name=name + "dwconv_pad")(x)

    x = layers.DepthwiseConv2D(kernel_size=kernel_size,
                               strides=strides,
                               padding="same" if strides == 1 else "valid",
                               use_bias=False,
                               depthwise_initializer=CONV_KERNEL_INITIALIZER,
                               name=name + "dwconv")(x)
    x = layers.BatchNormalization(name=name + "bn")(x)
    x = layers.Activation(activation, name=name + "activation")(x)

    if use_se:
        # never let the squeeze width collapse to zero filters
        filters_se = max(1, int(input_channel * se_ratio))
        se = layers.GlobalAveragePooling2D(name=name + "se_squeeze")(x)
        se = layers.Reshape((1, 1, filters), name=name + "se_reshape")(se)
        se = layers.Conv2D(filters=filters_se,
                           kernel_size=1,
                           padding="same",
                           activation=activation,
                           kernel_initializer=CONV_KERNEL_INITIALIZER,
                           name=name + "se_reduce")(se)
        se = layers.Conv2D(filters=filters,
                           kernel_size=1,
                           padding="same",
                           activation="sigmoid",
                           kernel_initializer=CONV_KERNEL_INITIALIZER,
                           name=name + "se_expand")(se)
        x = layers.multiply([x, se], name=name + "se_excite")

    # Output phase
    x = layers.Conv2D(filters=output_channel,
                      kernel_size=1,
                      padding="same",
                      use_bias=False,
                      kernel_initializer=CONV_KERNEL_INITIALIZER,
                      name=name + "project_conv")(x)
    x = layers.BatchNormalization(name=name + "project_bn")(x)
    # the residual connection is only possible when the shape is preserved
    if strides == 1 and input_channel == output_channel:
        if drop_rate > 0:
            x = layers.Dropout(rate=drop_rate,
                               noise_shape=(None, 1, 1, 1),  # one keep/drop decision per sample
                               name=name + "drop")(x)
        x = layers.add([x, inputs], name=name + "add")

    return x


def efficient_net(width_coefficient,
                  depth_coefficient,
                  input_shape=(224, 224, 3),
                  dropout_rate=0.2,
                  drop_connect_rate=0.2,
                  activation="swish",
                  model_name="efficientnet",
                  include_top=True,
                  num_classes=1000):
    """Build an EfficientNet model from width / depth scaling coefficients.

      Reference:
      - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](
          https://arxiv.org/abs/1905.11946) (ICML 2019)

      Arguments:
        width_coefficient: float, multiplier applied to every channel count.
        depth_coefficient: float, multiplier applied to every stage's repeats.
        input_shape: tuple, input image shape (not including the batch size).
        dropout_rate: float, dropout rate before the final classifier layer.
        drop_connect_rate: float, maximum dropout rate at skip connections;
            it grows linearly with the block index.
        activation: activation function.
        model_name: string, name of the returned model.
        include_top: whether to append global pooling, dropout and the
            softmax classifier after the last convolution.
        num_classes: number of units of the classifier, only used when
            `include_top` is True.

      Returns:
        A `keras.Model` instance.
    """

    # (kernel_size, repeats, in_channel, out_channel, exp_ratio, strides, SE)
    stage_configs = (
        (3, 1, 32, 16, 1, 1, True),
        (3, 2, 16, 24, 6, 2, True),
        (5, 2, 24, 40, 6, 2, True),
        (3, 3, 40, 80, 6, 2, True),
        (5, 3, 80, 112, 6, 1, True),
        (5, 4, 112, 192, 6, 2, True),
        (3, 1, 192, 320, 6, 1, True),
    )

    def round_filters(filters, divisor=8):
        """Scale a channel count by the width multiplier, rounded to `divisor`."""
        scaled = filters * width_coefficient
        rounded = max(divisor, int(scaled + divisor / 2) // divisor * divisor)
        # Make sure that round down does not go down by more than 10%.
        if rounded < 0.9 * scaled:
            rounded += divisor
        return int(rounded)

    def round_repeats(repeats):
        """Scale a repeat count by the depth multiplier, rounding up."""
        return int(math.ceil(depth_coefficient * repeats))

    img_input = layers.Input(shape=input_shape)

    # data preprocessing
    x = layers.experimental.preprocessing.Rescaling(1. / 255.)(img_input)
    x = layers.experimental.preprocessing.Normalization()(x)

    # stem: explicit padding followed by a "valid" stride-2 convolution
    x = layers.ZeroPadding2D(padding=correct_pad(input_shape[:2], 3),
                             name="stem_conv_pad")(x)
    x = layers.Conv2D(filters=round_filters(32),
                      kernel_size=3,
                      strides=2,
                      padding="valid",
                      use_bias=False,
                      kernel_initializer=CONV_KERNEL_INITIALIZER,
                      name="stem_conv")(x)
    x = layers.BatchNormalization(name="stem_bn")(x)
    x = layers.Activation(activation, name="stem_activation")(x)

    # stack the stages
    total_blocks = float(sum(round_repeats(cfg[1]) for cfg in stage_configs))
    block_idx = 0
    for stage_idx, cfg in enumerate(stage_configs, start=1):
        kernel, repeats, in_c, out_c, expand, stride, se = cfg
        assert repeats > 0
        # apply the width multiplier to this stage's channels
        in_c = round_filters(in_c)
        out_c = round_filters(out_c)

        for rep_idx in range(round_repeats(repeats)):
            # only the first block of a stage changes channels / resolution
            is_first = rep_idx == 0
            x = block(x,
                      activation=activation,
                      drop_rate=drop_connect_rate * block_idx / total_blocks,
                      name="block{}{}_".format(stage_idx, chr(rep_idx + 97)),
                      kernel_size=kernel,
                      input_channel=in_c if is_first else out_c,
                      output_channel=out_c,
                      expand_ratio=expand,
                      strides=stride if is_first else 1,
                      use_se=se)
            block_idx += 1

    # head
    x = layers.Conv2D(round_filters(1280),
                      kernel_size=1,
                      padding="same",
                      use_bias=False,
                      kernel_initializer=CONV_KERNEL_INITIALIZER,
                      name="top_conv")(x)
    x = layers.BatchNormalization(name="top_bn")(x)
    x = layers.Activation(activation, name="top_activation")(x)
    if include_top:
        x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        if dropout_rate > 0:
            x = layers.Dropout(dropout_rate, name="top_dropout")(x)
        x = layers.Dense(units=num_classes,
                         activation="softmax",
                         kernel_initializer=DENSE_KERNEL_INITIALIZER,
                         name="predictions")(x)

    return Model(img_input, x, name=model_name)


def efficientnet_b0(num_classes=1000,
                    include_top=True,
                    input_shape=(224, 224, 3)):
    """
    Build the EfficientNet-B0 variant by delegating to ``efficient_net``.

    :param num_classes: forwarded to ``efficient_net`` as ``num_classes``
    :param include_top: forwarded to ``efficient_net`` as ``include_top``
    :param input_shape: forwarded to ``efficient_net`` as ``input_shape``
    :return: the value returned by ``efficient_net`` for this configuration
    """
    # Weight file URL (keras-applications):
    # https://storage.googleapis.com/keras-applications/efficientnetb0.h5
    variant_cfg = dict(width_coefficient=1.0,
                       depth_coefficient=1.0,
                       dropout_rate=0.2,
                       model_name="efficientnetb0")
    return efficient_net(input_shape=input_shape,
                         include_top=include_top,
                         num_classes=num_classes,
                         **variant_cfg)


def efficientnet_b1(num_classes=1000,
                    include_top=True,
                    input_shape=(240, 240, 3)):
    """
    Build the EfficientNet-B1 variant by delegating to ``efficient_net``.

    :param num_classes: forwarded to ``efficient_net`` as ``num_classes``
    :param include_top: forwarded to ``efficient_net`` as ``include_top``
    :param input_shape: forwarded to ``efficient_net`` as ``input_shape``
    :return: the value returned by ``efficient_net`` for this configuration
    """
    # Weight file URL (keras-applications):
    # https://storage.googleapis.com/keras-applications/efficientnetb1.h5
    variant_cfg = dict(width_coefficient=1.0,
                       depth_coefficient=1.1,
                       dropout_rate=0.2,
                       model_name="efficientnetb1")
    return efficient_net(input_shape=input_shape,
                         include_top=include_top,
                         num_classes=num_classes,
                         **variant_cfg)


def efficientnet_b2(num_classes=1000,
                    include_top=True,
                    input_shape=(260, 260, 3)):
    """
    Build the EfficientNet-B2 variant by delegating to ``efficient_net``.

    :param num_classes: forwarded to ``efficient_net`` as ``num_classes``
    :param include_top: forwarded to ``efficient_net`` as ``include_top``
    :param input_shape: forwarded to ``efficient_net`` as ``input_shape``
    :return: the value returned by ``efficient_net`` for this configuration
    """
    # Weight file URL (keras-applications):
    # https://storage.googleapis.com/keras-applications/efficientnetb2.h5
    variant_cfg = dict(width_coefficient=1.1,
                       depth_coefficient=1.2,
                       dropout_rate=0.3,
                       model_name="efficientnetb2")
    return efficient_net(input_shape=input_shape,
                         include_top=include_top,
                         num_classes=num_classes,
                         **variant_cfg)


def efficientnet_b3(num_classes=1000,
                    include_top=True,
                    input_shape=(300, 300, 3)):
    """
    Build the EfficientNet-B3 variant by delegating to ``efficient_net``.

    :param num_classes: forwarded to ``efficient_net`` as ``num_classes``
    :param include_top: forwarded to ``efficient_net`` as ``include_top``
    :param input_shape: forwarded to ``efficient_net`` as ``input_shape``
    :return: the value returned by ``efficient_net`` for this configuration
    """
    # Weight file URL (keras-applications):
    # https://storage.googleapis.com/keras-applications/efficientnetb3.h5
    variant_cfg = dict(width_coefficient=1.2,
                       depth_coefficient=1.4,
                       dropout_rate=0.3,
                       model_name="efficientnetb3")
    return efficient_net(input_shape=input_shape,
                         include_top=include_top,
                         num_classes=num_classes,
                         **variant_cfg)


def efficientnet_b4(num_classes=1000,
                    include_top=True,
                    input_shape=(380, 380, 3)):
    """
    Build the EfficientNet-B4 variant by delegating to ``efficient_net``.

    :param num_classes: forwarded to ``efficient_net`` as ``num_classes``
    :param include_top: forwarded to ``efficient_net`` as ``include_top``
    :param input_shape: forwarded to ``efficient_net`` as ``input_shape``
    :return: the value returned by ``efficient_net`` for this configuration
    """
    # Weight file URL (keras-applications):
    # https://storage.googleapis.com/keras-applications/efficientnetb4.h5
    variant_cfg = dict(width_coefficient=1.4,
                       depth_coefficient=1.8,
                       dropout_rate=0.4,
                       model_name="efficientnetb4")
    return efficient_net(input_shape=input_shape,
                         include_top=include_top,
                         num_classes=num_classes,
                         **variant_cfg)


def efficientnet_b5(num_classes=1000,
                    include_top=True,
                    input_shape=(456, 456, 3)):
    """
    Build the EfficientNet-B5 variant by delegating to ``efficient_net``.

    :param num_classes: forwarded to ``efficient_net`` as ``num_classes``
    :param include_top: forwarded to ``efficient_net`` as ``include_top``
    :param input_shape: forwarded to ``efficient_net`` as ``input_shape``
    :return: the value returned by ``efficient_net`` for this configuration
    """
    # Weight file URL (keras-applications):
    # https://storage.googleapis.com/keras-applications/efficientnetb5.h5
    variant_cfg = dict(width_coefficient=1.6,
                       depth_coefficient=2.2,
                       dropout_rate=0.4,
                       model_name="efficientnetb5")
    return efficient_net(input_shape=input_shape,
                         include_top=include_top,
                         num_classes=num_classes,
                         **variant_cfg)


def efficientnet_b6(num_classes=1000,
                    include_top=True,
                    input_shape=(528, 528, 3)):
    """
    Build the EfficientNet-B6 variant by delegating to ``efficient_net``.

    :param num_classes: forwarded to ``efficient_net`` as ``num_classes``
    :param include_top: forwarded to ``efficient_net`` as ``include_top``
    :param input_shape: forwarded to ``efficient_net`` as ``input_shape``
    :return: the value returned by ``efficient_net`` for this configuration
    """
    # Weight file URL (keras-applications):
    # https://storage.googleapis.com/keras-applications/efficientnetb6.h5
    variant_cfg = dict(width_coefficient=1.8,
                       depth_coefficient=2.6,
                       dropout_rate=0.5,
                       model_name="efficientnetb6")
    return efficient_net(input_shape=input_shape,
                         include_top=include_top,
                         num_classes=num_classes,
                         **variant_cfg)


def efficientnet_b7(num_classes=1000,
                    include_top=True,
                    input_shape=(600, 600, 3)):
    """
    Build the EfficientNet-B7 variant by delegating to ``efficient_net``.

    :param num_classes: forwarded to ``efficient_net`` as ``num_classes``
    :param include_top: forwarded to ``efficient_net`` as ``include_top``
    :param input_shape: forwarded to ``efficient_net`` as ``input_shape``
    :return: the value returned by ``efficient_net`` for this configuration
    """
    # Weight file URL (keras-applications):
    # https://storage.googleapis.com/keras-applications/efficientnetb7.h5
    variant_cfg = dict(width_coefficient=2.0,
                       depth_coefficient=3.1,
                       dropout_rate=0.5,
                       model_name="efficientnetb7")
    return efficient_net(input_shape=input_shape,
                         include_top=include_top,
                         num_classes=num_classes,
                         **variant_cfg)


================================================
FILE: tensorflow_classification/Test9_efficientNet/predict.py
================================================
import os
import json
import glob
import numpy as np

from PIL import Image
import matplotlib.pyplot as plt

from model import efficientnet_b0 as create_model


def main():
    """
    Classify one image with a trained EfficientNet and display the result.

    Loads ``../tulip.jpg``, resizes it to the input size of the variant
    selected by ``num_model``, restores weights from
    ``./save_weights/efficientnet.ckpt``, prints the probability of every
    class listed in ``./class_indices.json`` and shows the image with the
    top prediction as its title.

    :raises AssertionError: if the image, the class-index json or the
        checkpoint files cannot be found
    """
    num_classes = 5

    # input resolution expected by each EfficientNet variant
    img_size = {"B0": 224,
                "B1": 240,
                "B2": 260,
                "B3": 300,
                "B4": 380,
                "B5": 456,
                "B6": 528,
                "B7": 600}
    num_model = "B0"
    im_height = im_width = img_size[num_model]

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path)
    # Force 3-channel RGB so grayscale / RGBA / CMYK files still produce an
    # [H, W, 3] array for the network.
    img = Image.open(img_path).convert("RGB")
    # resize image to the input size of the selected variant
    img = img.resize((im_width, im_height))
    plt.imshow(img)

    # convert to a float32 array; pixel values are left unscaled
    img = np.array(img).astype(np.float32)

    # Add the image to a batch where it's the only member.
    img = (np.expand_dims(img, 0))

    # read class_indict (maps str(class index) -> class name)
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = create_model(num_classes=num_classes)

    # a TF-format checkpoint is stored as several files sharing this prefix,
    # hence the glob instead of a plain os.path.exists
    weights_path = './save_weights/efficientnet.ckpt'
    assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path)
    model.load_weights(weights_path)

    # drop the batch axis -> one probability per class
    result = np.squeeze(model.predict(img))
    predict_class = np.argmax(result)

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_class)],
                                                 result[predict_class])
    plt.title(print_res)
    for i, prob in enumerate(result):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  prob))
    plt.show()


if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test9_efficientNet/train.py
================================================
import os
import sys
import math
import datetime

import tensorflow as tf
from tqdm import tqdm

from model import efficientnet_b0 as create_model
from utils import generate_ds

# Compare (major, minor) numerically: a plain string comparison would rank
# "2.10.0" below "2.4.0" and wrongly reject newer TensorFlow releases.
assert tuple(int(part) for part in tf.version.VERSION.split(".")[:2]) >= (2, 4), \
    "version of tf must greater/equal than 2.4.0"


def main():
    """
    Fine-tune EfficientNet-B0 on an image-folder dataset with a custom loop.

    Builds the train/val datasets with ``generate_ds``, loads pretrained
    weights from ``./efficientnetb0.h5`` by layer name, optionally freezes
    every layer except ``top_conv``, ``top_bn`` and ``predictions``, trains
    with SGD under a cosine learning-rate curve, logs loss / accuracy /
    learning rate to TensorBoard under ``./logs`` and keeps only the weights
    with the best validation accuracy in ``./save_weights``.

    :raises AssertionError: if the pretrained weight file is missing
    """
    data_root = "/data/flower_photos"  # get data root path

    if not os.path.exists("./save_weights"):
        os.makedirs("./save_weights")

    # input resolution expected by each EfficientNet variant
    img_size = {"B0": 224,
                "B1": 240,
                "B2": 260,
                "B3": 300,
                "B4": 380,
                "B5": 456,
                "B6": 528,
                "B7": 600}

    num_model = "B0"
    im_height = im_width = img_size[num_model]
    batch_size = 16
    epochs = 30
    num_classes = 5
    freeze_layers = True
    initial_lr = 0.01

    log_dir = "./logs/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    train_writer = tf.summary.create_file_writer(os.path.join(log_dir, "train"))
    val_writer = tf.summary.create_file_writer(os.path.join(log_dir, "val"))

    # data generator with data augmentation
    train_ds, val_ds = generate_ds(data_root, im_height, im_width, batch_size)

    # create model
    model = create_model(num_classes=num_classes)

    # load weights; layers whose shapes do not match the file are skipped
    pre_weights_path = './efficientnetb0.h5'
    assert os.path.exists(pre_weights_path), "cannot find {}".format(pre_weights_path)
    model.load_weights(pre_weights_path, by_name=True, skip_mismatch=True)

    # freeze bottom layers
    if freeze_layers:
        unfreeze_layers = ["top_conv", "top_bn", "predictions"]
        for layer in model.layers:
            if layer.name not in unfreeze_layers:
                layer.trainable = False
            else:
                print("training {}".format(layer.name))

    model.summary()

    # custom learning rate curve
    def scheduler(now_epoch):
        """
        Cosine learning-rate curve from ``initial_lr`` towards
        ``initial_lr * end_lr_rate``; also logs the value to TensorBoard.

        :param now_epoch: epoch index the learning rate is computed for
        :return: the learning rate for ``now_epoch``
        """
        end_lr_rate = 0.01  # end_lr = initial_lr * end_lr_rate
        rate = ((1 + math.cos(now_epoch * math.pi / epochs)) / 2) * (1 - end_lr_rate) + end_lr_rate  # cosine
        new_lr = rate * initial_lr

        # writing lr into tensorboard; use the function's own argument as the
        # step instead of silently closing over the outer loop variable
        with train_writer.as_default():
            tf.summary.scalar('learning rate', data=new_lr, step=now_epoch)

        return new_lr

    # using keras low level api for training
    # the model ends in a softmax, so the loss receives probabilities
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.SGD(learning_rate=initial_lr, momentum=0.9)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

    @tf.function
    def train_step(train_images, train_labels):
        """Run one optimisation step and update the running train metrics."""
        with tf.GradientTape() as tape:
            output = model(train_images, training=True)
            loss = loss_object(train_labels, output)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(train_labels, output)

    @tf.function
    def val_step(val_images, val_labels):
        """Evaluate one batch and update the running validation metrics."""
        output = model(val_images, training=False)
        loss = loss_object(val_labels, output)

        val_loss(loss)
        val_accuracy(val_labels, output)

    best_val_acc = 0.
    for epoch in range(epochs):
        train_loss.reset_states()  # clear history info
        train_accuracy.reset_states()  # clear history info
        val_loss.reset_states()  # clear history info
        val_accuracy.reset_states()  # clear history info

        # train
        train_bar = tqdm(train_ds, file=sys.stdout)
        for images, labels in train_bar:
            train_step(images, labels)

            # print train process
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                                 epochs,
                                                                                 train_loss.result(),
                                                                                 train_accuracy.result())

        # update learning rate (applied after this epoch's training pass, so
        # it takes effect from the next epoch on)
        optimizer.learning_rate = scheduler(epoch)

        # validate
        val_bar = tqdm(val_ds, file=sys.stdout)
        for images, labels in val_bar:
            val_step(images, labels)

            # print val process
            val_bar.desc = "valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                               epochs,
                                                                               val_loss.result(),
                                                                               val_accuracy.result())
        # writing training loss and acc
        with train_writer.as_default():
            tf.summary.scalar("loss", train_loss.result(), epoch)
            tf.summary.scalar("accuracy", train_accuracy.result(), epoch)

        # writing validation loss and acc
        with val_writer.as_default():
            tf.summary.scalar("loss", val_loss.result(), epoch)
            tf.summary.scalar("accuracy", val_accuracy.result(), epoch)

        # only save best weights
        if val_accuracy.result() > best_val_acc:
            best_val_acc = val_accuracy.result()
            save_name = "./save_weights/efficientnet.ckpt"
            model.save_weights(save_name, save_format="tf")


if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/Test9_efficientNet/utils.py
================================================
import os
import json
import random

import tensorflow as tf
import matplotlib.pyplot as plt


def read_split_data(root: str, val_rate: float = 0.2):
    """
    Split an image-folder dataset into training and validation lists.

    Every sub-directory of ``root`` is treated as one class. The mapping
    ``index -> class name`` is written to ``class_indices.json`` in the
    current working directory. For each class, ``int(n * val_rate)`` images
    are sampled (with a fixed random seed) for validation; the remaining
    images go to the training set.

    :param root: dataset root directory containing one folder per class
    :param val_rate: fraction of each class sampled into the validation set
    :return: ``(train_images_path, train_images_label,
        val_images_path, val_images_label)``
    :raises AssertionError: if ``root`` does not exist
    """
    random.seed(0)  # fixed seed so the random split is reproducible
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # walk the root folder; each sub-folder corresponds to one class
    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]
    # sort so the class order (and therefore the indices) is stable
    flower_class.sort()
    # build class name -> numeric index, and dump the inverse mapping to json
    class_indices = dict((k, v) for v, k in enumerate(flower_class))
    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of every training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of every validation image
    every_class_num = []  # number of samples found per class
    supported = [".jpg", ".JPG", ".jpeg", ".JPEG"]  # accepted file suffixes
    # walk the files of every class folder
    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # collect the paths of all files with a supported suffix
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        # numeric index of this class
        image_class = class_indices[cla]
        # record the number of samples of this class
        every_class_num.append(len(images))
        # Randomly sample the validation images by ratio. Stored as a set so
        # the membership test below is O(1) instead of scanning a list for
        # every image.
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:  # sampled for validation
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:  # everything else is used for training
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.\n{} for training, {} for validation".format(sum(every_class_num),
                                                                                            len(train_images_path),
                                                                                            len(val_images_path)
                                                                                            ))

    plot_image = False
    if plot_image:
        # bar chart of the number of images per class
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # replace the x ticks 0,1,2,3,4 with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # add the count above every bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        # x-axis label
        plt.xlabel('image class')
        # y-axis label
        plt.ylabel('number of images')
        # chart title
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label


def generate_ds(data_root: str,
                im_height: int,
                im_width: int,
                batch_size: int,
                val_rate: float = 0.1):
    """
    Read and split the dataset, then build the training and validation
    ``tf.data`` pipelines.

    :param data_root: dataset root directory
    :param im_height: height of the images fed to the network
    :param im_width:  width of the images fed to the network
    :param batch_size: batch size used by both pipelines
    :param val_rate:  fraction of the data assigned to the validation set
    :return: ``(train_ds, val_ds)``
    """
    train_img_path, train_img_label, val_img_path, val_img_label = read_split_data(data_root, val_rate=val_rate)
    AUTOTUNE = tf.data.experimental.AUTOTUNE

    def load_image(img_path, label):
        # deterministic part of the preprocessing: decode, cast, crop/pad
        image = tf.io.read_file(img_path)
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.cast(image, tf.float32)
        image = tf.image.resize_with_crop_or_pad(image, im_height, im_width)
        return image, label

    def augment(image, label):
        # random part of the preprocessing (training only)
        image = tf.image.random_flip_left_right(image)
        return image, label

    # Configure dataset for performance
    def configure_for_performance(ds,
                                  shuffle_size: int,
                                  shuffle: bool = False,
                                  augment_fn=None):
        ds = ds.cache()  # keep the decoded images in memory after the first pass
        if shuffle:
            ds = ds.shuffle(buffer_size=shuffle_size)  # shuffle the sample order
        if augment_fn is not None:
            # Random augmentation must run AFTER cache(): a cached dataset
            # replays exactly the same elements every epoch, which would
            # freeze the random flips chosen during the first epoch.
            ds = ds.map(augment_fn, num_parallel_calls=AUTOTUNE)
        ds = ds.batch(batch_size)                      # group into batches
        ds = ds.prefetch(buffer_size=AUTOTUNE)         # prepare the next step while training
        return ds

    train_ds = tf.data.Dataset.from_tensor_slices((tf.constant(train_img_path),
                                                   tf.constant(train_img_label)))
    total_train = len(train_img_path)

    # Use Dataset.map to create a dataset of image, label pairs
    train_ds = train_ds.map(load_image, num_parallel_calls=AUTOTUNE)
    train_ds = configure_for_performance(train_ds, total_train, shuffle=True, augment_fn=augment)

    val_ds = tf.data.Dataset.from_tensor_slices((tf.constant(val_img_path),
                                                 tf.constant(val_img_label)))
    total_val = len(val_img_path)
    # Use Dataset.map to create a dataset of image, label pairs
    val_ds = val_ds.map(load_image, num_parallel_calls=AUTOTUNE)
    val_ds = configure_for_performance(val_ds, total_val)

    return train_ds, val_ds


================================================
FILE: tensorflow_classification/analyze_weights_featuremap/alexnet_model.py
================================================
from tensorflow.keras import layers, models, Model, Sequential


def AlexNet_v1(im_height=224, im_width=224, class_num=1000):
    """
    Build AlexNet with the Keras functional API.

    :param im_height: input image height
    :param im_width: input image width
    :param class_num: number of units of the final Dense layer
    :return: a ``models.Model`` mapping the image input to softmax outputs
    """
    # tensors in tensorflow are laid out as NHWC
    input_image = layers.Input(shape=(im_height, im_width, 3), dtype="float32")

    # The layers are instantiated in the order they are applied.
    # Shape comments assume the default 224x224 input.
    layer_stack = [
        layers.ZeroPadding2D(((1, 2), (1, 2))),                                # (None, 227, 227, 3)
        layers.Conv2D(48, kernel_size=11, strides=4, activation="relu"),       # (None, 55, 55, 48)
        layers.MaxPool2D(pool_size=3, strides=2),                              # (None, 27, 27, 48)
        layers.Conv2D(128, kernel_size=5, padding="same", activation="relu"),  # (None, 27, 27, 128)
        layers.MaxPool2D(pool_size=3, strides=2),                              # (None, 13, 13, 128)
        layers.Conv2D(192, kernel_size=3, padding="same", activation="relu"),  # (None, 13, 13, 192)
        layers.Conv2D(192, kernel_size=3, padding="same", activation="relu"),  # (None, 13, 13, 192)
        layers.Conv2D(128, kernel_size=3, padding="same", activation="relu"),  # (None, 13, 13, 128)
        layers.MaxPool2D(pool_size=3, strides=2),                              # (None, 6, 6, 128)
        layers.Flatten(),                                                      # (None, 6*6*128)
        layers.Dropout(0.2),
        layers.Dense(2048, activation="relu"),                                 # (None, 2048)
        layers.Dropout(0.2),
        layers.Dense(2048, activation="relu"),                                 # (None, 2048)
        layers.Dense(class_num),                                               # (None, class_num)
        layers.Softmax(),
    ]

    tensor = input_image
    for layer in layer_stack:
        tensor = layer(tensor)

    return models.Model(inputs=input_image, outputs=tensor)


class AlexNet_v2(Model):
    """
    AlexNet written with the Keras subclassing API.

    The network is split into a convolutional ``features`` stack, a
    ``flatten`` layer and a fully connected ``classifier`` stack that ends
    in a softmax.
    """

    def __init__(self, class_num=1000):
        """
        :param class_num: number of units of the final Dense layer
        """
        super().__init__()

        # Shape comments assume a 224x224x3 input.
        feature_layers = [
            layers.ZeroPadding2D(((1, 2), (1, 2))),                                 # (None, 227, 227, 3)
            layers.Conv2D(48, kernel_size=11, strides=4, activation="relu"),        # (None, 55, 55, 48)
            layers.MaxPool2D(pool_size=3, strides=2),                               # (None, 27, 27, 48)
            layers.Conv2D(128, kernel_size=5, padding="same", activation="relu"),   # (None, 27, 27, 128)
            layers.MaxPool2D(pool_size=3, strides=2),                               # (None, 13, 13, 128)
            layers.Conv2D(192, kernel_size=3, padding="same", activation="relu"),   # (None, 13, 13, 192)
            layers.Conv2D(192, kernel_size=3, padding="same", activation="relu"),   # (None, 13, 13, 192)
            layers.Conv2D(128, kernel_size=3, padding="same", activation="relu"),   # (None, 13, 13, 128)
            layers.MaxPool2D(pool_size=3, strides=2),                               # (None, 6, 6, 128)
        ]
        self.features = Sequential(feature_layers)

        self.flatten = layers.Flatten()

        classifier_layers = [
            layers.Dropout(0.2),
            layers.Dense(1024, activation="relu"),                                  # (None, 1024)
            layers.Dropout(0.2),
            layers.Dense(128, activation="relu"),                                   # (None, 128)
            layers.Dense(class_num),                                                # (None, class_num)
            layers.Softmax(),
        ]
        self.classifier = Sequential(classifier_layers)

    def call(self, inputs, **kwargs):
        """Forward pass: features -> flatten -> classifier."""
        return self.classifier(self.flatten(self.features(inputs)))

    def receive_feature_map(self, x, layers_name):
        """
        Run ``x`` through the ``features`` stack and collect selected outputs.

        :param x: input passed to the first layer of ``self.features``
        :param layers_name: container of layer names whose outputs are kept
        :return: list with the output of every layer of ``self.features``
            whose name is in ``layers_name``, in forward order
        """
        collected = []
        activation = x
        for layer in self.features.layers:
            activation = layer(activation)
            if layer.name in layers_name:
                collected.append(activation)
        return collected


================================================
FILE: tensorflow_classification/analyze_weights_featuremap/analyze_feature_map.py
================================================
from alexnet_model import AlexNet_v1, AlexNet_v2
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras import Model, Input

im_height = 224
im_width = 224

# Load the sample image and resize it to the network input size.
img = Image.open("../tulip.jpg")
img = img.resize((im_width, im_height))

# Scale pixel values to (0-1), then prepend a batch axis so the image is the
# only member of its batch.
img = np.array(img) / 255.
img = img[np.newaxis, ...]


model = AlexNet_v1(class_num=5)  # functional api
# model = AlexNet_v2(class_num=5)  # subclass api
# model.build((None, 224, 224, 3))
# If `by_name` is False weights are loaded based on the network's topology.
model.load_weights("./myAlex.h5")
# model.load_weights("./submodel.h5")
# for layer in model.layers:
#     print(layer.name)
model.summary()
layers_name = ["conv2d", "conv2d_1"]

# functional API: build a second model that exposes the outputs of the
# selected layers, then plot the first 12 channels of each feature map.
try:
    output_node = []
    for layer_name in layers_name:
        output_node.append(model.get_layer(name=layer_name).output)
    model1 = Model(inputs=model.input, outputs=output_node)
    outputs = model1.predict(img)
    for index, feature_map in enumerate(outputs):
        # [N, H, W, C] -> [H, W, C]
        im = np.squeeze(feature_map)

        # one figure per layer, laid out as a 3x4 grid of channels
        plt.figure()
        for i in range(12):
            plt.subplot(3, 4, i + 1)
            plt.imshow(im[:, :, i], cmap='gray')
        plt.suptitle(layers_name[index])
        plt.show()
except Exception as e:
    # best effort: report the failure instead of aborting the script
    print(e)

# subclasses API
# outputs = model.receive_feature_map(img, layers_name)
# for index, feature_maps in enumerate(outputs):
#     # [N, H, W, C] -> [H, W, C]
#     im = np.squeeze(feature_maps)
#
#     # show top 12 feature maps
#     plt.figure()
#     for i in range(12):
#         ax = plt.subplot(3, 4, i + 1)
#         # [H, W, C]
#         plt.imshow(im[:, :, i], cmap='gray')
#     plt.suptitle(layers_name[index])
#     plt.show()


================================================
FILE: tensorflow_classification/analyze_weights_featuremap/analyze_kernel_weight.py
================================================
from alexnet_model import AlexNet_v1, AlexNet_v2
import numpy as np
import matplotlib.pyplot as plt

# Load a trained AlexNet and, for every weight tensor of every layer, print
# summary statistics and show a histogram of its values.
model = AlexNet_v1(class_num=5)  # functional api
# model = AlexNet_v2(class_num=5)  # subclass api
# model.build((None, 224, 224, 3))
model.load_weights("./myAlex.h5")
# model.load_weights("./submodel.h5")
model.summary()
for layer in model.layers:
    for index, weight in enumerate(layer.weights):
        # for conv kernels: [kernel_height, kernel_width, kernel_channel, kernel_number]
        weight_t = weight.numpy()
        # read a kernel information
        # k = weight_t[:, :, :, 0]

        # calculate mean, std, min, max
        weight_mean = weight_t.mean()
        weight_std = weight_t.std(ddof=1)
        weight_min = weight_t.min()
        weight_max = weight_t.max()
        # arguments follow the order of the placeholders (min before max)
        print("mean is {}, std is {}, min is {}, max is {}".format(weight_mean,
                                                                   weight_std,
                                                                   weight_min,
                                                                   weight_max))

        # plot hist image; close the previous figure first
        plt.close()
        weight_vec = np.reshape(weight_t, [-1])
        plt.hist(weight_vec, bins=50)
        plt.title(weight.name)
        plt.show()

================================================
FILE: tensorflow_classification/custom_dataset/train_fit.py
================================================
import os
import math
import datetime

import tensorflow as tf

from utils import generate_ds


def main():
    """
    Train a classifier head on top of a frozen ImageNet ResNet50 with
    ``model.fit``.

    Builds the datasets with ``generate_ds``, stacks global average pooling
    and a Dense layer (logits) on the frozen base model, and trains with
    checkpoint, TensorBoard and cosine learning-rate callbacks. Weights are
    written to ``./save_weights`` and logs to ``logs/fit``.
    """
    data_root = "/home/wz/my_project/my_github/data_set/flower_data/flower_photos"  # get data root path

    if not os.path.exists("./save_weights"):
        os.makedirs("./save_weights")

    num_classes = 5
    im_height = 224
    im_width = 224
    batch_size = 8
    epochs = 20
    log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    # single source of truth for the network input shape, so the model always
    # matches the size the datasets are generated with
    input_shape = (im_height, im_width, 3)

    train_ds, val_ds = generate_ds(data_root, im_height, im_width, batch_size)

    # create base model
    base_model = tf.keras.applications.ResNet50(include_top=False,
                                                input_shape=input_shape,
                                                weights='imagenet')
    # freeze base model
    base_model.trainable = False
    base_model.summary()

    # create new model on top
    inputs = tf.keras.Input(shape=input_shape)
    x = tf.keras.applications.resnet50.preprocess_input(inputs)
    x = base_model(x, training=False)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    outputs = tf.keras.layers.Dense(num_classes)(x)  # raw logits, no softmax
    model = tf.keras.Model(inputs, outputs)
    model.summary()

    # NOTE(review): the LearningRateScheduler callback below sets the learning
    # rate every epoch, so the 0.001 given here appears to be overridden.
    model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])

    def scheduler(epoch):
        """
        Custom cosine learning-rate curve.

        :param epoch: current training epoch
        :return: learning rate for this epoch
        """
        initial_lr = 0.01
        end_lr_rate = 0.001  # final lr = initial_lr * end_lr_rate
        rate = ((1 + math.cos(epoch * math.pi / epochs)) / 2) * (1 - end_lr_rate) + end_lr_rate  # cosine
        new_lr = rate * initial_lr

        return new_lr

    callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath='./save_weights/model_{epoch}.h5',
                                                    save_best_only=True,
                                                    save_weights_only=True,
                                                    monitor='val_accuracy'),
                 tf.keras.callbacks.TensorBoard(log_dir=log_dir,
                                                write_graph=True,
                                                histogram_freq=1),
                 tf.keras.callbacks.LearningRateScheduler(schedule=scheduler)]

    model.fit(x=train_ds,
              epochs=epochs,
              validation_data=val_ds,
              callbacks=callbacks)


if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/custom_dataset/utils.py
================================================
import os
import json
import random

import tensorflow as tf
import matplotlib.pyplot as plt


def read_split_data(root: str, val_rate: float = 0.2):
    """
    Split an image-folder dataset into training and validation lists.

    Every sub-directory of ``root`` is treated as one class. The mapping
    ``index -> class name`` is written to ``class_indices.json`` in the
    current working directory. For each class, ``int(n * val_rate)`` images
    are sampled (with a fixed random seed) for validation; the remaining
    images go to the training set.

    :param root: dataset root directory containing one folder per class
    :param val_rate: fraction of each class sampled into the validation set
    :return: ``(train_images_path, train_images_label,
        val_images_path, val_images_label)``
    :raises AssertionError: if ``root`` does not exist
    """
    random.seed(0)  # fixed seed so the random split is reproducible
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # walk the root folder; each sub-folder corresponds to one class
    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]
    # sort so the class order (and therefore the indices) is stable
    flower_class.sort()
    # build class name -> numeric index, and dump the inverse mapping to json
    class_indices = dict((k, v) for v, k in enumerate(flower_class))
    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of every training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of every validation image
    every_class_num = []  # number of samples found per class
    supported = [".jpg", ".JPG", ".jpeg", ".JPEG"]  # accepted file suffixes
    # walk the files of every class folder
    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # collect the paths of all files with a supported suffix
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        # numeric index of this class
        image_class = class_indices[cla]
        # record the number of samples of this class
        every_class_num.append(len(images))
        # Randomly sample the validation images by ratio. Stored as a set so
        # the membership test below is O(1) instead of scanning a list for
        # every image.
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:  # sampled for validation
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:  # everything else is used for training
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.\n{} for training, {} for validation".format(sum(every_class_num),
                                                                                            len(train_images_path),
                                                                                            len(val_images_path)
                                                                                            ))

    plot_image = False
    if plot_image:
        # bar chart of the number of images per class
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # replace the x ticks 0,1,2,3,4 with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # add the count above every bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        # x-axis label
        plt.xlabel('image class')
        # y-axis label
        plt.ylabel('number of images')
        # chart title
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label


def generate_ds(data_root: str,
                im_height: int,
                im_width: int,
                batch_size: int,
                val_rate: float = 0.1):
    """
    Split the dataset and build the training / validation tf.data pipelines.

    :param data_root: root directory of the dataset
    :param im_height: height of the images fed to the network
    :param im_width: width of the images fed to the network
    :param batch_size: batch size used for both pipelines
    :param val_rate: proportion of the data assigned to the validation set
    :return: (train_ds, val_ds), each yielding batches of (image, label);
             images are float32 and cropped or padded to [im_height, im_width, 3]
    """
    train_img_path, train_img_label, val_img_path, val_img_label = read_split_data(data_root, val_rate=val_rate)
    AUTOTUNE = tf.data.experimental.AUTOTUNE

    def load_image(img_path, label):
        # Deterministic part of the preprocessing: safe to cache.
        image = tf.io.read_file(img_path)
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.cast(image, tf.float32)
        image = tf.image.resize_with_crop_or_pad(image, im_height, im_width)
        return image, label

    def augment(image, label):
        # Random part of the preprocessing: must run after the cache, otherwise
        # the flips drawn during the first epoch are replayed in every epoch.
        return tf.image.random_flip_left_right(image), label

    # Configure dataset for performance
    def configure_for_performance(ds,
                                  shuffle_size: int,
                                  shuffle: bool = False,
                                  augment_fn=None):
        ds = ds.cache()  # keep the decoded images in memory after the first pass
        if shuffle:
            ds = ds.shuffle(buffer_size=shuffle_size)  # reshuffle every epoch
        if augment_fn is not None:
            ds = ds.map(augment_fn, num_parallel_calls=AUTOTUNE)
        ds = ds.batch(batch_size)
        ds = ds.prefetch(buffer_size=AUTOTUNE)  # prepare the next step while training
        return ds

    train_ds = tf.data.Dataset.from_tensor_slices((tf.constant(train_img_path),
                                                   tf.constant(train_img_label)))
    total_train = len(train_img_path)

    # Use Dataset.map to create a dataset of image, label pairs
    train_ds = train_ds.map(load_image, num_parallel_calls=AUTOTUNE)
    train_ds = configure_for_performance(train_ds, total_train, shuffle=True, augment_fn=augment)

    val_ds = tf.data.Dataset.from_tensor_slices((tf.constant(val_img_path),
                                                 tf.constant(val_img_label)))
    total_val = len(val_img_path)
    # Use Dataset.map to create a dataset of image, label pairs
    val_ds = val_ds.map(load_image, num_parallel_calls=AUTOTUNE)
    val_ds = configure_for_performance(val_ds, total_val)

    return train_ds, val_ds


================================================
FILE: tensorflow_classification/swin_transformer/model.py
================================================
import tensorflow as tf
from tensorflow.keras import Model, layers, initializers
import numpy as np


class PatchEmbed(layers.Layer):
    """
    2D Image to Patch Embedding

    Projects an image of shape [B, H, W, C] to a token sequence with a strided
    convolution whose kernel size and stride both equal ``patch_size``.

    Args:
        patch_size (int): side length of a (square) patch. Default: 4
        embed_dim (int): number of output channels of the projection. Default: 96
        norm_layer: layer class called as ``norm_layer(epsilon=1e-6, name="norm")``
            and applied to the tokens; when falsy a linear (identity) activation
            is used instead. Default: None
    """
    def __init__(self, patch_size=4, embed_dim=96, norm_layer=None):
        super(PatchEmbed, self).__init__()
        self.embed_dim = embed_dim
        self.patch_size = (patch_size, patch_size)
        self.norm = norm_layer(epsilon=1e-6, name="norm") if norm_layer else layers.Activation('linear')

        self.proj = layers.Conv2D(filters=embed_dim, kernel_size=patch_size,
                                  strides=patch_size, padding='SAME',
                                  kernel_initializer=initializers.LecunNormal(),
                                  bias_initializer=initializers.Zeros(),
                                  name="proj")

    def call(self, x, **kwargs):
        """
        Args:
            x: input images, unpacked as [B, H, W, C]

        Returns:
            (tokens, H, W): tokens of shape [B, H*W, embed_dim] together with the
            height and width of the patch grid.
        """
        _, H, W, _ = x.shape

        # padding
        # If H or W is not a multiple of patch_size, pad bottom/right up to the
        # next multiple. The outer modulo keeps the amount at 0 for a dimension
        # that is already divisible.
        pad_h = (self.patch_size[0] - H % self.patch_size[0]) % self.patch_size[0]
        pad_w = (self.patch_size[1] - W % self.patch_size[1]) % self.patch_size[1]
        if pad_h > 0 or pad_w > 0:
            # tf.pad needs one [before, after] pair per dimension of the
            # rank-4 input: batch, height, width, channels.
            paddings = tf.constant([[0, 0],
                                    [0, pad_h],
                                    [0, pad_w],
                                    [0, 0]])
            x = tf.pad(x, paddings)

        # downsample by a factor of patch_size
        x = self.proj(x)
        B, H, W, C = x.shape
        # [B, H, W, C] -> [B, H*W, C]
        x = tf.reshape(x, [B, -1, C])
        x = self.norm(x)
        return x, H, W


def window_partition(x, window_size: int):
    """
    Cut a feature map into non-overlapping square windows.

    Args:
        x: feature map unpacked as (B, H, W, C)
        window_size (int): side length of a window (M)

    Returns:
        windows: (num_windows*B, window_size, window_size, C)
    """
    batch, height, width, channels = x.shape
    # [B, H, W, C] -> [B, H//M, M, W//M, M, C]
    grid_shape = [batch,
                  height // window_size, window_size,
                  width // window_size, window_size,
                  channels]
    tiled = tf.reshape(x, grid_shape)
    # bring the two window-grid axes next to each other:
    # [B, H//M, M, W//M, M, C] -> [B, H//M, W//M, M, M, C]
    tiled = tf.transpose(tiled, perm=[0, 1, 3, 2, 4, 5])
    # fold batch and grid axes together: -> [B*num_windows, M, M, C]
    return tf.reshape(tiled, [-1, window_size, window_size, channels])


def window_reverse(windows, window_size: int, H: int, W: int):
    """
    Stitch a stack of windows back into a feature map.

    Args:
        windows: (num_windows*B, window_size, window_size, C)
        window_size (int): side length of a window (M)
        H (int): height of the feature map to rebuild
        W (int): width of the feature map to rebuild

    Returns:
        x: (B, H, W, C)
    """
    # number of windows that make up a single feature map
    windows_per_map = H * W / window_size / window_size
    batch = int(windows.shape[0] / windows_per_map)
    grid_rows, grid_cols = H // window_size, W // window_size
    # [B*num_windows, M, M, C] -> [B, H//M, W//M, M, M, C]
    grid = tf.reshape(windows, [batch, grid_rows, grid_cols, window_size, window_size, -1])
    # [B, H//M, W//M, M, M, C] -> [B, H//M, M, W//M, M, C]
    grid = tf.transpose(grid, perm=[0, 1, 3, 2, 4, 5])
    # [B, H//M, M, W//M, M, C] -> [B, H, W, C]
    return tf.reshape(grid, [batch, H, W, -1])


class PatchMerging(layers.Layer):
    """
    Downsampling layer: concatenates every 2x2 neighbourhood of tokens along the
    channel axis (C -> 4*C), normalizes it and projects it to 2*dim channels.

    Args:
        dim (int): number of input channels.
        norm_layer: normalization layer class, called as
            ``norm_layer(epsilon=1e-6, name="norm")``. Default: layers.LayerNormalization
        name (str, optional): layer name.
    """
    def __init__(self, dim: int, norm_layer=layers.LayerNormalization, name=None):
        super(PatchMerging, self).__init__(name=name)
        self.dim = dim
        self.reduction = layers.Dense(2*dim,
                                      use_bias=False,
                                      kernel_initializer=initializers.TruncatedNormal(stddev=0.02),
                                      name="reduction")
        self.norm = norm_layer(epsilon=1e-6, name="norm")

    def call(self, x, H, W):
        """
        x: [B, H*W, C]
        H, W: height and width of the token grid encoded in x

        Returns a tensor of shape [B, ceil(H/2)*ceil(W/2), 2*dim].
        """
        B, L, C = x.shape
        assert L == H * W, "input feature has wrong size"

        x = tf.reshape(x, [B, H, W, C])
        # padding
        # Pad each odd dimension by exactly one row/column. Padding an even
        # dimension as well would make it odd, and the strided slices below
        # would then have different shapes and could not be concatenated.
        pad_h, pad_w = H % 2, W % 2
        if pad_h or pad_w:
            paddings = tf.constant([[0, 0],
                                    [0, pad_h],
                                    [0, pad_w],
                                    [0, 0]])
            x = tf.pad(x, paddings)

        x0 = x[:, 0::2, 0::2, :]  # [B, H/2, W/2, C]
        x1 = x[:, 1::2, 0::2, :]  # [B, H/2, W/2, C]
        x2 = x[:, 0::2, 1::2, :]  # [B, H/2, W/2, C]
        x3 = x[:, 1::2, 1::2, :]  # [B, H/2, W/2, C]
        x = tf.concat([x0, x1, x2, x3], -1)  # [B, H/2, W/2, 4*C]
        x = tf.reshape(x, [B, -1, 4*C])  # [B, H/2*W/2, 4*C]

        x = self.norm(x)
        x = self.reduction(x)  # [B, H/2*W/2, 2*C]

        return x


class MLP(layers.Layer):
    """
    Two-layer feed-forward block (Dense -> GELU -> Dropout -> Dense -> Dropout),
    as used in Vision Transformer, MLP-Mixer and related networks.

    Args:
        in_features (int): width of the input, also used as the output width.
        mlp_ratio (float): hidden width is ``int(in_features * mlp_ratio)``. Default: 4.0
        drop (float): dropout rate applied after each Dense layer. Default: 0.0
        name (str, optional): layer name.
    """

    k_ini = initializers.TruncatedNormal(stddev=0.02)
    b_ini = initializers.Zeros()

    def __init__(self, in_features, mlp_ratio=4.0, drop=0., name=None):
        super().__init__(name=name)
        hidden_features = int(in_features * mlp_ratio)
        # both Dense layers share the class-level initializers
        init_kwargs = dict(kernel_initializer=self.k_ini, bias_initializer=self.b_ini)
        self.fc1 = layers.Dense(hidden_features, name="fc1", **init_kwargs)
        self.act = layers.Activation("gelu")
        self.fc2 = layers.Dense(in_features, name="fc2", **init_kwargs)
        self.drop = layers.Dropout(drop)

    def call(self, x, training=None):
        # expand, activate, drop
        hidden = self.drop(self.act(self.fc1(x)), training=training)
        # project back to in_features, drop again (same Dropout layer)
        return self.drop(self.fc2(hidden), training=training)


class WindowAttention(layers.Layer):
    r""" Window based multi-head self attention (W-MSA) module with relative position bias.
    It supports both shifted and non-shifted windows: for shifted windows the caller
    passes an additive ``mask`` to ``call``.

    Args:
        dim (int): Number of input channels.
        window_size (tuple[int]): The height and width of the window.
        num_heads (int): Number of attention heads. Default: 8
        qkv_bias (bool, optional):  If True, add a learnable bias to query, key, value. Default: False
        attn_drop_ratio (float, optional): Dropout ratio of attention weight. Default: 0.0
        proj_drop_ratio (float, optional): Dropout ratio of output. Default: 0.0
        name (str, optional): Layer name.
    """

    # initializers shared by the qkv and proj Dense layers
    k_ini = initializers.GlorotUniform()
    b_ini = initializers.Zeros()

    def __init__(self,
                 dim,
                 window_size,
                 num_heads=8,
                 qkv_bias=False,
                 attn_drop_ratio=0.,
                 proj_drop_ratio=0.,
                 name=None):
        super(WindowAttention, self).__init__(name=name)
        self.dim = dim
        self.window_size = window_size  # [Mh, Mw]
        self.num_heads = num_heads
        head_dim = dim // num_heads
        # scaling factor applied to q @ k^T: 1 / sqrt(head_dim)
        self.scale = head_dim ** -0.5

        # a single Dense layer produces q, k and v at once
        self.qkv = layers.Dense(dim * 3, use_bias=qkv_bias, name="qkv",
                                kernel_initializer=self.k_ini, bias_initializer=self.b_ini)
        self.attn_drop = layers.Dropout(attn_drop_ratio)
        self.proj = layers.Dense(dim, name="proj",
                                 kernel_initializer=self.k_ini, bias_initializer=self.b_ini)
        self.proj_drop = layers.Dropout(proj_drop_ratio)

    def build(self, input_shape):
        """Create the relative position bias table and the fixed index into it."""
        # define a parameter table of relative position bias
        # [2*Mh-1 * 2*Mw-1, nH]
        self.relative_position_bias_table = self.add_weight(
            shape=[(2 * self.window_size[0] - 1) * (2 * self.window_size[1] - 1), self.num_heads],
            initializer=initializers.TruncatedNormal(stddev=0.02),
            trainable=True,
            dtype=tf.float32,
            name="relative_position_bias_table"
        )

        # coordinates of every position inside a window
        coords_h = np.arange(self.window_size[0])
        coords_w = np.arange(self.window_size[1])
        coords = np.stack(np.meshgrid(coords_h, coords_w, indexing="ij"))  # [2, Mh, Mw]
        coords_flatten = np.reshape(coords, [2, -1])  # [2, Mh*Mw]
        # pairwise coordinate differences between all positions
        # [2, Mh*Mw, 1] - [2, 1, Mh*Mw]
        relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :]  # [2, Mh*Mw, Mh*Mw]
        relative_coords = np.transpose(relative_coords, [1, 2, 0])   # [Mh*Mw, Mh*Mw, 2]
        relative_coords[:, :, 0] += self.window_size[0] - 1  # shift to start from 0
        relative_coords[:, :, 1] += self.window_size[1] - 1
        # scale the row offset by the number of possible column offsets so that
        # the sum below maps each (row, column) offset pair to a unique table row
        relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1
        relative_position_index = relative_coords.sum(-1)  # [Mh*Mw, Mh*Mw]

        # stored as a non-trainable variable; it is only used as a gather index
        self.relative_position_index = tf.Variable(tf.convert_to_tensor(relative_position_index),
                                                   trainable=False,
                                                   dtype=tf.int64,
                                                   name="relative_position_index")

    def call(self, x, mask=None, training=None):
        """
        Args:
            x: input features with shape of (num_windows*B, Mh*Mw, C)
            mask: additive mask with shape of (num_windows, Mh*Mw, Mh*Mw) or None;
                it is added to the attention logits before the softmax
            training: whether training mode (forwarded to the dropout layers)

        Returns:
            tensor with the same shape as x: (num_windows*B, Mh*Mw, C)
        """
        # [batch_size*num_windows, Mh*Mw, total_embed_dim]
        B_, N, C = x.shape

        # qkv(): -> [batch_size*num_windows, Mh*Mw, 3 * total_embed_dim]
        qkv = self.qkv(x)
        # reshape: -> [batch_size*num_windows, Mh*Mw, 3, num_heads, embed_dim_per_head]
        qkv = tf.reshape(qkv, [B_, N, 3, self.num_heads, C // self.num_heads])
        # transpose: -> [3, batch_size*num_windows, num_heads, Mh*Mw, embed_dim_per_head]
        qkv = tf.transpose(qkv, [2, 0, 3, 1, 4])
        # [batch_size*num_windows, num_heads, Mh*Mw, embed_dim_per_head]
        q, k, v = qkv[0], qkv[1], qkv[2]

        # transpose: -> [batch_size*num_windows, num_heads, embed_dim_per_head, Mh*Mw]
        # multiply -> [batch_size*num_windows, num_heads, Mh*Mw, Mh*Mw]
        attn = tf.matmul(a=q, b=k, transpose_b=True) * self.scale

        # look up one bias per (query position, key position) pair and head
        # relative_position_bias(reshape): [Mh*Mw*Mh*Mw,nH] -> [Mh*Mw,Mh*Mw,nH]
        relative_position_bias = tf.gather(self.relative_position_bias_table,
                                           tf.reshape(self.relative_position_index, [-1]))
        relative_position_bias = tf.reshape(relative_position_bias,
                                            [self.window_size[0] * self.window_size[1],
                                             self.window_size[0] * self.window_size[1],
                                             -1])
        relative_position_bias = tf.transpose(relative_position_bias, [2, 0, 1])  # [nH, Mh*Mw, Mh*Mw]
        # broadcast the bias over the window/batch axis
        attn = attn + tf.expand_dims(relative_position_bias, 0)

        if mask is not None:
            # mask: [nW, Mh*Mw, Mh*Mw]
            nW = mask.shape[0]  # num_windows
            # attn(reshape): [batch_size, num_windows, num_heads, Mh*Mw, Mh*Mw]
            # mask(expand_dim): [1, nW, 1, Mh*Mw, Mh*Mw]
            attn = tf.reshape(attn, [B_ // nW, nW, self.num_heads, N, N]) + tf.expand_dims(tf.expand_dims(mask, 1), 0)
            attn = tf.reshape(attn, [-1, self.num_heads, N, N])

        attn = tf.nn.softmax(attn, axis=-1)
        attn = self.attn_drop(attn, training=training)

        # multiply -> [batch_size*num_windows, num_heads, Mh*Mw, embed_dim_per_head]
        x = tf.matmul(attn, v)
        # transpose: -> [batch_size*num_windows, Mh*Mw, num_heads, embed_dim_per_head]
        x = tf.transpose(x, [0, 2, 1, 3])
        # reshape: -> [batch_size*num_windows, Mh*Mw, total_embed_dim]
        x = tf.reshape(x, [B_, N, C])

        x = self.proj(x)
        x = self.proj_drop(x, training=training)
        return x


class SwinTransformerBlock(layers.Layer):
    r""" Swin Transformer Block.

    Applies (shifted-)window attention followed by an MLP, each wrapped in a
    residual connection. ``call`` reads the spatial size of the token grid from
    ``self.H`` and ``self.W``, which the caller must set before invoking the layer.

    Args:
        dim (int): Number of input channels.
        num_heads (int): Number of attention heads.
        window_size (int): Window size. Default: 7
        shift_size (int): Shift size for SW-MSA; 0 disables the shift. Default: 0
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4.0
        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
        drop (float, optional): Dropout rate. Default: 0.0
        attn_drop (float, optional): Attention dropout rate. Default: 0.0
        drop_path (float, optional): Stochastic depth rate. Default: 0.0
        name (str, optional): Layer name.
    """

    def __init__(self, dim, num_heads, window_size=7, shift_size=0,
                 mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0., drop_path=0., name=None):
        super().__init__(name=name)
        self.dim = dim
        self.num_heads = num_heads
        self.window_size = window_size
        self.shift_size = shift_size
        self.mlp_ratio = mlp_ratio
        assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size"

        self.norm1 = layers.LayerNormalization(epsilon=1e-6, name="norm1")
        self.attn = WindowAttention(dim,
                                    window_size=(window_size, window_size),
                                    num_heads=num_heads,
                                    qkv_bias=qkv_bias,
                                    attn_drop_ratio=attn_drop,
                                    proj_drop_ratio=drop,
                                    name="attn")
        # noise_shape (None, 1, 1) draws one keep/drop decision per sample
        self.drop_path = layers.Dropout(rate=drop_path, noise_shape=(None, 1, 1)) if drop_path > 0. \
            else layers.Activation("linear")
        self.norm2 = layers.LayerNormalization(epsilon=1e-6, name="norm2")
        # forward mlp_ratio so the documented parameter actually takes effect
        self.mlp = MLP(dim, mlp_ratio=mlp_ratio, drop=drop, name="mlp")

    def call(self, x, attn_mask, training=None):
        """
        Args:
            x: tokens of shape [B, H*W, C]
            attn_mask: additive attention mask for shifted windows; ignored when
                shift_size is 0
            training: whether training mode

        Returns:
            tensor of shape [B, H*W, C]
        """
        H, W = self.H, self.W
        B, L, C = x.shape
        assert L == H * W, "input feature has wrong size"

        shortcut = x
        x = self.norm1(x)
        x = tf.reshape(x, [B, H, W, C])

        # pad feature maps to multiples of window size
        pad_r = (self.window_size - W % self.window_size) % self.window_size
        pad_b = (self.window_size - H % self.window_size) % self.window_size
        if pad_r > 0 or pad_b > 0:
            # x is [B, H, W, C]: bottom padding belongs to axis 1 (height),
            # right padding to axis 2 (width)
            paddings = tf.constant([[0, 0],
                                    [0, pad_b],
                                    [0, pad_r],
                                    [0, 0]])
            x = tf.pad(x, paddings)

        _, Hp, Wp, _ = x.shape

        # cyclic shift
        if self.shift_size > 0:
            shifted_x = tf.roll(x, shift=(-self.shift_size, -self.shift_size), axis=(1, 2))
        else:
            shifted_x = x
            attn_mask = None

        # partition windows
        x_windows = window_partition(shifted_x, self.window_size)  # [nW*B, Mh, Mw, C]
        x_windows = tf.reshape(x_windows, [-1, self.window_size * self.window_size, C])  # [nW*B, Mh*Mw, C]

        # W-MSA/SW-MSA
        attn_windows = self.attn(x_windows, mask=attn_mask, training=training)  # [nW*B, Mh*Mw, C]

        # merge windows
        attn_windows = tf.reshape(attn_windows,
                                  [-1, self.window_size, self.window_size, C])  # [nW*B, Mh, Mw, C]
        shifted_x = window_reverse(attn_windows, self.window_size, Hp, Wp)  # [B, H', W', C]

        # reverse cyclic shift
        if self.shift_size > 0:
            x = tf.roll(shifted_x, shift=(self.shift_size, self.shift_size), axis=(1, 2))
        else:
            x = shifted_x

        if pad_r > 0 or pad_b > 0:
            # remove the padding added above
            x = tf.slice(x, begin=[0, 0, 0, 0], size=[B, H, W, C])

        x = tf.reshape(x, [B, H * W, C])

        # FFN
        x = shortcut + self.drop_path(x, training=training)
        x = x + self.drop_path(self.mlp(self.norm2(x), training=training), training=training)

        return x


class BasicLayer(layers.Layer):
    """
    A basic Swin Transformer layer for one stage.

    Runs ``depth`` SwinTransformerBlocks (even-indexed blocks unshifted,
    odd-indexed blocks shifted by ``window_size // 2``) followed by an optional
    downsample layer.

    Args:
        dim (int): Number of input channels.
        depth (int): Number of blocks.
        num_heads (int): Number of attention heads.
        window_size (int): Local window size.
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
        drop (float, optional): Dropout rate. Default: 0.0
        attn_drop (float, optional): Attention dropout rate. Default: 0.0
        drop_path (float | list[float] | tuple[float], optional): Stochastic depth rate;
            a list/tuple supplies one rate per block. Default: 0.0
        downsample (layer.Layer | None, optional): Downsample layer class at the end of the layer,
            instantiated as ``downsample(dim=dim, name="downsample")``. Default: None
        name (str, optional): Layer name.
    """

    def __init__(self, dim, depth, num_heads, window_size,
                 mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0.,
                 drop_path=0., downsample=None, name=None):
        super().__init__(name=name)
        self.dim = dim
        self.depth = depth
        self.window_size = window_size
        self.shift_size = window_size // 2

        # A per-block schedule may be given as a list or a tuple; anything
        # else is treated as a single rate shared by all blocks.
        per_block_rates = isinstance(drop_path, (list, tuple))

        # build blocks
        self.blocks = [
            SwinTransformerBlock(dim=dim,
                                 num_heads=num_heads,
                                 window_size=window_size,
                                 shift_size=0 if (i % 2 == 0) else self.shift_size,
                                 mlp_ratio=mlp_ratio,
                                 qkv_bias=qkv_bias,
                                 drop=drop,
                                 attn_drop=attn_drop,
                                 drop_path=drop_path[i] if per_block_rates else drop_path,
                                 name=f"block{i}")
            for i in range(depth)
        ]

        # patch merging layer
        if downsample is not None:
            self.downsample = downsample(dim=dim, name="downsample")
        else:
            self.downsample = None

    def create_mask(self, H, W):
        """
        Build the additive attention mask used by the shifted-window blocks.

        Returns a float32 tensor of shape [nW, Mh*Mw, Mh*Mw] holding 0 where two
        positions of a window carry the same region label and -100 otherwise.
        """
        # calculate attention mask for SW-MSA
        # make sure Hp and Wp are multiples of window_size
        Hp = int(np.ceil(H / self.window_size)) * self.window_size
        Wp = int(np.ceil(W / self.window_size)) * self.window_size
        # same axis layout as the feature map so window_partition can be reused
        img_mask = np.zeros([1, Hp, Wp, 1])  # [1, Hp, Wp, 1]
        h_slices = (slice(0, -self.window_size),
                    slice(-self.window_size, -self.shift_size),
                    slice(-self.shift_size, None))
        w_slices = (slice(0, -self.window_size),
                    slice(-self.window_size, -self.shift_size),
                    slice(-self.shift_size, None))

        # give each of the 3x3 regions its own label
        cnt = 0
        for h in h_slices:
            for w in w_slices:
                img_mask[:, h, w, :] = cnt
                cnt += 1

        img_mask = tf.convert_to_tensor(img_mask, dtype=tf.float32)
        mask_windows = window_partition(img_mask, self.window_size)  # [nW, Mh, Mw, 1]
        mask_windows = tf.reshape(mask_windows, [-1, self.window_size * self.window_size])  # [nW, Mh*Mw]
        # a non-zero difference means the two positions have different labels
        # [nW, 1, Mh*Mw] - [nW, Mh*Mw, 1]
        attn_mask = tf.expand_dims(mask_windows, 1) - tf.expand_dims(mask_windows, 2)
        attn_mask = tf.where(attn_mask != 0, -100.0, attn_mask)
        attn_mask = tf.where(attn_mask == 0, 0.0, attn_mask)

        return attn_mask

    def call(self, x, H, W, training=None):
        """
        Args:
            x: tokens of shape [B, H*W, C]
            H, W: height and width of the token grid
            training: whether training mode

        Returns:
            (x, H, W): the transformed tokens and the grid size after the
            optional downsample step.
        """
        attn_mask = self.create_mask(H, W)  # [nW, Mh*Mw, Mh*Mw]
        for blk in self.blocks:
            # the blocks read the grid size from these attributes
            blk.H, blk.W = H, W
            x = blk(x, attn_mask, training=training)

        if self.downsample is not None:
            x = self.downsample(x, H, W)
            # the grid size is halved, rounding up
            H, W = (H + 1) // 2, (W + 1) // 2

        return x, H, W


class SwinTransformer(Model):
    r""" Swin Transformer
        A TensorFlow/Keras impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows`  -
          https://arxiv.org/pdf/2103.14030

    Args:
        patch_size (int): Patch size. Default: 4
        num_classes (int): Number of classes for classification head. Default: 1000
        embed_dim (int): Patch embedding dimension. Default: 96
        depths (tuple(int)): Depth of each Swin Transformer layer.
        num_heads (tuple(int)): Number of attention heads in different layers.
        window_size (int): Window size. Default: 7
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4
        qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
        drop_rate (float): Dropout rate. Default: 0
        attn_drop_rate (float): Attention dropout rate. Default: 0
        drop_path_rate (float): Stochastic depth rate of the last block; earlier blocks
            get linearly smaller rates starting from 0. Default: 0.1
        norm_layer: Normalization layer class. Default: layers.LayerNormalization
        name (str, optional): Model name.
        **kwargs: accepted but not used by this constructor.
    """

    def __init__(self, patch_size=4, num_classes=1000,
                 embed_dim=96, depths=(2, 2, 6, 2), num_heads=(3, 6, 12, 24),
                 window_size=7, mlp_ratio=4., qkv_bias=True,
                 drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1,
                 norm_layer=layers.LayerNormalization, name=None, **kwargs):
        super().__init__(name=name)

        self.num_classes = num_classes
        self.num_layers = len(depths)
        self.embed_dim = embed_dim
        self.mlp_ratio = mlp_ratio

        # split image into non-overlapping patches
        self.patch_embed = PatchEmbed(patch_size=patch_size,
                                      embed_dim=embed_dim,
                                      norm_layer=norm_layer)
        self.pos_drop = layers.Dropout(drop_rate)

        # stochastic depth decay rule: one rate per block over the whole network
        drop_path_schedule = list(np.linspace(0, drop_path_rate, sum(depths)))

        # build layers
        # NOTE: unlike the figure in the paper, a stage built here does not hold
        # its own patch-merging layer but the one belonging to the next stage.
        self.stage_layers = []
        last_stage = self.num_layers - 1
        first_block = 0  # index of this stage's first block in the schedule
        for stage_idx, stage_depth in enumerate(depths):
            stage_rates = drop_path_schedule[first_block:first_block + stage_depth]
            stage = BasicLayer(dim=int(embed_dim * 2 ** stage_idx),
                               depth=stage_depth,
                               num_heads=num_heads[stage_idx],
                               window_size=window_size,
                               mlp_ratio=self.mlp_ratio,
                               qkv_bias=qkv_bias,
                               drop=drop_rate,
                               attn_drop=attn_drop_rate,
                               drop_path=stage_rates,
                               downsample=PatchMerging if stage_idx < last_stage else None,
                               name=f"layer{stage_idx}")
            self.stage_layers.append(stage)
            first_block += stage_depth

        self.norm = norm_layer(epsilon=1e-6, name="norm")
        self.head = layers.Dense(num_classes,
                                 kernel_initializer=initializers.TruncatedNormal(stddev=0.02),
                                 bias_initializer=initializers.Zeros(),
                                 name="head")

    def call(self, x, training=None):
        """Map a batch of images to class logits of shape [B, num_classes]."""
        tokens, H, W = self.patch_embed(x)  # tokens: [B, L, C]
        tokens = self.pos_drop(tokens, training=training)

        for stage in self.stage_layers:
            tokens, H, W = stage(tokens, H, W, training=training)

        # normalize, then average over the token axis: [B, L, C] -> [B, C]
        pooled = tf.reduce_mean(self.norm(tokens), axis=1)
        return self.head(pooled)


def swin_tiny_patch4_window7_224(num_classes: int = 1000, **kwargs):
    """Build the Swin-Tiny configuration: patch 4, window 7, embed dim 96, depths (2, 2, 6, 2)."""
    return SwinTransformer(name="swin_tiny_patch4_window7",
                           num_classes=num_classes,
                           patch_size=4,
                           window_size=7,
                           embed_dim=96,
                           depths=(2, 2, 6, 2),
                           num_heads=(3, 6, 12, 24),
                           **kwargs)


def swin_small_patch4_window7_224(num_classes: int = 1000, **kwargs):
    """Build the Swin-Small configuration: patch 4, window 7, embed dim 96, depths (2, 2, 18, 2)."""
    return SwinTransformer(name="swin_small_patch4_window7",
                           num_classes=num_classes,
                           in_chans=3,
                           patch_size=4,
                           window_size=7,
                           embed_dim=96,
                           depths=(2, 2, 18, 2),
                           num_heads=(3, 6, 12, 24),
                           **kwargs)


def swin_base_patch4_window7_224(num_classes: int = 1000, **kwargs):
    """Build the Swin-Base configuration: patch 4, window 7, embed dim 128, depths (2, 2, 18, 2)."""
    return SwinTransformer(name="swin_base_patch4_window7",
                           num_classes=num_classes,
                           in_chans=3,
                           patch_size=4,
                           window_size=7,
                           embed_dim=128,
                           depths=(2, 2, 18, 2),
                           num_heads=(4, 8, 16, 32),
                           **kwargs)


def swin_base_patch4_window12_384(num_classes: int = 1000, **kwargs):
    """Build the Swin-Base configuration: patch 4, window 12, embed dim 128, depths (2, 2, 18, 2)."""
    return SwinTransformer(name="swin_base_patch4_window12",
                           num_classes=num_classes,
                           in_chans=3,
                           patch_size=4,
                           window_size=12,
                           embed_dim=128,
                           depths=(2, 2, 18, 2),
                           num_heads=(4, 8, 16, 32),
                           **kwargs)


def swin_base_patch4_window7_224_in22k(num_classes: int = 21841, **kwargs):
    """Build the Swin-Base configuration (patch 4, window 7, embed dim 128) with a 21841-class default head."""
    return SwinTransformer(name="swin_base_patch4_window7",
                           num_classes=num_classes,
                           in_chans=3,
                           patch_size=4,
                           window_size=7,
                           embed_dim=128,
                           depths=(2, 2, 18, 2),
                           num_heads=(4, 8, 16, 32),
                           **kwargs)


def swin_base_patch4_window12_384_in22k(num_classes: int = 21841, **kwargs):
    """Build the Swin-Base configuration (patch 4, window 12, embed dim 128) with a 21841-class default head."""
    return SwinTransformer(name="swin_base_patch4_window12",
                           num_classes=num_classes,
                           in_chans=3,
                           patch_size=4,
                           window_size=12,
                           embed_dim=128,
                           depths=(2, 2, 18, 2),
                           num_heads=(4, 8, 16, 32),
                           **kwargs)


def swin_large_patch4_window7_224_in22k(num_classes: int = 21841, **kwargs):
    """Build the Swin-Large configuration (patch 4, window 7, embed dim 192) with a 21841-class default head."""
    return SwinTransformer(name="swin_large_patch4_window7",
                           num_classes=num_classes,
                           in_chans=3,
                           patch_size=4,
                           window_size=7,
                           embed_dim=192,
                           depths=(2, 2, 18, 2),
                           num_heads=(6, 12, 24, 48),
                           **kwargs)


def swin_large_patch4_window12_384_in22k(num_classes: int = 21841, **kwargs):
    """Build the Swin-Large configuration (patch 4, window 12, embed dim 192) with a 21841-class default head."""
    return SwinTransformer(name="swin_large_patch4_window12",
                           num_classes=num_classes,
                           in_chans=3,
                           patch_size=4,
                           window_size=12,
                           embed_dim=192,
                           depths=(2, 2, 18, 2),
                           num_heads=(6, 12, 24, 48),
                           **kwargs)


================================================
FILE: tensorflow_classification/swin_transformer/predict.py
================================================
import os
import json
import glob
import numpy as np

from PIL import Image
import tensorflow as tf
import matplotlib.pyplot as plt

from model import swin_tiny_patch4_window7_224 as create_model


def main():
    """
    Classify a single image with a trained Swin-Tiny model.

    Loads ``../tulip.jpg``, resizes and normalizes it, restores the weights from
    ``./save_weights/model.ckpt``, prints the probability of every class listed
    in ``./class_indices.json`` and shows the image titled with the top prediction.
    """
    num_classes = 5
    im_height = im_width = 224

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    # Force three channels: grayscale, RGBA or CMYK files would otherwise not
    # broadcast against the per-channel statistics below nor match the
    # [1, H, W, 3] input the model is built with.
    img = Image.open(img_path).convert("RGB")
    # resize image
    img = img.resize((im_width, im_height))
    plt.imshow(img)

    # read image
    img = np.array(img).astype(np.float32)

    # preprocess: scale to [0, 1], then normalize each channel
    img = (img / 255. - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]

    # Add the image to a batch where it's the only member.
    img = (np.expand_dims(img, 0))

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = create_model(num_classes=num_classes)
    model.build([1, im_height, im_width, 3])

    weights_path = './save_weights/model.ckpt'
    # a checkpoint is stored as several files sharing this prefix
    assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path)
    model.load_weights(weights_path)

    # the model outputs logits; turn them into probabilities
    result = np.squeeze(model.predict(img, batch_size=1))
    result = tf.keras.layers.Softmax()(result)
    predict_class = np.argmax(result)

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_class)],
                                                 result[predict_class])
    plt.title(print_res)
    for i in range(len(result)):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  result[i]))
    plt.show()


# Run the prediction demo only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/swin_transformer/train.py
================================================
import os
import re
import datetime
import sys

import tensorflow as tf
from tqdm import tqdm

from model import swin_tiny_patch4_window7_224 as create_model
from utils import generate_ds

# Compare (major, minor) numerically: as plain strings "2.10.0" sorts before
# "2.4.0", which would wrongly reject every TensorFlow release from 2.10 on.
assert tuple(int(part) for part in tf.version.VERSION.split(".")[:2]) >= (2, 4), \
    "version of tf must greater/equal than 2.4.0"


def main():
    """Fine-tune a Swin-Tiny classifier with a hand-written Keras training loop.

    Builds the train/validation datasets with ``generate_ds``, loads the
    converted pretrained weights from ``./swin_tiny_patch4_window7_224.h5``,
    trains for ``epochs`` epochs with Adam plus an explicit L2 penalty, logs
    loss/accuracy to TensorBoard under ``./logs/<timestamp>`` and stores the
    weights with the best validation accuracy in ``./save_weights/model.ckpt``.
    """
    data_root = "/data/flower_photos"  # dataset root handed to generate_ds

    weights_dir = "./save_weights"
    if not os.path.exists(weights_dir):
        os.makedirs(weights_dir)

    # hyper-parameters
    img_size = 224
    batch_size = 8
    epochs = 10
    num_classes = 5
    freeze_layers = False
    initial_lr = 0.0001
    weight_decay = 1e-5

    # one TensorBoard run directory per launch, with separate train/val writers
    timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    log_dir = "./logs/" + timestamp
    train_writer = tf.summary.create_file_writer(os.path.join(log_dir, "train"))
    val_writer = tf.summary.create_file_writer(os.path.join(log_dir, "val"))

    # datasets (the training one is augmented inside generate_ds)
    train_ds, val_ds = generate_ds(data_root,
                                   train_im_width=img_size,
                                   train_im_height=img_size,
                                   batch_size=batch_size,
                                   val_rate=0.2)

    # create model
    model = create_model(num_classes=num_classes)
    model.build((1, img_size, img_size, 3))

    # Download the pre-converted pretrained weights:
    # link: https://pan.baidu.com/s/1cHVwia2i3wD7-0Ueh2WmrQ  password: sq8c
    # Weights are matched by layer name; mismatching layers are skipped.
    pre_weights_path = './swin_tiny_patch4_window7_224.h5'
    assert os.path.exists(pre_weights_path), "cannot find {}".format(pre_weights_path)
    model.load_weights(pre_weights_path, by_name=True, skip_mismatch=True)

    # optionally train only the layers whose name contains "head"
    if freeze_layers:
        for layer in model.layers:
            if "head" in layer.name:
                print("training {}".format(layer.name))
            else:
                layer.trainable = False

    model.summary()

    # low-level Keras training objects
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    optimizer = tf.keras.optimizers.Adam(learning_rate=initial_lr)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

    # variables whose name matches this pattern are excluded from the L2 penalty
    no_decay = re.compile(".*(bias|gamma|beta).*")

    @tf.function
    def train_step(train_images, train_labels):
        with tf.GradientTape() as tape:
            logits = model(train_images, training=True)
            # cross entropy loss
            ce_loss = loss_object(train_labels, logits)

            # l2 loss over every trainable variable that is not exempt
            penalties = [tf.nn.l2_loss(var)
                         for var in model.trainable_variables
                         if no_decay.match(var.name) is None]
            total_loss = ce_loss + weight_decay * tf.add_n(penalties)

        grads = tape.gradient(total_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        # only the cross-entropy part is reported as the training loss
        train_loss(ce_loss)
        train_accuracy(train_labels, logits)

    @tf.function
    def val_step(val_images, val_labels):
        logits = model(val_images, training=False)
        val_loss(loss_object(val_labels, logits))
        val_accuracy(val_labels, logits)

    best_val_acc = 0.
    for epoch in range(epochs):
        # clear the statistics accumulated during the previous epoch
        for metric in (train_loss, train_accuracy, val_loss, val_accuracy):
            metric.reset_states()

        # train
        train_bar = tqdm(train_ds, file=sys.stdout)
        for images, labels in train_bar:
            train_step(images, labels)

            # show running statistics in the progress bar
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(
                epoch + 1, epochs, train_loss.result(), train_accuracy.result())

        # validate
        val_bar = tqdm(val_ds, file=sys.stdout)
        for images, labels in val_bar:
            val_step(images, labels)

            # show running statistics in the progress bar
            val_bar.desc = "valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(
                epoch + 1, epochs, val_loss.result(), val_accuracy.result())

        # write loss and accuracy of both phases to TensorBoard
        for writer, loss_metric, acc_metric in ((train_writer, train_loss, train_accuracy),
                                                (val_writer, val_loss, val_accuracy)):
            with writer.as_default():
                tf.summary.scalar("loss", loss_metric.result(), epoch)
                tf.summary.scalar("accuracy", acc_metric.result(), epoch)

        # only save best weights
        current_val_acc = val_accuracy.result()
        if current_val_acc > best_val_acc:
            best_val_acc = current_val_acc
            model.save_weights("./save_weights/model.ckpt", save_format="tf")


# Start training only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/swin_transformer/trans_weights.py
================================================
import torch
from model import *


def main(weights_path: str,
         model_name: str,
         model: tf.keras.Model):
    """Copy weights from a PyTorch checkpoint into ``model`` and save them as ``.h5``.

    Every entry of the checkpoint's ``"model"`` dict is renamed to the naming
    scheme of the Keras variables ('.' becomes '/', ``weight`` becomes
    ``kernel`` or ``gamma``, norm ``bias`` becomes ``beta``), transposed where
    needed, and assigned to the variable of ``model`` with the same name.
    Variables with no matching entry or with a different shape are reported
    with ``print`` and left untouched.

    :param weights_path: path of the PyTorch checkpoint; it is loaded with
        ``torch.load`` and must contain a ``"model"`` entry
    :param model_name: stem of the output file, written as ``./<model_name>.h5``
    :param model: Keras model whose weights are overwritten in place
    """
    # variable name (text before the first ':') -> variable
    var_dict = {v.name.split(':')[0]: v for v in model.weights}

    # NOTE(review): torch.load unpickles the file; only load checkpoints from
    # a trusted source.
    weights_dict = torch.load(weights_path, map_location="cpu")["model"]
    # converted name -> converted value
    w_dict = {}
    for k, v in weights_dict.items():
        if "patch_embed" in k:
            k = k.replace(".", "/")
            if "proj" in k:
                k = k.replace("proj/weight", "proj/kernel")
                if len(v.shape) > 1:
                    # conv weights: reorder axes with (2, 3, 1, 0)
                    # presumably PyTorch (out, in, h, w) -> Keras (h, w, in, out) - TODO confirm
                    v = np.transpose(v.numpy(), (2, 3, 1, 0)).astype(np.float32)
                    w_dict[k] = v
                else:
                    # bias
                    w_dict[k] = v
            elif "norm" in k:
                k = k.replace("weight", "gamma").replace("bias", "beta")
                w_dict[k] = v
            # patch_embed keys that are neither "proj" nor "norm" are dropped
        elif "layers" in k:
            # "layers.<i>...." -> prefix "layer<i>"
            k = k.replace("layers", "layer")
            split_k = k.split(".")
            layer_id = split_k[0] + split_k[1]
            if "block" in k:
                # "blocks.<j>" -> "block<j>"
                split_k[2] = "block"
                black_id = split_k[2] + split_k[3]
                k = "/".join([layer_id, black_id, *split_k[4:]])
                if "attn" in k or "mlp" in k:
                    k = k.replace("weight", "kernel")
                    if "kernel" in k:
                        # 2-D weight matrices are stored transposed on the Keras side
                        v = np.transpose(v.numpy(), (1, 0)).astype(np.float32)
                elif "norm" in k:
                    k = k.replace("weight", "gamma").replace("bias", "beta")
                w_dict[k] = v
            elif "downsample" in k:
                k = "/".join([layer_id, *split_k[2:]])
                if "reduction" in k:
                    k = k.replace("weight", "kernel")
                    if "kernel" in k:
                        v = np.transpose(v.numpy(), (1, 0)).astype(np.float32)
                elif "norm" in k:
                    k = k.replace("weight", "gamma").replace("bias", "beta")
                w_dict[k] = v
            # "layers" keys without "block" or "downsample" are dropped
        elif "norm" in k:
            # norm entry outside patch_embed / layers
            k = k.replace(".", "/").replace("weight", "gamma").replace("bias", "beta")
            w_dict[k] = v
        elif "head" in k:
            k = k.replace(".", "/")
            k = k.replace("weight", "kernel")
            if "kernel" in k:
                v = np.transpose(v.numpy(), (1, 0)).astype(np.float32)
            w_dict[k] = v

    # assign every converted value to the model variable of the same name;
    # problems are only printed, they do not abort the conversion
    for key, var in var_dict.items():
        if key in w_dict:
            if w_dict[key].shape != var.shape:
                msg = "shape mismatch: {}".format(key)
                print(msg)
            else:
                var.assign(w_dict[key], read_value=False)
        else:
            msg = "Not found {} in {}".format(key, weights_path)
            print(msg)

    model.save_weights("./{}.h5".format(model_name))


if __name__ == '__main__':
    # Convert the Swin-Tiny checkpoint; the commented-out calls further down
    # do the same for the other model variants.
    # trained ImageNet-1K
    # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth
    variant = "swin_tiny_patch4_window7_224"
    model = swin_tiny_patch4_window7_224()
    model.build((1, 224, 224, 3))
    main(weights_path="./{}.pth".format(variant),
         model_name=variant,
         model=model)

    # model = swin_small_patch4_window7_224()
    # model.build((1, 224, 224, 3))
    # # trained ImageNet-1K
    # # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth
    # main(weights_path="./swin_small_patch4_window7_224.pth",
    #      model_name="swin_small_patch4_window7_224",
    #      model=model)

    # model = swin_base_patch4_window7_224()
    # model.build((1, 224, 224, 3))
    # # trained ImageNet-1K
    # # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224.pth
    # main(weights_path="./swin_base_patch4_window7_224.pth",
    #      model_name="swin_base_patch4_window7_224",
    #      model=model)

    # model = swin_base_patch4_window12_384()
    # model.build((1, 384, 384, 3))
    # # trained ImageNet-1K
    # # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384.pth
    # main(weights_path="./swin_base_patch4_window12_384.pth",
    #      model_name="swin_base_patch4_window12_384",
    #      model=model)

    # model = swin_base_patch4_window7_224_in22k()
    # model.build((1, 224, 224, 3))
    # # trained ImageNet-22K
    # # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224_22k.pth
    # main(weights_path="./swin_base_patch4_window7_224_22k.pth",
    #      model_name="swin_base_patch4_window7_224_22k",
    #      model=model)

    # model = swin_base_patch4_window12_384_in22k()
    # model.build((1, 384, 384, 3))
    # # trained ImageNet-22K
    # # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384_22k.pth
    # main(weights_path="./swin_base_patch4_window12_384_22k.pth",
    #      model_name="swin_base_patch4_window12_384_22k",
    #      model=model)

    # model = swin_large_patch4_window7_224_in22k()
    # model.build((1, 224, 224, 3))
    # # trained ImageNet-22K
    # # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window7_224_22k.pth
    # main(weights_path="./swin_large_patch4_window7_224_22k.pth",
    #      model_name="swin_large_patch4_window7_224_22k",
    #      model=model)

    # model = swin_large_patch4_window12_384_in22k()
    # model.build((1, 384, 384, 3))
    # # trained ImageNet-22K
    # # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth
    # main(weights_path="./swin_large_patch4_window12_384_22k.pth",
    #      model_name="swin_large_patch4_window12_384_22k",
    #      model=model)


================================================
FILE: tensorflow_classification/swin_transformer/utils.py
================================================
import os
import json
import random

import tensorflow as tf
import matplotlib.pyplot as plt


def read_split_data(root: str, val_rate: float = 0.2):
    """Split a folder-per-class image dataset into training and validation lists.

    Every sub-directory of ``root`` is treated as one class; classes are sorted
    by name and numbered from 0. The index-to-name mapping is written to
    ``class_indices.json`` in the current working directory. For each class,
    ``int(n * val_rate)`` of its images are sampled for validation and the rest
    are used for training.

    :param root: dataset root directory
    :param val_rate: fraction of every class that goes to the validation set
    :return: ``(train_images_path, train_images_label,
             val_images_path, val_images_label)``, four parallel lists
    :raises AssertionError: if ``root`` does not exist
    """
    random.seed(0)  # fixed seed so the random split is repeatable
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # walk the root folder, one sub-folder per class
    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]
    # sort so the class order is always the same
    flower_class.sort()
    # class name -> numeric index, and the inverse mapping stored as json
    class_indices = dict((k, v) for v, k in enumerate(flower_class))
    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of every training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of every validation image
    every_class_num = []  # number of samples per class
    supported = [".jpg", ".JPG", ".jpeg", ".JPEG"]  # accepted file extensions
    # walk the files of every class folder
    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # os.listdir returns entries in arbitrary order; sort them so that the
        # seeded sampling below picks the same files on every machine
        images = [os.path.join(root, cla, i) for i in sorted(os.listdir(cla_path))
                  if os.path.splitext(i)[-1] in supported]
        # numeric index of this class
        image_class = class_indices[cla]
        # remember how many samples this class has
        every_class_num.append(len(images))
        # sample the validation images; a set gives O(1) membership tests below
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:  # sampled for validation
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:  # everything else is used for training
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.\n{} for training, {} for validation".format(sum(every_class_num),
                                                                                            len(train_images_path),
                                                                                            len(val_images_path)
                                                                                            ))

    plot_image = False
    if plot_image:
        # bar chart of the number of images per class
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # replace the x ticks 0,1,2,3,4 with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # put the count above every bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        # x axis label
        plt.xlabel('image class')
        # y axis label
        plt.ylabel('number of images')
        # chart title
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label


def generate_ds(data_root: str,
                train_im_height: int = 224,
                train_im_width: int = 224,
                val_im_height: int = None,
                val_im_width: int = None,
                batch_size: int = 8,
                val_rate: float = 0.1,
                cache_data: bool = False):
    """
    Split the dataset and build the training and validation ``tf.data`` pipelines.

    :param data_root: dataset root directory
    :param train_im_height: height of the images fed to the network for training
    :param train_im_width:  width of the images fed to the network for training
    :param val_im_height: height of the validation images (defaults to the training height)
    :param val_im_width:  width of the validation images (defaults to the training width)
    :param batch_size: batch size of both datasets
    :param val_rate:  fraction of the data that is moved to the validation set
    :param cache_data: whether to cache the decoded training images in memory
    :return: ``(train_ds, val_ds)``, batched datasets of ``(image, label)``
    """
    assert train_im_height is not None
    assert train_im_width is not None
    if val_im_width is None:
        val_im_width = train_im_width
    if val_im_height is None:
        val_im_height = train_im_height

    train_img_path, train_img_label, val_img_path, val_img_label = read_split_data(data_root, val_rate=val_rate)
    AUTOTUNE = tf.data.experimental.AUTOTUNE

    def load_image(img_path, im_height, im_width):
        # deterministic part shared by both pipelines:
        # read -> decode -> center crop/pad to the target size -> normalise
        image = tf.io.read_file(img_path)
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.cast(image, tf.float32)
        image = tf.image.resize_with_crop_or_pad(image, im_height, im_width)
        image = (image / 255. - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]
        return image

    def process_train_info(img_path, label):
        return load_image(img_path, train_im_height, train_im_width), label

    def process_val_info(img_path, label):
        return load_image(img_path, val_im_height, val_im_width), label

    def augment_train_info(image, label):
        # Random augmentation lives in its own map so it can run after cache();
        # a flip placed before the cache would be frozen after the first epoch.
        return tf.image.random_flip_left_right(image), label

    # Configure dataset for performance
    def configure_for_performance(ds,
                                  shuffle_size: int,
                                  shuffle: bool = False,
                                  cache: bool = False,
                                  augment=None):
        if cache:
            ds = ds.cache()  # keep the decoded data in memory after the first pass
        if shuffle:
            ds = ds.shuffle(buffer_size=shuffle_size)  # shuffle the sample order
        if augment is not None:
            ds = ds.map(augment, num_parallel_calls=AUTOTUNE)  # re-drawn every epoch
        ds = ds.batch(batch_size)                      # set the batch size
        ds = ds.prefetch(buffer_size=AUTOTUNE)         # prepare the next step while training
        return ds

    train_ds = tf.data.Dataset.from_tensor_slices((tf.constant(train_img_path),
                                                   tf.constant(train_img_label)))
    total_train = len(train_img_path)

    # Use Dataset.map to create a dataset of image, label pairs
    train_ds = train_ds.map(process_train_info, num_parallel_calls=AUTOTUNE)
    train_ds = configure_for_performance(train_ds, total_train, shuffle=True, cache=cache_data,
                                         augment=augment_train_info)

    val_ds = tf.data.Dataset.from_tensor_slices((tf.constant(val_img_path),
                                                 tf.constant(val_img_label)))
    total_val = len(val_img_path)
    # Use Dataset.map to create a dataset of image, label pairs
    val_ds = val_ds.map(process_val_info, num_parallel_calls=AUTOTUNE)
    val_ds = configure_for_performance(val_ds, total_val, cache=False)

    return train_ds, val_ds


================================================
FILE: tensorflow_classification/tensorboard_test/train_fit.py
================================================
import json
import os
import math
import datetime

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Compare (major, minor) numerically: as plain strings "2.10.0" sorts before
# "2.4.0", which would wrongly reject every TensorFlow release from 2.10 on.
assert tuple(int(part) for part in tf.version.VERSION.split(".")[:2]) >= (2, 4), \
    "version of tf must greater/equal than 2.4.0"


def main():
    """Train a classification head on a frozen ResNet50 with ``model.fit``.

    Reads the images from ``<cwd>/../../data_set/flower_data/{train,val}``,
    writes the index-to-class mapping to ``class_indices.json``, trains for
    ``epochs`` epochs with a cosine learning-rate schedule, logs to TensorBoard
    under ``logs/fit/<timestamp>`` and stores the best weights (judged by
    validation accuracy) in ``./save_weights``.

    Raises:
        AssertionError: if the train or validation directory does not exist.
    """
    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    train_dir = os.path.join(image_path, "train")
    validation_dir = os.path.join(image_path, "val")
    assert os.path.exists(train_dir), "cannot find {}".format(train_dir)
    assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir)

    if not os.path.exists("./save_weights"):
        os.makedirs("./save_weights")

    num_classes = 5
    im_height = 224
    im_width = 224
    batch_size = 8
    epochs = 20
    log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

    # data generator with data augmentation
    train_image_generator = ImageDataGenerator(horizontal_flip=True)

    validation_image_generator = ImageDataGenerator()

    train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,
                                                               batch_size=batch_size,
                                                               shuffle=True,
                                                               target_size=(im_height, im_width),
                                                               class_mode='categorical')
    total_train = train_data_gen.n

    # get class dict
    class_indices = train_data_gen.class_indices

    # transform value and key of dict
    inverse_dict = dict((val, key) for key, val in class_indices.items())
    # write dict into json file
    json_str = json.dumps(inverse_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,
                                                                  batch_size=batch_size,
                                                                  shuffle=False,
                                                                  target_size=(im_height, im_width),
                                                                  class_mode='categorical')

    total_val = val_data_gen.n
    print("using {} images for training, {} images for validation.".format(total_train,
                                                                           total_val))

    # create base model; the input shape follows the generator's target size
    base_model = tf.keras.applications.ResNet50(include_top=False,
                                                input_shape=(im_height, im_width, 3),
                                                weights='imagenet')
    # freeze base model
    base_model.trainable = False
    base_model.summary()

    # create new model on top
    inputs = tf.keras.Input(shape=(im_height, im_width, 3))
    x = tf.keras.applications.resnet50.preprocess_input(inputs)
    x = base_model(x, training=False)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    outputs = tf.keras.layers.Dense(num_classes)(x)
    model = tf.keras.Model(inputs, outputs)
    model.summary()

    # The learning rate given here is overwritten at the start of every epoch
    # by the LearningRateScheduler callback below.
    model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9),
                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
                  metrics=[tf.keras.metrics.CategoricalAccuracy("accuracy")])

    # custom learning-rate curve
    def scheduler(epoch):
        initial_lr = 0.01
        end_lr = 0.001
        rate = ((1 + math.cos(epoch * math.pi / epochs)) / 2) * (1 - end_lr) + end_lr  # cosine
        new_lr = rate * initial_lr

        return new_lr

    # "Best" is judged on the validation metric ("val_" prefix): monitoring the
    # training accuracy would keep checkpoints that merely fit the training set.
    callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath='./save_weights/model_{epoch}.h5',
                                                    save_best_only=True,
                                                    save_weights_only=True,
                                                    monitor="val_accuracy"),
                 tf.keras.callbacks.TensorBoard(log_dir=log_dir,
                                                write_graph=True,
                                                histogram_freq=1),
                 tf.keras.callbacks.LearningRateScheduler(schedule=scheduler)]

    model.fit(x=train_data_gen,
              epochs=epochs,
              validation_data=val_data_gen,
              callbacks=callbacks)


# Start training only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/tensorboard_test/train_not_fit.py
================================================
import json
import os
import math
import datetime

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tqdm import tqdm
# Compare (major, minor) numerically: as plain strings "2.10.0" sorts before
# "2.4.0", which would wrongly reject every TensorFlow release from 2.10 on.
assert tuple(int(part) for part in tf.version.VERSION.split(".")[:2]) >= (2, 4), \
    "version of tf must greater/equal than 2.4.0"


def main():
    """Train a classification head on a frozen ResNet50 with a custom loop.

    Reads the images from ``<cwd>/../../data_set/flower_data/{train,val}``,
    writes the index-to-class mapping to ``class_indices.json``, trains for
    ``epochs`` epochs with a cosine learning-rate schedule, logs loss, accuracy
    and learning rate to TensorBoard under ``./logs/not_fit/<timestamp>`` and
    saves the weights whenever the validation accuracy improves.

    Raises:
        AssertionError: if the train or validation directory does not exist.
    """
    # The progress bars write to sys.stdout; imported locally because the
    # module header does not import sys.
    import sys

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    train_dir = os.path.join(image_path, "train")
    validation_dir = os.path.join(image_path, "val")
    assert os.path.exists(train_dir), "cannot find {}".format(train_dir)
    assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir)

    if not os.path.exists("./save_weights"):
        os.makedirs("./save_weights")

    num_classes = 5
    im_height = 224
    im_width = 224
    batch_size = 16
    epochs = 20
    log_dir = "./logs/not_fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    train_writer = tf.summary.create_file_writer(os.path.join(log_dir, "train"))
    val_writer = tf.summary.create_file_writer(os.path.join(log_dir, "val"))

    # data generator with data augmentation
    train_image_generator = ImageDataGenerator(horizontal_flip=True)

    validation_image_generator = ImageDataGenerator()

    train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,
                                                               batch_size=batch_size,
                                                               shuffle=True,
                                                               target_size=(im_height, im_width),
                                                               class_mode='categorical')
    total_train = train_data_gen.n

    # get class dict
    class_indices = train_data_gen.class_indices

    # transform value and key of dict
    inverse_dict = dict((val, key) for key, val in class_indices.items())
    # write dict into json file
    json_str = json.dumps(inverse_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,
                                                                  batch_size=batch_size,
                                                                  shuffle=False,
                                                                  target_size=(im_height, im_width),
                                                                  class_mode='categorical')

    total_val = val_data_gen.n
    print("using {} images for training, {} images for validation.".format(total_train,
                                                                           total_val))

    # create base model; the input shape follows the generator's target size
    base_model = tf.keras.applications.ResNet50(include_top=False,
                                                input_shape=(im_height, im_width, 3),
                                                weights='imagenet')
    # freeze base model
    base_model.trainable = False
    base_model.summary()

    # create new model on top
    inputs = tf.keras.Input(shape=(im_height, im_width, 3))
    x = tf.keras.applications.resnet50.preprocess_input(inputs)
    x = base_model(x, training=False)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    outputs = tf.keras.layers.Dense(num_classes)(x)
    model = tf.keras.Model(inputs, outputs)
    model.summary()

    # custom learning-rate curve
    def scheduler(epoch):
        initial_lr = 0.01
        end_lr = 0.001
        rate = ((1 + math.cos(epoch * math.pi / epochs)) / 2) * (1 - end_lr) + end_lr  # cosine
        new_lr = rate * initial_lr

        with train_writer.as_default():
            tf.summary.scalar('learning rate', data=new_lr, step=epoch)

        return new_lr

    # using keras low level api for training
    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.CategoricalAccuracy(name='val_accuracy')

    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            output = model(images, training=True)
            loss = loss_object(labels, output)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(labels, output)

    @tf.function
    def test_step(images, labels):
        output = model(images, training=False)
        t_loss = loss_object(labels, output)

        val_loss(t_loss)
        val_accuracy(labels, output)

    best_val_accuracy = 0.
    for epoch in range(epochs):
        train_loss.reset_states()  # clear history info
        train_accuracy.reset_states()  # clear history info
        val_loss.reset_states()  # clear history info
        val_accuracy.reset_states()  # clear history info

        # Set the learning rate before training, so that this epoch runs with
        # the value scheduler() logs for this step (setting it afterwards would
        # apply every value one epoch late).
        optimizer.learning_rate = scheduler(epoch)

        print("Epoch [{}/{}]".format(epoch + 1, epochs))
        # train
        # Keras directory iterators loop indefinitely, so draw exactly one
        # epoch worth of batches (len() batches) with next() instead of
        # iterating the generator directly.
        train_bar = tqdm(range(len(train_data_gen)), file=sys.stdout)
        for _ in train_bar:
            images, labels = next(train_data_gen)
            train_step(images, labels)

            # print train process
            train_bar.desc = "train_loss:{:.3f}, train_acc:{:.3f}".format(train_loss.result(),
                                                                          train_accuracy.result())

        # validation (bounded for the same reason as the training loop)
        val_bar = tqdm(range(len(val_data_gen)), file=sys.stdout)
        for _ in val_bar:
            test_images, test_labels = next(val_data_gen)
            test_step(test_images, test_labels)

            # print val process
            val_bar.desc = "val_loss:{:.3f}, val_acc:{:.3f}".format(val_loss.result(),
                                                                    val_accuracy.result())

        with train_writer.as_default():
            tf.summary.scalar("loss", train_loss.result(), epoch)
            tf.summary.scalar("accuracy", train_accuracy.result(), epoch)

        with val_writer.as_default():
            tf.summary.scalar("loss", val_loss.result(), epoch)
            tf.summary.scalar("accuracy", val_accuracy.result(), epoch)

        # keep a checkpoint only when the validation accuracy improves
        if val_accuracy.result() > best_val_accuracy:
            best_val_accuracy = val_accuracy.result()
            model.save_weights("./save_weights/model_{}.ckpt".format(epoch), save_format="tf")


# Start training only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/vision_transformer/predict.py
================================================
import os
import json
import glob
import numpy as np

from PIL import Image
import tensorflow as tf
import matplotlib.pyplot as plt

from vit_model import vit_base_patch16_224_in21k as create_model


def main():
    """Classify one image with a trained ViT model and display the result.

    Reads ``../tulip.jpg``, resizes it to 224x224, scales the pixels to
    ``[-1, 1]``, restores the weights stored under
    ``./save_weights/model.ckpt`` and prints the softmax probability of every
    class listed in ``./class_indices.json``. The top prediction becomes the
    title of the displayed image.

    Raises:
        AssertionError: if the image, the class-index file or the checkpoint
            files cannot be found.
    """
    num_classes = 5
    im_height = im_width = 224

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    # Force three channels: the model is built for (H, W, 3) input, so a
    # grayscale, palette or RGBA file would produce a wrongly shaped array.
    img = Image.open(img_path).convert("RGB")
    # resize image
    img = img.resize((im_width, im_height))
    plt.imshow(img)

    # read image
    img = np.array(img).astype(np.float32)

    # preprocess
    img = (img / 255. - 0.5) / 0.5

    # Add the image to a batch where it's the only member.
    img = (np.expand_dims(img, 0))

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model; the build shape follows the resize target used above
    model = create_model(num_classes=num_classes, has_logits=False)
    model.build([1, im_height, im_width, 3])

    # A TF-format checkpoint is several files sharing this prefix, hence the
    # glob instead of a plain os.path.exists check.
    weights_path = './save_weights/model.ckpt'
    assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path)
    model.load_weights(weights_path)

    # The model output is passed through Softmax here to obtain probabilities.
    result = np.squeeze(model.predict(img, batch_size=1))
    result = tf.keras.layers.Softmax()(result)
    predict_class = np.argmax(result)

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_class)],
                                                 result[predict_class])
    plt.title(print_res)
    for i in range(len(result)):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  result[i]))
    plt.show()


# Run the prediction only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/vision_transformer/train.py
================================================
import os
import re
import sys
import math
import datetime

import tensorflow as tf
from tqdm import tqdm

from vit_model import vit_base_patch16_224_in21k as create_model
from utils import generate_ds

# Compare (major, minor) numerically: as plain strings "2.10.0" sorts before
# "2.4.0", which would wrongly reject every TensorFlow release from 2.10 on.
assert tuple(int(part) for part in tf.version.VERSION.split(".")[:2]) >= (2, 4), \
    "version of tf must greater/equal than 2.4.0"


def main():
    """Fine-tune the ViT model on an image-folder classification dataset.

    Steps performed:
      1. build the train/val datasets from ``data_root``;
      2. create the model and load the pre-trained weights by layer name;
      3. optionally freeze every layer whose name contains neither
         ``pre_logits`` nor ``head``;
      4. run a custom training loop (cross entropy + l2 penalty, SGD with a
         cosine learning-rate curve), logging to TensorBoard;
      5. save the weights to ``./save_weights/model.ckpt`` whenever the
         validation accuracy improves.
    """
    data_root = "/data/flower_photos"  # get data root path

    os.makedirs("./save_weights", exist_ok=True)

    batch_size = 8
    epochs = 10
    num_classes = 5
    freeze_layers = True
    initial_lr = 0.001
    weight_decay = 1e-4

    log_dir = "./logs/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    train_writer = tf.summary.create_file_writer(os.path.join(log_dir, "train"))
    val_writer = tf.summary.create_file_writer(os.path.join(log_dir, "val"))

    # data generator with data augmentation
    train_ds, val_ds = generate_ds(data_root, batch_size=batch_size, val_rate=0.2)

    # create model
    model = create_model(num_classes=num_classes, has_logits=False)
    model.build((1, 224, 224, 3))

    # Download the pre-trained weights that were converted in advance
    # link: https://pan.baidu.com/s/1ro-6bebc8zroYfupn-7jVQ  password: s9d9
    # load weights
    pre_weights_path = './ViT-B_16.h5'
    assert os.path.exists(pre_weights_path), "cannot find {}".format(pre_weights_path)
    model.load_weights(pre_weights_path, by_name=True, skip_mismatch=True)

    # freeze bottom layers
    if freeze_layers:
        for layer in model.layers:
            if "pre_logits" not in layer.name and "head" not in layer.name:
                layer.trainable = False
            else:
                print("training {}".format(layer.name))

    model.summary()

    # custom learning rate curve
    def scheduler(now_epoch):
        """Return the cosine-decayed learning rate for ``now_epoch`` and log it."""
        end_lr_rate = 0.01  # end_lr = initial_lr * end_lr_rate
        rate = ((1 + math.cos(now_epoch * math.pi / epochs)) / 2) * (1 - end_lr_rate) + end_lr_rate  # cosine
        new_lr = rate * initial_lr

        # writing lr into tensorboard
        # (use the function argument as the step, not the enclosing loop variable)
        with train_writer.as_default():
            tf.summary.scalar('learning rate', data=new_lr, step=now_epoch)

        return new_lr

    # using keras low level api for training
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    optimizer = tf.keras.optimizers.SGD(learning_rate=initial_lr, momentum=0.9)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

    # Variables whose name contains bias/gamma/beta are excluded from the l2 penalty.
    # Compiled once here rather than inside the training step.
    matcher = re.compile(".*(bias|gamma|beta).*")

    @tf.function
    def train_step(train_images, train_labels):
        """Run one optimisation step and update the training metrics."""
        with tf.GradientTape() as tape:
            output = model(train_images, training=True)
            # cross entropy loss
            ce_loss = loss_object(train_labels, output)

            # l2 loss
            l2loss = weight_decay * tf.add_n([
                tf.nn.l2_loss(v)
                for v in model.trainable_variables
                if not matcher.match(v.name)
            ])

            loss = ce_loss + l2loss

        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        # only the cross entropy part is reported as the training loss
        train_loss(ce_loss)
        train_accuracy(train_labels, output)

    @tf.function
    def val_step(val_images, val_labels):
        """Evaluate one batch and update the validation metrics."""
        output = model(val_images, training=False)
        loss = loss_object(val_labels, output)

        val_loss(loss)
        val_accuracy(val_labels, output)

    best_val_acc = 0.
    for epoch in range(epochs):
        train_loss.reset_states()  # clear history info
        train_accuracy.reset_states()  # clear history info
        val_loss.reset_states()  # clear history info
        val_accuracy.reset_states()  # clear history info

        # train
        train_bar = tqdm(train_ds, file=sys.stdout)
        for images, labels in train_bar:
            train_step(images, labels)

            # print train process
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                                 epochs,
                                                                                 train_loss.result(),
                                                                                 train_accuracy.result())

        # update learning rate
        # (assigned after this epoch's training loop, so it applies to the next epoch)
        optimizer.learning_rate = scheduler(epoch)

        # validate
        val_bar = tqdm(val_ds, file=sys.stdout)
        for images, labels in val_bar:
            val_step(images, labels)

            # print val process
            val_bar.desc = "valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                               epochs,
                                                                               val_loss.result(),
                                                                               val_accuracy.result())
        # writing training loss and acc
        with train_writer.as_default():
            tf.summary.scalar("loss", train_loss.result(), epoch)
            tf.summary.scalar("accuracy", train_accuracy.result(), epoch)

        # writing validation loss and acc
        with val_writer.as_default():
            tf.summary.scalar("loss", val_loss.result(), epoch)
            tf.summary.scalar("accuracy", val_accuracy.result(), epoch)

        # only save best weights
        if val_accuracy.result() > best_val_acc:
            best_val_acc = val_accuracy.result()
            save_name = "./save_weights/model.ckpt"
            model.save_weights(save_name, save_format="tf")


# Start training only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: tensorflow_classification/vision_transformer/trans_weights.py
================================================
from vit_model import *


def main(weights_path: str,
         model_name: str,
         model: tf.keras.Model):
    """Copy weights from a NumPy ``.npz`` checkpoint into ``model`` and save them as ``.h5``.

    The checkpoint keys are renamed to match the variable names of ``model``,
    the separate query/key/value projections of every encoder block are fused
    into a single ``qkv`` kernel (and bias), and the attention output kernel is
    flattened to 2D. Variables without a matching key, or with a different
    shape, are reported on stdout and left untouched.

    :param weights_path: path of the ``.npz`` file to read
    :param model_name: base name of the output file ``./<model_name>.h5``
    :param model: built model exposing ``weights``, ``depth`` and ``qkv_bias``
    """
    var_dict = {v.name.split(':')[0]: v for v in model.weights}

    # (old, new) substring replacements, applied to every key in this order.
    # The order matters: the generic "embedding" rule must run last.
    rename_rules = [
        ("Transformer/", ""),
        ("MultiHeadDotProductAttention_1", "MultiHeadAttention"),
        ("MlpBlock_3", "MlpBlock"),
        ("posembed_input/pos_embedding", "pos_embed"),
        ("encoder_norm/bias", "encoder_norm/beta"),
        ("encoder_norm/scale", "encoder_norm/gamma"),
        ("LayerNorm_0/bias", "LayerNorm_0/beta"),
        ("LayerNorm_0/scale", "LayerNorm_0/gamma"),
        ("LayerNorm_2/bias", "LayerNorm_1/beta"),
        ("LayerNorm_2/scale", "LayerNorm_1/gamma"),
        ("embedding", "patch_embed/conv2d"),
    ]

    w_dict = {}
    # use the archive as a context manager so the underlying file gets closed
    with np.load(weights_path, allow_pickle=False) as ckpt_dict:
        for k, v in ckpt_dict.items():
            key_ = k
            for old, new in rename_rules:
                key_ = key_.replace(old, new)
            w_dict[key_] = v

    for i in range(model.depth):
        prefix = "encoderblock_{}/MultiHeadAttention".format(i)

        # fuse q/k/v kernels: keep the first axis, flatten the rest, concat on the last axis
        q_kernel = w_dict.pop("{}/query/kernel".format(prefix))
        k_kernel = w_dict.pop("{}/key/kernel".format(prefix))
        v_kernel = w_dict.pop("{}/value/kernel".format(prefix))
        q_kernel = np.reshape(q_kernel, [q_kernel.shape[0], -1])
        k_kernel = np.reshape(k_kernel, [k_kernel.shape[0], -1])
        v_kernel = np.reshape(v_kernel, [v_kernel.shape[0], -1])
        qkv_kernel = np.concatenate([q_kernel, k_kernel, v_kernel], axis=1)
        w_dict["{}/qkv/kernel".format(prefix)] = qkv_kernel

        if model.qkv_bias:
            # fuse q/k/v biases into one flat vector
            q_bias = w_dict.pop("{}/query/bias".format(prefix))
            k_bias = w_dict.pop("{}/key/bias".format(prefix))
            v_bias = w_dict.pop("{}/value/bias".format(prefix))
            q_bias = np.reshape(q_bias, [-1])
            k_bias = np.reshape(k_bias, [-1])
            v_bias = np.reshape(v_bias, [-1])
            qkv_bias = np.concatenate([q_bias, k_bias, v_bias], axis=0)
            w_dict["{}/qkv/bias".format(prefix)] = qkv_bias

        # output projection: flatten all leading axes, keep the last one
        out_kernel = w_dict["{}/out/kernel".format(prefix)]
        out_kernel = np.reshape(out_kernel, [-1, out_kernel.shape[-1]])
        w_dict["{}/out/kernel".format(prefix)] = out_kernel

    for key, var in var_dict.items():
        if key in w_dict:
            if w_dict[key].shape != var.shape:
                msg = "shape mismatch: {}".format(key)
                print(msg)
            else:
                var.assign(w_dict[key], read_value=False)
        else:
            msg = "Not found {} in {}".format(key, weights_path)
            print(msg)

    model.save_weights("./{}.h5".format(model_name))


# Script entry point: build one model variant and convert its .npz weights.
# The commented-out sections below show the same conversion for other variants.
if __name__ == '__main__':
    model = vit_base_patch16_224_in21k()
    model.build((1, 224, 224, 3))
    # https://storage.googleapis.com/vit_models/imagenet21k/ViT-B_16.npz
    main(weights_path="./ViT-B_16.npz",
         model_name="ViT-B_16",
         model=model)

    # model = vit_base_patch32_224_in21k()
    # model.build((1, 224, 224, 3))
    # # https://storage.googleapis.com/vit_models/imagenet21k/ViT-B_32.npz
    # main(weights_path="./ViT-B_32.npz",
    #      model_name="ViT-B_32",
    #      model=model)

    # model = vit_large_patch16_224_in21k()
    # model.build((1, 224, 224, 3))
    # # https://storage.googleapis.com/vit_models/imagenet21k/ViT-L_16.npz
    # main(weights_path="./ViT-L_16.npz",
    #      model_name="ViT-L_16",
    #      model=model)

    # model = vit_large_patch32_224_in21k()
    # model.build((1, 224, 224, 3))
    # # https://storage.googleapis.com/vit_models/imagenet21k/ViT-L_32.npz
    # main(weights_path="./ViT-L_32.npz",
    #      model_name="ViT-L_32",
    #      model=model)


================================================
FILE: tensorflow_classification/vision_transformer/utils.py
================================================
import os
import json
import random

import tensorflow as tf
import matplotlib.pyplot as plt


def read_split_data(root: str, val_rate: float = 0.2):
    """Scan an image-folder dataset and split it into training and validation sets.

    Every sub-directory of ``root`` is treated as one class; class indices are
    assigned in sorted directory-name order. The index -> class-name mapping is
    written to ``class_indices.json`` in the current working directory.

    Note: this function re-seeds the global ``random`` module with 0.

    :param root: dataset root directory (one sub-directory per class)
    :param val_rate: fraction of each class sampled into the validation set
    :return: (train_images_path, train_images_label, val_images_path, val_images_label)
    """
    random.seed(0)  # fixed seed so that the random split is repeatable
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # walk the root directory: one sub-directory corresponds to one class
    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]
    # sort so that the class order is stable
    flower_class.sort()
    # build the class-name -> numeric-index mapping and dump the inverse mapping
    class_indices = dict((k, v) for v, k in enumerate(flower_class))
    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of every training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of every validation image
    every_class_num = []  # number of samples found for each class
    supported = [".jpg", ".JPG", ".jpeg", ".JPEG"]  # supported file extensions
    # walk the files of every class directory
    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # collect the paths of all files with a supported extension
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        # numeric index of this class
        image_class = class_indices[cla]
        # record the number of samples of this class
        every_class_num.append(len(images))
        # randomly sample the validation images by ratio; stored as a set so the
        # membership test below is O(1) instead of a list scan per image
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:  # sampled paths go to the validation set
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:  # everything else goes to the training set
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.\n{} for training, {} for validation".format(sum(every_class_num),
                                                                                            len(train_images_path),
                                                                                            len(val_images_path)
                                                                                            ))

    plot_image = False
    if plot_image:
        # bar chart of the number of images per class
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # replace the x ticks 0,1,2,3,4 with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # add the value label on top of every bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        # x axis label
        plt.xlabel('image class')
        # y axis label
        plt.ylabel('number of images')
        # chart title
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label


def generate_ds(data_root: str,
                train_im_height: int = 224,
                train_im_width: int = 224,
                val_im_height: int = None,
                val_im_width: int = None,
                batch_size: int = 8,
                val_rate: float = 0.1,
                cache_data: bool = False):
    """
    Split the dataset and build the training and validation ``tf.data`` pipelines.
    :param data_root: dataset root directory
    :param train_im_height: height of the images fed to the network for training
    :param train_im_width:  width of the images fed to the network for training
    :param val_im_height: height of the validation images (defaults to the training height)
    :param val_im_width:  width of the validation images (defaults to the training width)
    :param batch_size: batch size used for both pipelines
    :param val_rate:  fraction of the data assigned to the validation set
    :param cache_data: whether to cache the decoded training data
    :return: (train_ds, val_ds)
    """
    assert train_im_height is not None
    assert train_im_width is not None
    # the validation size falls back to the training size when not given
    val_im_height = train_im_height if val_im_height is None else val_im_height
    val_im_width = train_im_width if val_im_width is None else val_im_width

    train_paths, train_labels, val_paths, val_labels = read_split_data(data_root, val_rate=val_rate)
    autotune = tf.data.experimental.AUTOTUNE

    def load_and_crop(path, height, width):
        # read the file, decode it as a 3-channel JPEG, cast to float and crop/pad
        raw = tf.io.read_file(path)
        pixels = tf.cast(tf.image.decode_jpeg(raw, channels=3), tf.float32)
        return tf.image.resize_with_crop_or_pad(pixels, height, width)

    def normalize(pixels):
        # map [0, 255] to [-1, 1]
        return (pixels / 255. - 0.5) / 0.5

    def process_train_info(img_path, label):
        image = load_and_crop(img_path, train_im_height, train_im_width)
        image = tf.image.random_flip_left_right(image)
        return normalize(image), label

    def process_val_info(img_path, label):
        image = load_and_crop(img_path, val_im_height, val_im_width)
        return normalize(image), label

    def build_pipeline(paths, labels, map_fn, shuffle, cache):
        # (path, label) pairs -> (image, label) pairs -> batched, prefetched dataset
        ds = tf.data.Dataset.from_tensor_slices((tf.constant(paths),
                                                 tf.constant(labels)))
        ds = ds.map(map_fn, num_parallel_calls=autotune)
        if cache:
            ds = ds.cache()  # keep the data in memory after the first read
        if shuffle:
            ds = ds.shuffle(buffer_size=len(paths))  # shuffle over the whole set
        ds = ds.batch(batch_size)
        # prepare the next step's data while the current one is being consumed
        return ds.prefetch(buffer_size=autotune)

    train_ds = build_pipeline(train_paths, train_labels, process_train_info,
                              shuffle=True, cache=cache_data)
    val_ds = build_pipeline(val_paths, val_labels, process_val_info,
                            shuffle=False, cache=False)

    return train_ds, val_ds


================================================
FILE: tensorflow_classification/vision_transformer/vit_model.py
================================================
"""
refer to:
https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
"""
import tensorflow as tf
from tensorflow.keras import Model, layers, initializers
import numpy as np


class PatchEmbed(layers.Layer):
    """
    2D Image to Patch Embedding

    Projects the image with a strided convolution (kernel size == stride ==
    ``patch_size``) and flattens the resulting grid into a sequence of
    ``num_patches`` tokens of size ``embed_dim``.
    """
    def __init__(self, img_size=224, patch_size=16, embed_dim=768):
        super(PatchEmbed, self).__init__()
        self.embed_dim = embed_dim
        self.img_size = (img_size, img_size)
        self.grid_size = (img_size // patch_size, img_size // patch_size)
        self.num_patches = self.grid_size[0] * self.grid_size[1]

        self.proj = layers.Conv2D(filters=embed_dim, kernel_size=patch_size,
                                  strides=patch_size, padding='SAME',
                                  kernel_initializer=initializers.LecunNormal(),
                                  bias_initializer=initializers.Zeros())

    def call(self, inputs, **kwargs):
        # only the spatial dims are checked; the batch dim is not needed here
        _, H, W, _ = inputs.shape
        assert H == self.img_size[0] and W == self.img_size[1], \
            f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
        x = self.proj(inputs)
        # [B, H, W, C] -> [B, H*W, C]
        # -1 lets TF infer the batch size, so this also works when the static
        # batch dimension is unknown (None)
        x = tf.reshape(x, [-1, self.num_patches, self.embed_dim])
        return x


class ConcatClassTokenAddPosEmbed(layers.Layer):
    """
    Prepend a trainable class token to the patch sequence and add a trainable
    position embedding of shape [1, num_patches + 1, embed_dim].
    """
    def __init__(self, embed_dim=768, num_patches=196, name=None):
        super(ConcatClassTokenAddPosEmbed, self).__init__(name=name)
        self.embed_dim = embed_dim
        self.num_patches = num_patches

    def build(self, input_shape):
        self.cls_token = self.add_weight(name="cls",
                                         shape=[1, 1, self.embed_dim],
                                         initializer=initializers.Zeros(),
                                         trainable=True,
                                         dtype=tf.float32)
        self.pos_embed = self.add_weight(name="pos_embed",
                                         shape=[1, self.num_patches + 1, self.embed_dim],
                                         initializer=initializers.RandomNormal(stddev=0.02),
                                         trainable=True,
                                         dtype=tf.float32)

    def call(self, inputs, **kwargs):
        # use the dynamic batch size so the layer also works when the static
        # batch dimension is unknown (None)
        batch_size = tf.shape(inputs)[0]

        # [1, 1, embed_dim] -> [B, 1, embed_dim]
        cls_token = tf.broadcast_to(self.cls_token, shape=[batch_size, 1, self.embed_dim])
        x = tf.concat([cls_token, inputs], axis=1)  # [B, num_patches + 1, embed_dim]
        x = x + self.pos_embed

        return x


class Attention(layers.Layer):
    """
    Multi-head self-attention with a fused qkv projection.

    ``call`` maps [batch_size, num_tokens, dim] to a tensor of the same shape.
    """
    k_ini = initializers.GlorotUniform()
    b_ini = initializers.Zeros()

    def __init__(self,
                 dim,
                 num_heads=8,
                 qkv_bias=False,
                 qk_scale=None,
                 attn_drop_ratio=0.,
                 proj_drop_ratio=0.,
                 name=None):
        super(Attention, self).__init__(name=name)
        self.num_heads = num_heads
        head_dim = dim // num_heads
        # default scaling is 1 / sqrt(head_dim) unless qk_scale is given
        self.scale = qk_scale or head_dim ** -0.5
        self.qkv = layers.Dense(dim * 3, use_bias=qkv_bias, name="qkv",
                                kernel_initializer=self.k_ini, bias_initializer=self.b_ini)
        self.attn_drop = layers.Dropout(attn_drop_ratio)
        self.proj = layers.Dense(dim, name="out",
                                 kernel_initializer=self.k_ini, bias_initializer=self.b_ini)
        self.proj_drop = layers.Dropout(proj_drop_ratio)

    def call(self, inputs, training=None):
        # [batch_size, num_patches + 1, total_embed_dim]
        # the batch dim is left to be inferred (-1 below) so that an unknown
        # static batch size (None) is supported
        _, N, C = inputs.shape

        # qkv(): -> [batch_size, num_patches + 1, 3 * total_embed_dim]
        qkv = self.qkv(inputs)
        # reshape: -> [batch_size, num_patches + 1, 3, num_heads, embed_dim_per_head]
        qkv = tf.reshape(qkv, [-1, N, 3, self.num_heads, C // self.num_heads])
        # transpose: -> [3, batch_size, num_heads, num_patches + 1, embed_dim_per_head]
        qkv = tf.transpose(qkv, [2, 0, 3, 1, 4])
        # [batch_size, num_heads, num_patches + 1, embed_dim_per_head]
        q, k, v = qkv[0], qkv[1], qkv[2]

        # transpose: -> [batch_size, num_heads, embed_dim_per_head, num_patches + 1]
        # multiply -> [batch_size, num_heads, num_patches + 1, num_patches + 1]
        attn = tf.matmul(a=q, b=k, transpose_b=True) * self.scale
        attn = tf.nn.softmax(attn, axis=-1)
        attn = self.attn_drop(attn, training=training)

        # multiply -> [batch_size, num_heads, num_patches + 1, embed_dim_per_head]
        x = tf.matmul(attn, v)
        # transpose: -> [batch_size, num_patches + 1, num_heads, embed_dim_per_head]
        x = tf.transpose(x, [0, 2, 1, 3])
        # reshape: -> [batch_size, num_patches + 1, total_embed_dim]
        x = tf.reshape(x, [-1, N, C])

        x = self.proj(x)
        x = self.proj_drop(x, training=training)
        return x


class MLP(layers.Layer):
    """
    Two-layer feed-forward block (Dense -> GELU -> Dropout -> Dense -> Dropout)
    as used in Vision Transformer, MLP-Mixer and related networks.
    """

    k_ini = initializers.GlorotUniform()
    b_ini = initializers.RandomNormal(stddev=1e-6)

    def __init__(self, in_features, mlp_ratio=4.0, drop=0., name=None):
        super(MLP, self).__init__(name=name)
        # width of the hidden layer is in_features scaled by mlp_ratio
        hidden_features = int(in_features * mlp_ratio)
        # both dense layers share the same initializers
        dense_kwargs = dict(kernel_initializer=self.k_ini, bias_initializer=self.b_ini)

        self.fc1 = layers.Dense(hidden_features, name="Dense_0", **dense_kwargs)
        self.act = layers.Activation("gelu")
        self.fc2 = layers.Dense(in_features, name="Dense_1", **dense_kwargs)
        self.drop = layers.Dropout(drop)

    def call(self, inputs, training=None):
        # expand, activate, regularise
        hidden = self.act(self.fc1(inputs))
        hidden = self.drop(hidden, training=training)
        # project back to in_features; the same dropout layer is applied again
        return self.drop(self.fc2(hidden), training=training)


class Block(layers.Layer):
    """
    Transformer encoder block: pre-norm multi-head attention and pre-norm MLP,
    each wrapped in a residual connection with optional drop path.
    """
    def __init__(self,
                 dim,
                 num_heads=8,
                 qkv_bias=False,
                 qk_scale=None,
                 drop_ratio=0.,
                 attn_drop_ratio=0.,
                 drop_path_ratio=0.,
                 name=None):
        super(Block, self).__init__(name=name)
        self.norm1 = layers.LayerNormalization(epsilon=1e-6, name="LayerNorm_0")
        self.attn = Attention(dim, num_heads=num_heads,
                              qkv_bias=qkv_bias, qk_scale=qk_scale,
                              attn_drop_ratio=attn_drop_ratio, proj_drop_ratio=drop_ratio,
                              name="MultiHeadAttention")
        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
        # noise_shape=(None, 1, 1) shares one dropout mask value across the last two axes
        self.drop_path = layers.Dropout(rate=drop_path_ratio, noise_shape=(None, 1, 1)) if drop_path_ratio > 0. \
            else layers.Activation("linear")
        self.norm2 = layers.LayerNormalization(epsilon=1e-6, name="LayerNorm_1")
        self.mlp = MLP(dim, drop=drop_ratio, name="MlpBlock")

    def call(self, inputs, training=None):
        # pass `training` explicitly to the sub-layers that accept it instead of
        # relying on implicit propagation from the enclosing call
        attn_out = self.attn(self.norm1(inputs), training=training)
        x = inputs + self.drop_path(attn_out, training=training)
        mlp_out = self.mlp(self.norm2(x), training=training)
        x = x + self.drop_path(mlp_out, training=training)
        return x


class VisionTransformer(Model):
    """
    Vision Transformer classifier: patch embedding, class token + position
    embedding, ``depth`` encoder blocks, final layer norm, optional
    ``pre_logits`` dense layer and a classification head on the class token.
    """
    def __init__(self, img_size=224, patch_size=16, embed_dim=768,
                 depth=12, num_heads=12, qkv_bias=True, qk_scale=None,
                 drop_ratio=0., attn_drop_ratio=0., drop_path_ratio=0.,
                 representation_size=None, num_classes=1000, name="ViT-B/16"):
        super(VisionTransformer, self).__init__(name=name)
        self.num_classes = num_classes
        self.embed_dim = embed_dim
        self.depth = depth
        self.qkv_bias = qkv_bias

        # image -> patch tokens -> (class token + patch tokens) + position embedding
        self.patch_embed = PatchEmbed(img_size=img_size, patch_size=patch_size, embed_dim=embed_dim)
        self.cls_token_pos_embed = ConcatClassTokenAddPosEmbed(embed_dim=embed_dim,
                                                               num_patches=self.patch_embed.num_patches,
                                                               name="cls_pos")

        self.pos_drop = layers.Dropout(drop_ratio)

        # stochastic depth decay rule: the drop path rate grows linearly with the block index
        drop_path_rates = np.linspace(0., drop_path_ratio, depth)
        encoder_blocks = []
        for idx, rate in enumerate(drop_path_rates):
            encoder_blocks.append(Block(dim=embed_dim, num_heads=num_heads, qkv_bias=qkv_bias,
                                        qk_scale=qk_scale, drop_ratio=drop_ratio,
                                        attn_drop_ratio=attn_drop_ratio,
                                        drop_path_ratio=rate, name="encoderblock_{}".format(idx)))
        self.blocks = encoder_blocks

        self.norm = layers.LayerNormalization(epsilon=1e-6, name="encoder_norm")

        # the pre_logits dense layer exists only when a representation size is given;
        # otherwise it is an identity activation
        self.has_logits = bool(representation_size)
        if self.has_logits:
            self.pre_logits = layers.Dense(representation_size, activation="tanh", name="pre_logits")
        else:
            self.pre_logits = layers.Activation("linear")

        self.head = layers.Dense(num_classes, name="head", kernel_initializer=initializers.Zeros())

    def call(self, inputs, training=None):
        # [B, H, W, C] -> [B, num_patches, embed_dim]
        tokens = self.patch_embed(inputs)  # [B, 196, 768]
        tokens = self.cls_token_pos_embed(tokens)  # [B, 197, 768]
        tokens = self.pos_drop(tokens, training=training)

        for encoder in self.blocks:
            tokens = encoder(tokens, training=training)

        tokens = self.norm(tokens)
        # classify from the class token only (sequence position 0)
        cls_feature = self.pre_logits(tokens[:, 0])
        return self.head(cls_feature)


def vit_base_patch16_224_in21k(num_classes: int = 21843, has_logits: bool = True):
    """
    Build ViT-Base with 16x16 patches (ViT-B/16), as described in the original
    paper (https://arxiv.org/abs/2010.11929).
    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
    """
    hidden_size = 768
    # the pre_logits layer is only created when has_logits is set
    representation_size = hidden_size if has_logits else None
    return VisionTransformer(img_size=224,
                             patch_size=16,
                             embed_dim=hidden_size,
                             depth=12,
                             num_heads=12,
                             representation_size=representation_size,
                             num_classes=num_classes,
                             name="ViT-B_16")


def vit_base_patch32_224_in21k(num_classes: int = 21843, has_logits: bool = True):
    """
    Build ViT-Base with 32x32 patches (ViT-B/32), as described in the original
    paper (https://arxiv.org/abs/2010.11929).
    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
    """
    hidden_size = 768
    # the pre_logits layer is only created when has_logits is set
    representation_size = hidden_size if has_logits else None
    return VisionTransformer(img_size=224,
                             patch_size=32,
                             embed_dim=hidden_size,
                             depth=12,
                             num_heads=12,
                             representation_size=representation_size,
                             num_classes=num_classes,
                             name="ViT-B_32")


def vit_large_patch16_224_in21k(num_classes: int = 21843, has_logits: bool = True):
    """
    Build ViT-Large with 16x16 patches (ViT-L/16), as described in the original
    paper (https://arxiv.org/abs/2010.11929).
    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
    """
    hidden_size = 1024
    # the pre_logits layer is only created when has_logits is set
    representation_size = hidden_size if has_logits else None
    return VisionTransformer(img_size=224,
                             patch_size=16,
                             embed_dim=hidden_size,
                             depth=24,
                             num_heads=16,
                             representation_size=representation_size,
                             num_classes=num_classes,
                             name="ViT-L_16")


def vit_large_patch32_224_in21k(num_classes: int = 21843, has_logits: bool = True):
    """
    Build ViT-Large with 32x32 patches (ViT-L/32), as described in the original
    paper (https://arxiv.org/abs/2010.11929).
    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
    """
    hidden_size = 1024
    # the pre_logits layer is only created when has_logits is set
    representation_size = hidden_size if has_logits else None
    return VisionTransformer(img_size=224,
                             patch_size=32,
                             embed_dim=hidden_size,
                             depth=24,
                             num_heads=16,
                             representation_size=representation_size,
                             num_classes=num_classes,
                             name="ViT-L_32")


def vit_huge_patch14_224_in21k(num_classes: int = 21843, has_logits: bool = True):
    """
    Build ViT-Huge with 14x14 patches (ViT-H/14), as described in the original
    paper (https://arxiv.org/abs/2010.11929).
    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
    """
    hidden_size = 1280
    # the pre_logits layer is only created when has_logits is set
    representation_size = hidden_size if has_logits else None
    return VisionTransformer(img_size=224,
                             patch_size=14,
                             embed_dim=hidden_size,
                             depth=32,
                             num_heads=16,
                             representation_size=representation_size,
                             num_classes=num_classes,
                             name="ViT-H_14")