Repository: WZMIAOMIAO/deep-learning-for-image-processing Branch: master Commit: 1ec3fe6f374f Files: 593 Total size: 3.3 MB Directory structure: gitextract_1s0rnibl/ ├── .github/ │ └── ISSUE_TEMPLATE/ │ └── issue-template.md ├── .gitignore ├── LICENSE ├── README.md ├── article_link/ │ └── README.md ├── course_ppt/ │ └── README.md ├── data_set/ │ ├── README.md │ └── split_data.py ├── deploying_service/ │ ├── deploying_pytorch/ │ │ ├── convert_onnx_cls/ │ │ │ ├── class_indices.json │ │ │ ├── main.py │ │ │ └── model.py │ │ ├── convert_openvino/ │ │ │ ├── convert_resnet34/ │ │ │ │ ├── README.md │ │ │ │ ├── compare_fps.py │ │ │ │ ├── compare_onnx_and_ir.py │ │ │ │ ├── convert_pytorch2onnx.py │ │ │ │ ├── model.py │ │ │ │ ├── quantization_int8.py │ │ │ │ ├── requirements.txt │ │ │ │ └── utils.py │ │ │ └── convert_yolov5/ │ │ │ ├── README.md │ │ │ ├── compare_fps.py │ │ │ ├── compare_onnx_and_ir.py │ │ │ ├── draw_box_utils.py │ │ │ ├── evaluation.py │ │ │ ├── predict.py │ │ │ ├── quantization_int8.py │ │ │ ├── requirements.txt │ │ │ └── utils.py │ │ ├── convert_tensorrt/ │ │ │ └── convert_resnet34/ │ │ │ ├── compare_onnx_and_trt.py │ │ │ ├── convert_pytorch2onnx.py │ │ │ ├── my_dataset.py │ │ │ ├── quantization.py │ │ │ └── utils.py │ │ └── pytorch_flask_service/ │ │ ├── class_indices.json │ │ ├── main.py │ │ ├── model.py │ │ ├── requirements.txt │ │ └── templates/ │ │ └── up.html │ └── pruning_model_pytorch/ │ ├── class_indices.json │ ├── main.py │ ├── model.py │ ├── predict.py │ └── train.py ├── others_project/ │ ├── draw_dilated_conv/ │ │ └── main.py │ ├── kmeans_anchors/ │ │ ├── main.py │ │ ├── plot_kmeans.py │ │ ├── read_voc.py │ │ └── yolo_kmeans.py │ ├── openvinotest/ │ │ └── openvino_cls_test/ │ │ ├── class_indices.json │ │ ├── create_imagenet_annotation.py │ │ ├── float32vsint8.py │ │ ├── main.py │ │ ├── model.py │ │ └── speed_test.py │ ├── readPbFile/ │ │ ├── README.md │ │ ├── pascal_label_map.pbtxt │ │ ├── readPb.py │ │ ├── test_images/ │ │ │ └── image_info.txt 
│ │ └── using_function.py │ ├── textcnnKeras/ │ │ ├── dataGenerator.py │ │ ├── data_link.txt │ │ ├── main.py │ │ └── models.py │ └── trans_widerface_to_xml/ │ ├── create_xml.py │ └── main.py ├── pytorch_classification/ │ ├── ConfusionMatrix/ │ │ ├── class_indices.json │ │ ├── main.py │ │ └── model.py │ ├── ConvNeXt/ │ │ ├── README.md │ │ ├── model.py │ │ ├── my_dataset.py │ │ ├── predict.py │ │ ├── train.py │ │ └── utils.py │ ├── MobileViT/ │ │ ├── README.md │ │ ├── model.py │ │ ├── model_config.py │ │ ├── my_dataset.py │ │ ├── predict.py │ │ ├── train.py │ │ ├── transformer.py │ │ ├── unfold_test.py │ │ └── utils.py │ ├── README.md │ ├── Test10_regnet/ │ │ ├── README.md │ │ ├── model.py │ │ ├── my_dataset.py │ │ ├── predict.py │ │ ├── pretrain_weights.py │ │ ├── train.py │ │ └── utils.py │ ├── Test11_efficientnetV2/ │ │ ├── README.md │ │ ├── class_indices.json │ │ ├── model.py │ │ ├── my_dataset.py │ │ ├── predict.py │ │ ├── train.py │ │ ├── trans_effv2_weights.py │ │ └── utils.py │ ├── Test1_official_demo/ │ │ ├── model.py │ │ ├── predict.py │ │ └── train.py │ ├── Test2_alexnet/ │ │ ├── class_indices.json │ │ ├── model.py │ │ ├── predict.py │ │ └── train.py │ ├── Test3_vggnet/ │ │ ├── class_indices.json │ │ ├── model.py │ │ ├── predict.py │ │ └── train.py │ ├── Test4_googlenet/ │ │ ├── class_indices.json │ │ ├── model.py │ │ ├── predict.py │ │ └── train.py │ ├── Test5_resnet/ │ │ ├── README.md │ │ ├── batch_predict.py │ │ ├── class_indices.json │ │ ├── load_weights.py │ │ ├── model.py │ │ ├── predict.py │ │ └── train.py │ ├── Test6_mobilenet/ │ │ ├── class_indices.json │ │ ├── model_v2.py │ │ ├── model_v3.py │ │ ├── predict.py │ │ └── train.py │ ├── Test7_shufflenet/ │ │ ├── README.md │ │ ├── class_indices.json │ │ ├── model.py │ │ ├── my_dataset.py │ │ ├── predict.py │ │ ├── train.py │ │ └── utils.py │ ├── Test8_densenet/ │ │ ├── README.md │ │ ├── model.py │ │ ├── my_dataset.py │ │ ├── predict.py │ │ ├── train.py │ │ └── utils.py │ ├── Test9_efficientNet/ │ │ 
├── README.md │ │ ├── model.py │ │ ├── my_dataset.py │ │ ├── predict.py │ │ ├── requirements.txt │ │ ├── train.py │ │ ├── trans_weights_to_pytorch.py │ │ └── utils.py │ ├── analyze_weights_featuremap/ │ │ ├── alexnet_model.py │ │ ├── analyze_feature_map.py │ │ ├── analyze_kernel_weight.py │ │ └── resnet_model.py │ ├── custom_dataset/ │ │ ├── main.py │ │ ├── my_dataset.py │ │ └── utils.py │ ├── grad_cam/ │ │ ├── README.md │ │ ├── imagenet1k_classes.txt │ │ ├── imagenet21k_classes.txt │ │ ├── main_cnn.py │ │ ├── main_swin.py │ │ ├── main_vit.py │ │ ├── swin_model.py │ │ ├── utils.py │ │ └── vit_model.py │ ├── mini_imagenet/ │ │ ├── README.md │ │ ├── imagenet_class_index.json │ │ ├── model.py │ │ ├── multi_train_utils/ │ │ │ ├── __init__.py │ │ │ ├── distributed_utils.py │ │ │ └── train_eval_utils.py │ │ ├── my_dataset.py │ │ ├── restructure_csv.py │ │ ├── train_multi_gpu_using_launch.py │ │ └── train_single_gpu.py │ ├── model_complexity/ │ │ ├── main.py │ │ ├── model.py │ │ └── utils.py │ ├── swin_transformer/ │ │ ├── README.md │ │ ├── create_confusion_matrix.py │ │ ├── model.py │ │ ├── my_dataset.py │ │ ├── predict.py │ │ ├── select_incorrect_samples.py │ │ ├── train.py │ │ └── utils.py │ ├── tensorboard_test/ │ │ ├── data_utils.py │ │ ├── model.py │ │ ├── my_dataset.py │ │ ├── requirements.txt │ │ ├── train.py │ │ └── train_eval_utils.py │ ├── train_multi_GPU/ │ │ ├── README.md │ │ ├── model.py │ │ ├── multi_train_utils/ │ │ │ ├── distributed_utils.py │ │ │ └── train_eval_utils.py │ │ ├── my_dataset.py │ │ ├── plot_results.py │ │ ├── requirements.txt │ │ ├── train_multi_gpu_using_launch.py │ │ ├── train_multi_gpu_using_spawn.py │ │ ├── train_single_gpu.py │ │ └── utils.py │ └── vision_transformer/ │ ├── README.md │ ├── flops.py │ ├── my_dataset.py │ ├── predict.py │ ├── train.py │ ├── utils.py │ └── vit_model.py ├── pytorch_keypoint/ │ ├── DeepPose/ │ │ ├── README.md │ │ ├── datasets.py │ │ ├── export_onnx.py │ │ ├── model.py │ │ ├── predict.py │ │ ├── 
requirements.txt │ │ ├── train.py │ │ ├── train_multi_GPU.py │ │ ├── train_utils/ │ │ │ ├── distributed_utils.py │ │ │ ├── losses.py │ │ │ ├── metrics.py │ │ │ └── train_eval_utils.py │ │ ├── transforms.py │ │ ├── utils.py │ │ └── wflw_horizontal_flip_indices.py │ └── HRNet/ │ ├── README.md │ ├── draw_utils.py │ ├── model/ │ │ ├── __init__.py │ │ └── hrnet.py │ ├── my_dataset_coco.py │ ├── person_keypoints.json │ ├── plot_curve.py │ ├── predict.py │ ├── requirements.txt │ ├── train.py │ ├── train_multi_GPU.py │ ├── train_utils/ │ │ ├── __init__.py │ │ ├── coco_eval.py │ │ ├── coco_utils.py │ │ ├── distributed_utils.py │ │ ├── group_by_aspect_ratio.py │ │ ├── loss.py │ │ └── train_eval_utils.py │ ├── transforms.py │ └── validation.py ├── pytorch_object_detection/ │ ├── faster_rcnn/ │ │ ├── README.md │ │ ├── backbone/ │ │ │ ├── __init__.py │ │ │ ├── feature_pyramid_network.py │ │ │ ├── mobilenetv2_model.py │ │ │ ├── resnet50_fpn_model.py │ │ │ └── vgg_model.py │ │ ├── change_backbone_with_fpn.py │ │ ├── change_backbone_without_fpn.py │ │ ├── draw_box_utils.py │ │ ├── my_dataset.py │ │ ├── network_files/ │ │ │ ├── __init__.py │ │ │ ├── boxes.py │ │ │ ├── det_utils.py │ │ │ ├── faster_rcnn_framework.py │ │ │ ├── image_list.py │ │ │ ├── roi_head.py │ │ │ ├── rpn_function.py │ │ │ └── transform.py │ │ ├── pascal_voc_classes.json │ │ ├── plot_curve.py │ │ ├── predict.py │ │ ├── record_mAP.txt │ │ ├── requirements.txt │ │ ├── split_data.py │ │ ├── train_mobilenetv2.py │ │ ├── train_multi_GPU.py │ │ ├── train_res50_fpn.py │ │ ├── train_utils/ │ │ │ ├── __init__.py │ │ │ ├── coco_eval.py │ │ │ ├── coco_utils.py │ │ │ ├── distributed_utils.py │ │ │ ├── group_by_aspect_ratio.py │ │ │ └── train_eval_utils.py │ │ ├── transforms.py │ │ └── validation.py │ ├── mask_rcnn/ │ │ ├── README.md │ │ ├── backbone/ │ │ │ ├── __init__.py │ │ │ ├── feature_pyramid_network.py │ │ │ └── resnet50_fpn_model.py │ │ ├── coco91_indices.json │ │ ├── det_results20220406-141544.txt │ │ ├── 
draw_box_utils.py │ │ ├── my_dataset_coco.py │ │ ├── my_dataset_voc.py │ │ ├── network_files/ │ │ │ ├── __init__.py │ │ │ ├── boxes.py │ │ │ ├── det_utils.py │ │ │ ├── faster_rcnn_framework.py │ │ │ ├── image_list.py │ │ │ ├── mask_rcnn.py │ │ │ ├── roi_head.py │ │ │ ├── rpn_function.py │ │ │ └── transform.py │ │ ├── pascal_voc_indices.json │ │ ├── plot_curve.py │ │ ├── predict.py │ │ ├── requirements.txt │ │ ├── seg_results20220406-141544.txt │ │ ├── train.py │ │ ├── train_multi_GPU.py │ │ ├── train_utils/ │ │ │ ├── __init__.py │ │ │ ├── coco_eval.py │ │ │ ├── coco_utils.py │ │ │ ├── distributed_utils.py │ │ │ ├── group_by_aspect_ratio.py │ │ │ └── train_eval_utils.py │ │ ├── transforms.py │ │ └── validation.py │ ├── retinaNet/ │ │ ├── README.md │ │ ├── backbone/ │ │ │ ├── __init__.py │ │ │ ├── feature_pyramid_network.py │ │ │ └── resnet50_fpn_model.py │ │ ├── draw_box_utils.py │ │ ├── my_dataset.py │ │ ├── network_files/ │ │ │ ├── __init__.py │ │ │ ├── anchor_utils.py │ │ │ ├── boxes.py │ │ │ ├── det_utils.py │ │ │ ├── image_list.py │ │ │ ├── losses.py │ │ │ ├── retinanet.py │ │ │ └── transform.py │ │ ├── pascal_voc_classes.json │ │ ├── plot_curve.py │ │ ├── predict.py │ │ ├── requirements.txt │ │ ├── results20210421-142632.txt │ │ ├── train.py │ │ ├── train_multi_GPU.py │ │ ├── train_utils/ │ │ │ ├── __init__.py │ │ │ ├── coco_eval.py │ │ │ ├── coco_utils.py │ │ │ ├── distributed_utils.py │ │ │ ├── group_by_aspect_ratio.py │ │ │ └── train_eval_utils.py │ │ ├── transforms.py │ │ └── validation.py │ ├── ssd/ │ │ ├── README.md │ │ ├── draw_box_utils.py │ │ ├── my_dataset.py │ │ ├── pascal_voc_classes.json │ │ ├── plot_curve.py │ │ ├── predict_test.py │ │ ├── record_mAP.txt │ │ ├── requirements.txt │ │ ├── src/ │ │ │ ├── __init__.py │ │ │ ├── res50_backbone.py │ │ │ ├── ssd_model.py │ │ │ └── utils.py │ │ ├── train_multi_GPU.py │ │ ├── train_ssd300.py │ │ ├── train_utils/ │ │ │ ├── __init__.py │ │ │ ├── coco_eval.py │ │ │ ├── coco_utils.py │ │ │ ├── 
distributed_utils.py │ │ │ ├── group_by_aspect_ratio.py │ │ │ └── train_eval_utils.py │ │ ├── transforms.py │ │ └── validation.py │ ├── train_coco_dataset/ │ │ ├── README.md │ │ ├── backbone/ │ │ │ ├── __init__.py │ │ │ ├── feature_pyramid_network.py │ │ │ ├── mobilenetv2_model.py │ │ │ ├── resnet.py │ │ │ ├── resnet50_fpn_model.py │ │ │ └── vgg_model.py │ │ ├── change_backbone_with_fpn.py │ │ ├── coco91_indices.json │ │ ├── compute_receptive_field.py │ │ ├── draw_box_utils.py │ │ ├── my_dataset.py │ │ ├── network_files/ │ │ │ ├── __init__.py │ │ │ ├── boxes.py │ │ │ ├── det_utils.py │ │ │ ├── faster_rcnn_framework.py │ │ │ ├── image_list.py │ │ │ ├── roi_head.py │ │ │ ├── rpn_function.py │ │ │ └── transform.py │ │ ├── plot_curve.py │ │ ├── predict.py │ │ ├── requirements.txt │ │ ├── results20220408-201436.txt │ │ ├── train.py │ │ ├── train_multi_GPU.py │ │ ├── train_utils/ │ │ │ ├── __init__.py │ │ │ ├── coco_eval.py │ │ │ ├── distributed_utils.py │ │ │ ├── group_by_aspect_ratio.py │ │ │ └── train_eval_utils.py │ │ ├── transforms.py │ │ └── validation.py │ └── yolov3_spp/ │ ├── README.md │ ├── build_utils/ │ │ ├── __init__.py │ │ ├── datasets.py │ │ ├── img_utils.py │ │ ├── layers.py │ │ ├── parse_config.py │ │ ├── torch_utils.py │ │ └── utils.py │ ├── calculate_dataset.py │ ├── cfg/ │ │ ├── hyp.yaml │ │ └── yolov3-spp.cfg │ ├── draw_box_utils.py │ ├── export_onnx.py │ ├── load_onnx_test.py │ ├── models.py │ ├── predict_test.py │ ├── requirements.txt │ ├── results20210515-152935.txt │ ├── train.py │ ├── train_multi_GPU.py │ ├── train_utils/ │ │ ├── __init__.py │ │ ├── coco_eval.py │ │ ├── coco_utils.py │ │ ├── distributed_utils.py │ │ ├── group_by_aspect_ratio.py │ │ └── train_eval_utils.py │ ├── trans_voc2yolo.py │ └── validation.py ├── pytorch_segmentation/ │ ├── deeplab_v3/ │ │ ├── README.md │ │ ├── get_palette.py │ │ ├── my_dataset.py │ │ ├── palette.json │ │ ├── pascal_voc_classes.json │ │ ├── predict.py │ │ ├── requirements.txt │ │ ├── 
results20211027-104607.txt │ │ ├── src/ │ │ │ ├── __init__.py │ │ │ ├── deeplabv3_model.py │ │ │ ├── mobilenet_backbone.py │ │ │ └── resnet_backbone.py │ │ ├── train.py │ │ ├── train_multi_GPU.py │ │ ├── train_utils/ │ │ │ ├── __init__.py │ │ │ ├── distributed_utils.py │ │ │ └── train_and_eval.py │ │ ├── transforms.py │ │ └── validation.py │ ├── fcn/ │ │ ├── README.md │ │ ├── get_palette.py │ │ ├── my_dataset.py │ │ ├── palette.json │ │ ├── pascal_voc_classes.json │ │ ├── predict.py │ │ ├── requirements.txt │ │ ├── results20210918-122740.txt │ │ ├── src/ │ │ │ ├── __init__.py │ │ │ ├── backbone.py │ │ │ └── fcn_model.py │ │ ├── train.py │ │ ├── train_multi_GPU.py │ │ ├── train_utils/ │ │ │ ├── __init__.py │ │ │ ├── distributed_utils.py │ │ │ └── train_and_eval.py │ │ ├── transforms.py │ │ └── validation.py │ ├── lraspp/ │ │ ├── README.md │ │ ├── get_palette.py │ │ ├── my_dataset.py │ │ ├── palette.json │ │ ├── pascal_voc_classes.json │ │ ├── predict.py │ │ ├── requirements.txt │ │ ├── results20211028-105233.txt │ │ ├── src/ │ │ │ ├── __init__.py │ │ │ ├── lraspp_model.py │ │ │ └── mobilenet_backbone.py │ │ ├── train.py │ │ ├── train_multi_GPU.py │ │ ├── train_utils/ │ │ │ ├── __init__.py │ │ │ ├── distributed_utils.py │ │ │ └── train_and_eval.py │ │ ├── transforms.py │ │ └── validation.py │ ├── u2net/ │ │ ├── README.md │ │ ├── convert_weight.py │ │ ├── my_dataset.py │ │ ├── predict.py │ │ ├── requirements.txt │ │ ├── results20220723-123632.txt │ │ ├── src/ │ │ │ ├── __init__.py │ │ │ └── model.py │ │ ├── train.py │ │ ├── train_multi_GPU.py │ │ ├── train_utils/ │ │ │ ├── __init__.py │ │ │ ├── distributed_utils.py │ │ │ └── train_and_eval.py │ │ ├── transforms.py │ │ └── validation.py │ └── unet/ │ ├── README.md │ ├── compute_mean_std.py │ ├── my_dataset.py │ ├── predict.py │ ├── requirements.txt │ ├── results20220109-165837.txt │ ├── src/ │ │ ├── __init__.py │ │ ├── mobilenet_unet.py │ │ ├── unet.py │ │ └── vgg_unet.py │ ├── train.py │ ├── train_multi_GPU.py │ ├── 
train_utils/ │ │ ├── __init__.py │ │ ├── dice_coefficient_loss.py │ │ ├── distributed_utils.py │ │ └── train_and_eval.py │ └── transforms.py ├── summary_problem.md └── tensorflow_classification/ ├── ConfusionMatrix/ │ ├── class_indices.json │ ├── main.py │ └── model.py ├── ConvNeXt/ │ ├── model.py │ ├── predict.py │ ├── train.py │ ├── trans_weights.py │ └── utils.py ├── README.md ├── Test11_efficientnetV2/ │ ├── model.py │ ├── predict.py │ ├── train.py │ ├── trans_weights.py │ └── utils.py ├── Test1_official_demo/ │ ├── model.py │ └── train.py ├── Test2_alexnet/ │ ├── class_indices.json │ ├── fine_train_alexnet.py │ ├── model.py │ ├── predict.py │ ├── read_pth.py │ ├── train.py │ └── trainGPU.py ├── Test3_vgg/ │ ├── class_indices.json │ ├── fine_train_vgg16.py │ ├── model.py │ ├── predict.py │ ├── read_ckpt.py │ ├── train.py │ └── trainGPU.py ├── Test4_goolenet/ │ ├── class_indices.json │ ├── model.py │ ├── model_add_bn.py │ ├── predict.py │ ├── read_pth.py │ ├── train.py │ ├── trainGPU.py │ └── train_add_bn.py ├── Test5_resnet/ │ ├── batch_predict.py │ ├── class_indices.json │ ├── model.py │ ├── predict.py │ ├── read_ckpt.py │ ├── read_h5.py │ ├── subclassed_model.py │ ├── train.py │ └── trainGPU.py ├── Test6_mobilenet/ │ ├── model_v2.py │ ├── model_v3.py │ ├── predict.py │ ├── read_ckpt.py │ ├── trainGPU_mobilenet_v2.py │ ├── train_mobilenet_v2.py │ ├── train_mobilenet_v3.py │ ├── trans_v3_weights.py │ └── utils.py ├── Test7_shuffleNet/ │ ├── model.py │ ├── predict.py │ ├── train.py │ ├── trans_weights.py │ └── utils.py ├── Test9_efficientNet/ │ ├── model.py │ ├── predict.py │ ├── train.py │ └── utils.py ├── analyze_weights_featuremap/ │ ├── alexnet_model.py │ ├── analyze_feature_map.py │ └── analyze_kernel_weight.py ├── custom_dataset/ │ ├── train_fit.py │ └── utils.py ├── swin_transformer/ │ ├── model.py │ ├── predict.py │ ├── train.py │ ├── trans_weights.py │ └── utils.py ├── tensorboard_test/ │ ├── train_fit.py │ └── train_not_fit.py └── vision_transformer/ 
├── predict.py ├── train.py ├── trans_weights.py ├── utils.py └── vit_model.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/ISSUE_TEMPLATE/issue-template.md ================================================ --- name: Issue template about: Use this template for reporting your problem title: '' labels: '' assignees: '' --- **System information** * Have I written custom code: * OS Platform(e.g., Windows 10 or Linux Ubuntu 16.04): * Python version: * Deep learning framework and version(e.g., TensorFlow 2.1 or PyTorch 1.3): * Use GPU or not: * CUDA/cuDNN version(if you use GPU): * The network you trained(e.g., ResNet34 network): **Describe the current behavior** **Error info / logs** ================================================ FILE: .gitignore ================================================ ##ignore this file## *.idea __pycache__ *.zip flower_data *.h5 *.pth *.pt *.jpg *.ckpt.* *.ckpt *.config *.gz *.onnx *.xml *.bin *.mapping *.csv checkpoint data VOCdevkit ssd_resnet50_v1_fpn_shared_box_predictor runs ================================================ FILE: LICENSE ================================================ GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users.
We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. 
The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. 
Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. 
The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. 
Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. 
You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. 
You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. 
Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. 
If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. 
When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. 
If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. 
If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. 
For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. 
If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. "Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. 
You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. 
The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. 
SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: <program> Copyright (C) <year> <name of author> This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see <https://www.gnu.org/licenses/>. The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read <https://www.gnu.org/licenses/why-not-lgpl.html>.
================================================ FILE: README.md ================================================ # 深度学习在图像处理中的应用教程 ## 前言 * 本教程是对本人研究生期间的研究内容进行整理总结,总结的同时也希望能够帮助更多的小伙伴。后期如果有学习到新的知识也会与大家一起分享。 * 本教程会以视频的方式进行分享,教学流程如下: 1)介绍网络的结构与创新点 2)使用Pytorch进行网络的搭建与训练 3)使用Tensorflow(内部的keras模块)进行网络的搭建与训练 * 课程中所有PPT都放在`course_ppt`文件夹下,需要的自行下载。 ## 教程目录,点击跳转相应视频(后期会根据学习内容增加) * 图像分类 * LeNet(已完成) * [Pytorch官方demo(Lenet)](https://www.bilibili.com/video/BV187411T7Ye) * [Tensorflow2官方demo](https://www.bilibili.com/video/BV1n7411T7o6) * AlexNet(已完成) * [AlexNet网络讲解](https://www.bilibili.com/video/BV1p7411T7Pc) * [Pytorch搭建AlexNet](https://www.bilibili.com/video/BV1W7411T7qc) * [Tensorflow2搭建Alexnet](https://www.bilibili.com/video/BV1s7411T7vs) * VggNet(已完成) * [VggNet网络讲解](https://www.bilibili.com/video/BV1q7411T7Y6) * [Pytorch搭建VGG网络](https://www.bilibili.com/video/BV1i7411T7ZN) * [Tensorflow2搭建VGG网络](https://www.bilibili.com/video/BV1q7411T76b) * GoogLeNet(已完成) * [GoogLeNet网络讲解](https://www.bilibili.com/video/BV1z7411T7ie) * [Pytorch搭建GoogLeNet网络](https://www.bilibili.com/video/BV1r7411T7M5) * [Tensorflow2搭建GoogLeNet网络](https://www.bilibili.com/video/BV1a7411T7Ht) * ResNet(已完成) * [ResNet网络讲解](https://www.bilibili.com/video/BV1T7411T7wa) * [Pytorch搭建ResNet网络](https://www.bilibili.com/video/BV14E411H7Uw) * [Tensorflow2搭建ResNet网络](https://www.bilibili.com/video/BV1WE41177Ya) * ResNeXt (已完成) * [ResNeXt网络讲解](https://www.bilibili.com/video/BV1Ap4y1p71v/) * [Pytorch搭建ResNeXt网络](https://www.bilibili.com/video/BV1rX4y1N7tE) * MobileNet_V1_V2(已完成) * [MobileNet_V1_V2网络讲解](https://www.bilibili.com/video/BV1yE411p7L7) * [Pytorch搭建MobileNetV2网络](https://www.bilibili.com/video/BV1qE411T7qZ) * [Tensorflow2搭建MobileNetV2网络](https://www.bilibili.com/video/BV1NE411K7tX) * MobileNet_V3(已完成) * [MobileNet_V3网络讲解](https://www.bilibili.com/video/BV1GK4y1p7uE) * [Pytorch搭建MobileNetV3网络](https://www.bilibili.com/video/BV1zT4y1P7pd) * 
[Tensorflow2搭建MobileNetV3网络](https://www.bilibili.com/video/BV1KA411g7wX) * ShuffleNet_V1_V2 (已完成) * [ShuffleNet_V1_V2网络讲解](https://www.bilibili.com/video/BV15y4y1Y7SY) * [使用Pytorch搭建ShuffleNetV2](https://www.bilibili.com/video/BV1dh411r76X) * [使用Tensorflow2搭建ShuffleNetV2](https://www.bilibili.com/video/BV1kr4y1N7bh) * EfficientNet_V1(已完成) * [EfficientNet网络讲解](https://www.bilibili.com/video/BV1XK4y1U7PX) * [使用Pytorch搭建EfficientNet](https://www.bilibili.com/video/BV19z4y1179h/) * [使用Tensorflow2搭建EfficientNet](https://www.bilibili.com/video/BV1PK4y1S7Jf) * EfficientNet_V2 (已完成) * [EfficientNetV2网络讲解](https://www.bilibili.com/video/BV19v41157AU) * [使用Pytorch搭建EfficientNetV2](https://www.bilibili.com/video/BV1Xy4y1g74u) * [使用Tensorflow搭建EfficientNetV2](https://www.bilibili.com/video/BV19K4y1g7m4) * RepVGG(已完成) * [RepVGG网络讲解](https://www.bilibili.com/video/BV15f4y1o7QR) * Vision Transformer(已完成) * [Multi-Head Attention讲解](https://www.bilibili.com/video/BV15v411W78M) * [Vision Transformer网络讲解](https://www.bilibili.com/video/BV1Jh411Y7WQ) * [使用Pytorch搭建Vision Transformer](https://www.bilibili.com/video/BV1AL411W7dT) * [使用tensorflow2搭建Vision Transformer](https://www.bilibili.com/video/BV1q64y1X7GY) * Swin Transformer(已完成) * [Swin Transformer网络讲解](https://www.bilibili.com/video/BV1pL4y1v7jC) * [使用Pytorch搭建Swin Transformer](https://www.bilibili.com/video/BV1yg411K7Yc) * [使用Tensorflow2搭建Swin Transformer](https://www.bilibili.com/video/BV1bR4y1t7qT) * ConvNeXt(已完成) * [ConvNeXt网络讲解](https://www.bilibili.com/video/BV1SS4y157fu) * [使用Pytorch搭建ConvNeXt](https://www.bilibili.com/video/BV14S4y1L791) * [使用Tensorflow2搭建ConvNeXt](https://www.bilibili.com/video/BV1TS4y1V7Gz) * MobileViT(已完成) * [MobileViT网络讲解](https://www.bilibili.com/video/BV1TG41137sb) * [使用Pytorch搭建MobileViT](https://www.bilibili.com/video/BV1ae411L7Ki) * 目标检测 * Faster-RCNN/FPN(已完成) * [Faster-RCNN网络讲解](https://www.bilibili.com/video/BV1af4y1m7iL) * [FPN网络讲解](https://www.bilibili.com/video/BV1dh411U7D9) * 
[Faster-RCNN源码解析(Pytorch)](https://www.bilibili.com/video/BV1of4y1m7nj) * SSD/RetinaNet (已完成) * [SSD网络讲解](https://www.bilibili.com/video/BV1fT4y1L7Gi) * [RetinaNet网络讲解](https://www.bilibili.com/video/BV1Q54y1L7sM) * [SSD源码解析(Pytorch)](https://www.bilibili.com/video/BV1vK411H771) * YOLO Series (已完成) * [YOLO系列网络讲解(V1~V3)](https://www.bilibili.com/video/BV1yi4y1g7ro) * [YOLOv3 SPP源码解析(Pytorch版)](https://www.bilibili.com/video/BV1t54y1C7ra) * [YOLOV4网络讲解](https://www.bilibili.com/video/BV1NF41147So) * [YOLOV5网络讲解](https://www.bilibili.com/video/BV1T3411p7zR) * [YOLOX 网络讲解](https://www.bilibili.com/video/BV1JW4y1k76c) * FCOS(已完成) * [FCOS网络讲解](https://www.bilibili.com/video/BV1G5411X7jw) * 语义分割 * FCN (已完成) * [FCN网络讲解](https://www.bilibili.com/video/BV1J3411C7zd) * [FCN源码解析(Pytorch版)](https://www.bilibili.com/video/BV19q4y1971Q) * DeepLabV3 (已完成) * [DeepLabV1网络讲解](https://www.bilibili.com/video/BV1SU4y1N7Ao) * [DeepLabV2网络讲解](https://www.bilibili.com/video/BV1gP4y1G7TC) * [DeepLabV3网络讲解](https://www.bilibili.com/video/BV1Jb4y1q7j7) * [DeepLabV3源码解析(Pytorch版)](https://www.bilibili.com/video/BV1TD4y1c7Wx) * LR-ASPP (已完成) * [LR-ASPP网络讲解](https://www.bilibili.com/video/BV1LS4y1M76E) * [LR-ASPP源码解析(Pytorch版)](https://www.bilibili.com/video/bv13D4y1F7ML) * U-Net (已完成) * [U-Net网络讲解](https://www.bilibili.com/video/BV1Vq4y127fB/) * [U-Net源码解析(Pytorch版)](https://www.bilibili.com/video/BV1Vq4y127fB) * U2Net (已完成) * [U2Net网络讲解](https://www.bilibili.com/video/BV1yB4y1z7mj) * [U2Net源码解析(Pytorch版)](https://www.bilibili.com/video/BV1Kt4y137iS) * 实例分割 * Mask R-CNN(已完成) * [Mask R-CNN网络讲解](https://www.bilibili.com/video/BV1ZY411774T) * [Mask R-CNN源码解析(Pytorch版)](https://www.bilibili.com/video/BV1hY411E7wD) * 关键点检测 * DeepPose(已完成) * [DeepPose网络讲解](https://www.bilibili.com/video/BV1bm421g7aJ) * [DeepPose源码解析(Pytorch版)](https://www.bilibili.com/video/BV1bm421g7aJ) * HRNet(已完成) * [HRNet网络讲解](https://www.bilibili.com/video/BV1bB4y1y7qP) * 
[HRNet源码解析(Pytorch版)](https://www.bilibili.com/video/BV1ar4y157JM) **[更多相关视频请进入我的bilibili频道查看](https://space.bilibili.com/18161609/channel/index)** --- 欢迎大家关注下我的微信公众号(**阿喆学习小记**),平时会总结些相关学习博文。 如果有什么问题,也可以到我的CSDN中一起讨论。 [https://blog.csdn.net/qq_37541097/article/details/103482003](https://blog.csdn.net/qq_37541097/article/details/103482003) 我的bilibili频道: [https://space.bilibili.com/18161609/channel/index](https://space.bilibili.com/18161609/channel/index) ================================================ FILE: article_link/README.md ================================================ # 文献链接 ## 图像分类(Classification) - LeNet [http://yann.lecun.com/exdb/lenet/index.html](http://yann.lecun.com/exdb/lenet/index.html) - AlexNet [http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf) - ZFNet(Visualizing and Understanding Convolutional Networks) [https://arxiv.org/abs/1311.2901](https://arxiv.org/abs/1311.2901) - VGG [https://arxiv.org/abs/1409.1556](https://arxiv.org/abs/1409.1556) - GoogLeNet, Inceptionv1(Going deeper with convolutions) [https://arxiv.org/abs/1409.4842](https://arxiv.org/abs/1409.4842) - Batch Normalization [https://arxiv.org/abs/1502.03167](https://arxiv.org/abs/1502.03167) - Inceptionv3(Rethinking the Inception Architecture for Computer Vision) [https://arxiv.org/abs/1512.00567](https://arxiv.org/abs/1512.00567) - Inceptionv4, Inception-ResNet [https://arxiv.org/abs/1602.07261](https://arxiv.org/abs/1602.07261) - Xception(Deep Learning with Depthwise Separable Convolutions) [https://arxiv.org/abs/1610.02357](https://arxiv.org/abs/1610.02357) - ResNet [https://arxiv.org/abs/1512.03385](https://arxiv.org/abs/1512.03385) - ResNeXt [https://arxiv.org/abs/1611.05431](https://arxiv.org/abs/1611.05431) - DenseNet [https://arxiv.org/abs/1608.06993](https://arxiv.org/abs/1608.06993) - NASNet-A(Learning Transferable 
Architectures for Scalable Image Recognition) [https://arxiv.org/abs/1707.07012](https://arxiv.org/abs/1707.07012) - SENet(Squeeze-and-Excitation Networks) [https://arxiv.org/abs/1709.01507](https://arxiv.org/abs/1709.01507) - MobileNet(v1) [https://arxiv.org/abs/1704.04861](https://arxiv.org/abs/1704.04861) - MobileNet(v2) [https://arxiv.org/abs/1801.04381](https://arxiv.org/abs/1801.04381) - MobileNet(v3) [https://arxiv.org/abs/1905.02244](https://arxiv.org/abs/1905.02244) - ShuffleNet(v1) [https://arxiv.org/abs/1707.01083](https://arxiv.org/abs/1707.01083) - ShuffleNet(v2) [https://arxiv.org/abs/1807.11164](https://arxiv.org/abs/1807.11164) - Bag of Tricks for Image Classification with Convolutional Neural Networks [https://arxiv.org/abs/1812.01187](https://arxiv.org/abs/1812.01187) - EfficientNet(v1) [https://arxiv.org/abs/1905.11946](https://arxiv.org/abs/1905.11946) - EfficientNet(v2) [https://arxiv.org/abs/2104.00298](https://arxiv.org/abs/2104.00298) - CSPNet [https://arxiv.org/abs/1911.11929](https://arxiv.org/abs/1911.11929) - RegNet [https://arxiv.org/abs/2003.13678](https://arxiv.org/abs/2003.13678) - NFNets(High-Performance Large-Scale Image Recognition Without Normalization) [https://arxiv.org/abs/2102.06171](https://arxiv.org/abs/2102.06171) - Vision Transformer [https://arxiv.org/abs/2010.11929](https://arxiv.org/abs/2010.11929) - DeiT(Training data-efficient image transformers & distillation through attention) [https://arxiv.org/abs/2012.12877](https://arxiv.org/abs/2012.12877) - Swin Transformer [https://arxiv.org/abs/2103.14030](https://arxiv.org/abs/2103.14030) - Swin Transformer V2: Scaling Up Capacity and Resolution [https://arxiv.org/abs/2111.09883](https://arxiv.org/abs/2111.09883) - BEiT: BERT Pre-Training of Image Transformers [https://arxiv.org/abs/2106.08254](https://arxiv.org/abs/2106.08254) - MAE(Masked Autoencoders Are Scalable Vision Learners) [https://arxiv.org/abs/2111.06377](https://arxiv.org/abs/2111.06377) - ConvNeXt(A ConvNet for the 2020s)
[https://arxiv.org/abs/2201.03545](https://arxiv.org/abs/2201.03545) - MobileViT V1 [https://arxiv.org/abs/2110.02178](https://arxiv.org/abs/2110.02178) - MobileViT V2(Separable Self-attention for Mobile Vision Transformers) [https://arxiv.org/abs/2206.02680](https://arxiv.org/abs/2206.02680) - MobileOne(An Improved One millisecond Mobile Backbone) [https://arxiv.org/abs/2206.04040](https://arxiv.org/abs/2206.04040) ## 目标检测(Object Detection) - R-CNN [https://arxiv.org/abs/1311.2524](https://arxiv.org/abs/1311.2524) - Fast R-CNN [https://arxiv.org/abs/1504.08083](https://arxiv.org/abs/1504.08083) - Faster R-CNN [https://arxiv.org/abs/1506.01497](https://arxiv.org/abs/1506.01497) - Cascade R-CNN: Delving into High Quality Object Detection [https://arxiv.org/abs/1712.00726](https://arxiv.org/abs/1712.00726) - Mask R-CNN [https://arxiv.org/abs/1703.06870](https://arxiv.org/abs/1703.06870) - SSD [https://arxiv.org/abs/1512.02325](https://arxiv.org/abs/1512.02325) - FPN(Feature Pyramid Networks for Object Detection) [https://arxiv.org/abs/1612.03144](https://arxiv.org/abs/1612.03144) - RetinaNet(Focal Loss for Dense Object Detection) [https://arxiv.org/abs/1708.02002](https://arxiv.org/abs/1708.02002) - Bag of Freebies for Training Object Detection Neural Networks [https://arxiv.org/abs/1902.04103](https://arxiv.org/abs/1902.04103) - YOLOv1 [https://arxiv.org/abs/1506.02640](https://arxiv.org/abs/1506.02640) - YOLOv2 [https://arxiv.org/abs/1612.08242](https://arxiv.org/abs/1612.08242) - YOLOv3 [https://arxiv.org/abs/1804.02767](https://arxiv.org/abs/1804.02767) - YOLOv4 [https://arxiv.org/abs/2004.10934](https://arxiv.org/abs/2004.10934) - YOLOX(Exceeding YOLO Series in 2021) [https://arxiv.org/abs/2107.08430](https://arxiv.org/abs/2107.08430) - YOLOv7 [https://arxiv.org/abs/2207.02696](https://arxiv.org/abs/2207.02696) - PP-YOLO [https://arxiv.org/abs/2007.12099](https://arxiv.org/abs/2007.12099) - PP-YOLOv2 
[https://arxiv.org/abs/2104.10419](https://arxiv.org/abs/2104.10419) - CornerNet [https://arxiv.org/abs/1808.01244](https://arxiv.org/abs/1808.01244) - FCOS(Old) [https://arxiv.org/abs/1904.01355](https://arxiv.org/abs/1904.01355) - FCOS(New) [https://arxiv.org/abs/2006.09214](https://arxiv.org/abs/2006.09214) - CenterNet [https://arxiv.org/abs/1904.07850](https://arxiv.org/abs/1904.07850) ## 语义分割(Semantic Segmentation) - FCN(Fully Convolutional Networks for Semantic Segmentation) [https://arxiv.org/abs/1411.4038](https://arxiv.org/abs/1411.4038) - UNet(U-Net: Convolutional Networks for Biomedical Image Segmentation) [https://arxiv.org/abs/1505.04597](https://arxiv.org/abs/1505.04597) - DeepLabv1(Semantic Image Segmentation with Deep Convolutional Nets and Fully Connected CRFs) [https://arxiv.org/abs/1412.7062](https://arxiv.org/abs/1412.7062) - DeepLabv2(Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs) [https://arxiv.org/abs/1606.00915](https://arxiv.org/abs/1606.00915) - DeepLabv3(Rethinking Atrous Convolution for Semantic Image Segmentation) [https://arxiv.org/abs/1706.05587](https://arxiv.org/abs/1706.05587) - DeepLabv3+(Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation) [https://arxiv.org/abs/1802.02611](https://arxiv.org/abs/1802.02611) - SegFormer [https://arxiv.org/abs/2105.15203](https://arxiv.org/abs/2105.15203) ## 显著性目标检测(Salient Object Detection) - U2Net [https://arxiv.org/abs/2005.09007](https://arxiv.org/abs/2005.09007) ## 实例分割(Instance Segmentation) - Mask R-CNN [https://arxiv.org/abs/1703.06870](https://arxiv.org/abs/1703.06870) ## 关键点检测(Keypoint Detection) - HRNet(Deep High-Resolution Representation Learning for Human Pose Estimation) [https://arxiv.org/abs/1902.09212](https://arxiv.org/abs/1902.09212) ## 网络量化(Quantization) - Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference 
[https://arxiv.org/abs/1712.05877](https://arxiv.org/abs/1712.05877) - Quantizing deep convolutional networks for efficient inference: A whitepaper [https://arxiv.org/abs/1806.08342](https://arxiv.org/abs/1806.08342) - Data-Free Quantization Through Weight Equalization and Bias Correction [https://arxiv.org/abs/1906.04721](https://arxiv.org/abs/1906.04721) - LSQ: Learned Step Size Quantization [https://arxiv.org/abs/1902.08153](https://arxiv.org/abs/1902.08153) - LSQ+: Improving low-bit quantization through learnable offsets and better initialization [https://arxiv.org/abs/2004.09576](https://arxiv.org/abs/2004.09576) ## 自然语言处理 - Attention Is All You Need [https://arxiv.org/abs/1706.03762](https://arxiv.org/abs/1706.03762) ## Others - Microsoft COCO: Common Objects in Context [https://arxiv.org/abs/1405.0312](https://arxiv.org/abs/1405.0312) - The PASCAL Visual Object Classes Challenge: A Retrospective [http://host.robots.ox.ac.uk/pascal/VOC/pubs/everingham15.pdf](http://host.robots.ox.ac.uk/pascal/VOC/pubs/everingham15.pdf) - Grad-CAM: Visual Explanations from Deep Networks via Gradient-based Localization [https://arxiv.org/abs/1610.02391](https://arxiv.org/abs/1610.02391) ================================================ FILE: course_ppt/README.md ================================================ # 为了精简项目,课程中的所有ppt都已转存至百度云 **所有PPT都放在该文件夹中** 链接:https://pan.baidu.com/s/1VL6QTQ86sfY2aMDVo4Z-kg 提取码:4ydw **下面为单独每个ppt的链接**: ## 分类网络相关 - **AlexNet** 链接: https://pan.baidu.com/s/1RJn5lzY8LwrmckUPvXcjmg 提取码: 34ue - **VGG** 链接: https://pan.baidu.com/s/1BnYpdaDwAIcgRm7YwakEZw 提取码: 8ev0 - **GoogLeNet** 链接: https://pan.baidu.com/s/1XjZXprvayV3dDMvLjoOk3A 提取码: 9hq4 - **ResNet** 链接: https://pan.baidu.com/s/1I2LUlwCSjNKr37T0n3NKzg 提取码: f1s9 - **ResNeXt** 链接:https://pan.baidu.com/s/1-anFYX5572MJmiQym9D4Eg 提取码:f8ob - **MobileNet_v1_v2** 链接: https://pan.baidu.com/s/1ReDDCuK8wyH0XqniUgiSYQ 提取码: ipqv - **MobileNet_v3** 链接:https://pan.baidu.com/s/13mzSpyxuA4T4ki7kEN1Xqw 提取码:fp5g -
**ShuffleNet_v1_v2** 链接:https://pan.baidu.com/s/1-DDwePMPCDvjw08YU8nAAA 提取码:ad6n - **EfficientNet_v1** 链接:https://pan.baidu.com/s/1Sep9W0vLzfjhcHAXr6Bv0Q 提取码:eufl - **EfficientNet_v2** 链接:https://pan.baidu.com/s/1tesrgY4CHLmq6P7s7TcHCw 提取码:y2kz - **Transformer** 链接:https://pan.baidu.com/s/1DE6RDySr7NS0HQ35gBqP_g 提取码:y9e7 - **Vision Transformer** 链接:https://pan.baidu.com/s/1wzpHG8EK5gxg6UCMscYqMw 提取码:cm1m - **Swin Transformer** 链接:https://pan.baidu.com/s/1O6XEEZUb6B6AGYON7-EOgA 提取码:qkrn - **ConvNeXt** 链接:https://pan.baidu.com/s/1mgZjkirJPZ8huVls-O0xXA 提取码:kvqx - **RepVGG** 链接:https://pan.baidu.com/s/1uJP3hCHI79-tUdBNR_VAWQ 提取码:qe8a - **MobileViT** 链接:https://pan.baidu.com/s/1F8QJtFhTPWX8Vjr8_97scQ 提取码:lfn5 - **ConfusionMatrix** 链接: https://pan.baidu.com/s/1EtKzHkZyv2XssYtqmGYCLg 提取码: uoo5 - **Grad-CAM** 链接:https://pan.baidu.com/s/1ZHKBW7hINQXFI36hBYdC0Q 提取码:aru7 ## 目标检测网络相关 - **R-CNN** 链接: https://pan.baidu.com/s/1l_ZxkfJdyp3KoMLqwWbx5A 提取码: nm1l - **Fast R-CNN** 链接: https://pan.baidu.com/s/1Pe_Tg43OVo-yZWj7t-_L6Q 提取码: fe73 - **Faster R-CNN** 链接:https://pan.baidu.com/s/1Dd0d_LY8l7Y1YkHQhp-WfA 提取码:vzp4 - **FPN** 链接:https://pan.baidu.com/s/1O9H0iqQMg9f_FZezUEKZ9g 提取码:qbl8 - **SSD** 链接: https://pan.baidu.com/s/15zF3GhIdg-E_tZX2Y2X-rw 提取码: u7k1 - **RetinaNet** 链接:https://pan.baidu.com/s/1beW612VCSnSu-v8iu_2-fA 提取码:vqbu - **YOLOv1** 链接: https://pan.baidu.com/s/1vVyUNQHYEGjqosezlx_1Mg 提取码: b3i0 - **YOLOv2** 链接: https://pan.baidu.com/s/132aW1e_NYbaxxGi3cDVLYg 提取码: tak7 - **YOLOv3** 链接:https://pan.baidu.com/s/1hZqdgh7wA7QeGAYTttlVOQ 提取码:5ulo - **YOLOv3SPP** 链接: https://pan.baidu.com/s/15LRssnPez9pn6jRpW89Wlw 提取码: nv9f - **YOLOv4** 链接:https://pan.baidu.com/s/1Ltw4v1pg0eZNFYR2ZBbZmQ 提取码:qjx4 - **YOLOv5** 链接:https://pan.baidu.com/s/1rnvjwHLvOlJ9KpJ5z95GWw 提取码:kt04 - **YOLOX** 链接:https://pan.baidu.com/s/1ex54twQC7hBE3szNko_K5A 提取码:al0r - **FCOS** 链接: https://pan.baidu.com/s/1KUc9dzvAbtwtGGm3ZZy_cw 提取码: h0as - **Calculate mAP** 链接: https://pan.baidu.com/s/1jdA_n78J7nSUoOg6TTO5Bg 
import os
from shutil import copy, rmtree
import random


def mk_file(file_path: str):
    """Create an empty directory at ``file_path``, replacing any existing one.

    Args:
        file_path: Directory to (re)create. If the path already exists it is
            removed with ``rmtree`` first, so its previous contents are lost.
    """
    if os.path.exists(file_path):
        # Start from a clean directory so files from an earlier run do not linger.
        rmtree(file_path)
    os.makedirs(file_path)


def main():
    """Split ``flower_data/flower_photos`` into ``train`` and ``val`` folders.

    The script looks for ``<cwd>/flower_data/flower_photos`` where every
    sub-directory is one class. For each class, 10% of the files are copied
    to ``flower_data/val/<class>`` and the rest to ``flower_data/train/<class>``.
    Existing ``train``/``val`` folders are recreated from scratch.

    Raises:
        AssertionError: If the ``flower_photos`` directory does not exist.
    """
    # Fixed seed so the sampled validation subset is repeatable.
    random.seed(0)

    # Fraction of every class that goes to the validation set.
    split_rate = 0.1

    # Points at the extracted flower_photos folder.
    cwd = os.getcwd()
    data_root = os.path.join(cwd, "flower_data")
    origin_flower_path = os.path.join(data_root, "flower_photos")
    assert os.path.exists(origin_flower_path), "path '{}' does not exist.".format(origin_flower_path)

    # os.listdir() returns entries in arbitrary order; sorting keeps the order
    # in which the seeded RNG is consumed identical on every platform.
    flower_class = sorted(cla for cla in os.listdir(origin_flower_path)
                          if os.path.isdir(os.path.join(origin_flower_path, cla)))

    # Create (or reset) train/ and val/ with one sub-folder per class.
    train_root = os.path.join(data_root, "train")
    val_root = os.path.join(data_root, "val")
    for split_root in (train_root, val_root):
        mk_file(split_root)
        for cla in flower_class:
            mk_file(os.path.join(split_root, cla))

    for cla in flower_class:
        cla_path = os.path.join(origin_flower_path, cla)
        # Sorted for the same reproducibility reason as the class list above.
        images = sorted(os.listdir(cla_path))
        num = len(images)
        # Sample the validation files; a set gives O(1) membership tests below.
        eval_index = set(random.sample(images, k=int(num * split_rate)))
        for index, image in enumerate(images):
            # Each file goes to exactly one of the two splits.
            target_root = val_root if image in eval_index else train_root
            copy(os.path.join(cla_path, image), os.path.join(target_root, cla))
            print("\r[{}] processing [{}/{}]".format(cla, index+1, num), end="")  # processing bar
        print()

    print("processing done!")


if __name__ == '__main__':
    main()
main(save_path=None): assert isinstance(save_path, str), "lack of save_path parameter..." # create model model = resnet34(num_classes=5) # load model weights model_weight_path = "./resNet34.pth" model.load_state_dict(torch.load(model_weight_path, map_location=device)) model.eval() # input to the model # [batch, channel, height, width] x = torch.rand(1, 3, 224, 224, requires_grad=True) torch_out = model(x) # export the model torch.onnx.export(model, # model being run x, # model input (or a tuple for multiple inputs) save_path, # where to save the model (can be a file or file-like object) export_params=True, # store the trained parameter weights inside the model file opset_version=10, # the ONNX version to export the model to do_constant_folding=True, # whether to execute constant folding for optimization input_names=["input"], # the model's input names output_names=["output"], # the model's output names dynamic_axes={"input": {0: "batch_size"}, # variable length axes "output": {0: "batch_size"}}) # check onnx model onnx_model = onnx.load(save_path) onnx.checker.check_model(onnx_model) ort_session = onnxruntime.InferenceSession(save_path) # compute ONNX Runtime output prediction ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)} ort_outs = ort_session.run(None, ort_inputs) # compare ONNX Runtime and Pytorch results # assert_allclose: Raises an AssertionError if two objects are not equal up to desired tolerance. 
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 conv + batch-norm layers.

    ``expansion`` is the ratio between the block's output channels and
    ``out_channel``; it is 1 for this block.

    Args:
        in_channel: Number of channels of the input tensor.
        out_channel: Number of channels produced by both convolutions.
        stride: Stride of the first convolution.
        downsample: Optional module applied to the input to build the
            shortcut branch; when ``None`` the input itself is the shortcut.
    """

    expansion = 1

    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        super().__init__()
        # Only the first conv may change the spatial size (via ``stride``).
        self.conv1 = nn.Conv2d(in_channel, out_channel, 3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(out_channel, out_channel, 3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        out += shortcut
        return self.relu(out)
class ResNet(nn.Module):
    """ResNet backbone assembled from a residual ``block`` class.

    Args:
        block: Residual block class. It must expose an ``expansion`` attribute
            and accept ``(in_channel, out_channel, stride=..., downsample=...)``.
        blocks_num: Number of blocks in each of the four stages.
        num_classes: Output size of the final linear layer.
        include_top: When ``False`` the pooling and linear head are not
            created and ``forward`` returns the last stage's feature map.
    """

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True):
        super().__init__()
        self.include_top = include_top
        # Running channel count; updated by _make_layer as stages are built.
        self.in_channel = 64

        # Stem: 7x7 stride-2 conv followed by a 3x3 stride-2 max pool.
        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four stages, registered as layer1..layer4; every stage after the
        # first uses stride 2.
        for stage, width in enumerate((64, 128, 256, 512)):
            stage_stride = 1 if stage == 0 else 2
            stage_module = self._make_layer(block, width, blocks_num[stage], stride=stage_stride)
            setattr(self, "layer{}".format(stage + 1), stage_module)

        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Re-initialise every convolution with Kaiming-normal weights.
        for module in self.modules():
            if not isinstance(module, nn.Conv2d):
                continue
            nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` blocks and update ``self.in_channel``.

        The first block receives ``stride`` and, when the stride or channel
        count changes, a 1x1 conv + batch-norm shortcut; the remaining blocks
        use the block's defaults.
        """
        stage_out = channel * block.expansion

        shortcut = None
        if stride != 1 or self.in_channel != stage_out:
            shortcut = nn.Sequential(
                nn.Conv2d(self.in_channel, stage_out, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(stage_out))

        blocks = [block(self.in_channel, channel, downsample=shortcut, stride=stride)]
        # Must be updated before the remaining blocks are constructed.
        self.in_channel = stage_out
        blocks.extend(block(self.in_channel, channel) for _ in range(1, block_num))

        return nn.Sequential(*blocks)

    def forward(self, x):
        """Run the stem and four stages, then the head if ``include_top``."""
        backbone = (self.conv1, self.bn1, self.relu, self.maxpool,
                    self.layer1, self.layer2, self.layer3, self.layer4)
        for stage in backbone:
            x = stage(x)

        if not self.include_top:
            return x

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        return self.fc(x)
def normalize(image: np.ndarray) -> np.ndarray:
    """
    Scale an image to [0, 1] and standardise it per channel.

    The input is copied to float32, divided by 255, then the per-channel
    mean is subtracted and the result divided by the per-channel std.
    The three mean/std values broadcast over the last axis.
    """
    channel_mean = (0.485, 0.456, 0.406)
    channel_std = (0.229, 0.224, 0.225)

    # astype() returns a new array, so the caller's image is left untouched.
    scaled = image.astype(np.float32)
    np.divide(scaled, 255.0, out=scaled)
    np.subtract(scaled, channel_mean, out=scaled)
    np.divide(scaled, channel_std, out=scaled)
    return scaled
def plot_fps(v: dict):
    """Draw a bar chart of FPS per model format, save it, then display it.

    Args:
        v: Mapping from a label (used as the x tick text) to its FPS value.

    The chart is written to ``fps_vs.jpg`` in the current working directory.
    """
    labels = list(v.keys())
    fps_values = list(v.values())
    positions = range(len(labels))

    plt.bar(positions, fps_values, align='center')
    plt.xticks(positions, labels)
    # Print each value slightly above its bar.
    for position, fps in enumerate(fps_values):
        plt.text(x=position, y=fps+0.5, s=f"{fps:.2f}", ha='center')

    plt.xlabel('model format')
    plt.ylabel('fps')
    plt.title('FPS comparison')
    # Save before show(): once a blocking show() returns the figure may already
    # be closed, and a later savefig() would write an empty canvas.
    plt.savefig('fps_vs.jpg')
    plt.show()
def to_numpy(tensor):
    """Return the tensor's data as a NumPy array on the CPU.

    Tensors that require grad are detached first, because ``.numpy()``
    cannot be called on a tensor that is part of the autograd graph.
    """
    if tensor.requires_grad:
        tensor = tensor.detach()
    return tensor.cpu().numpy()
"resNet34(flower).pth" onnx_file_name = "resnet34.onnx" batch_size = 1 img_h = 224 img_w = 224 img_channel = 3 # create model and load pretrain weights model = resnet34(pretrained=False, num_classes=5) model.load_state_dict(torch.load(weights_path, map_location='cpu')) model.eval() # input to the model # [batch, channel, height, width] x = torch.rand(batch_size, img_channel, img_h, img_w, requires_grad=True) torch_out = model(x) # export the model torch.onnx.export(model, # model being run x, # model input (or a tuple for multiple inputs) onnx_file_name, # where to save the model (can be a file or file-like object) verbose=False) # check onnx model onnx_model = onnx.load(onnx_file_name) onnx.checker.check_model(onnx_model) ort_session = onnxruntime.InferenceSession(onnx_file_name) # compute ONNX Runtime output prediction ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)} ort_outs = ort_session.run(None, ort_inputs) # compare ONNX Runtime and Pytorch results # assert_allclose: Raises an AssertionError if two objects are not equal up to desired tolerance. np.testing.assert_allclose(to_numpy(torch_out), ort_outs[0], rtol=1e-03, atol=1e-05) print("Exported model has been tested with ONNXRuntime, and the result looks good!") if __name__ == '__main__': main() ================================================ FILE: deploying_service/deploying_pytorch/convert_openvino/convert_resnet34/model.py ================================================ from typing import Callable, List, Optional import torch from torch import nn, Tensor from torch.nn import functional as F from functools import partial def _make_divisible(ch, divisor=8, min_ch=None): """ This function is taken from the original tf repo. 
class ConvBNActivation(nn.Sequential):
    """Bias-free ``Conv2d`` followed by a norm layer and an activation.

    Args:
        in_planes: Input channels of the convolution.
        out_planes: Output channels of the convolution (and norm features).
        kernel_size: Convolution kernel size; padding is ``(kernel_size - 1) // 2``.
        stride: Convolution stride.
        groups: Convolution groups.
        norm_layer: Callable building the norm layer from ``out_planes``;
            defaults to ``nn.BatchNorm2d``.
        activation_layer: Callable building the activation, called with
            ``inplace=True``; defaults to ``nn.ReLU6``.
    """

    def __init__(self,
                 in_planes: int,
                 out_planes: int,
                 kernel_size: int = 3,
                 stride: int = 1,
                 groups: int = 1,
                 norm_layer: Optional[Callable[..., nn.Module]] = None,
                 activation_layer: Optional[Callable[..., nn.Module]] = None):
        build_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        build_act = nn.ReLU6 if activation_layer is None else activation_layer

        conv = nn.Conv2d(in_planes,
                         out_planes,
                         kernel_size,
                         stride=stride,
                         padding=(kernel_size - 1) // 2,
                         groups=groups,
                         bias=False)
        norm = build_norm(out_planes)
        act = build_act(inplace=True)
        super().__init__(conv, norm, act)
class InvertedResidual(nn.Module):
    """MobileNetV3 inverted-residual block built from an ``InvertedResidualConfig``.

    The block is: optional 1x1 expansion -> depthwise conv -> optional
    squeeze-excitation -> 1x1 projection without activation. The input is
    added to the output when the stride is 1 and the input and output
    channel counts match.

    Args:
        cnf: Block configuration (channels, kernel, stride, SE / h-swish flags).
        norm_layer: Callable used to build every normalisation layer.

    Raises:
        ValueError: If ``cnf.stride`` is neither 1 nor 2.
    """

    def __init__(self, cnf: InvertedResidualConfig, norm_layer: Callable[..., nn.Module]):
        super().__init__()

        if cnf.stride not in (1, 2):
            raise ValueError("illegal stride value.")

        act = nn.Hardswish if cnf.use_hs else nn.ReLU
        stages: List[nn.Module] = []

        # 1x1 expansion, skipped when the block does not widen its input.
        if cnf.expanded_c != cnf.input_c:
            expand = ConvBNActivation(cnf.input_c,
                                      cnf.expanded_c,
                                      kernel_size=1,
                                      norm_layer=norm_layer,
                                      activation_layer=act)
            stages.append(expand)

        # Depthwise conv: one group per channel.
        depthwise = ConvBNActivation(cnf.expanded_c,
                                     cnf.expanded_c,
                                     kernel_size=cnf.kernel,
                                     stride=cnf.stride,
                                     groups=cnf.expanded_c,
                                     norm_layer=norm_layer,
                                     activation_layer=act)
        stages.append(depthwise)

        if cnf.use_se:
            stages.append(SqueezeExcitation(cnf.expanded_c))

        # Linear 1x1 projection (Identity in place of an activation).
        project = ConvBNActivation(cnf.expanded_c,
                                   cnf.out_c,
                                   kernel_size=1,
                                   norm_layer=norm_layer,
                                   activation_layer=nn.Identity)
        stages.append(project)

        self.use_res_connect = (cnf.stride == 1 and cnf.input_c == cnf.out_c)
        self.block = nn.Sequential(*stages)
        self.out_channels = cnf.out_c
        self.is_strided = cnf.stride > 1

    def forward(self, x: Tensor) -> Tensor:
        """Apply the block, adding the input when the residual path is enabled."""
        out = self.block(x)
        if not self.use_res_connect:
            return out
        out += x
        return out
InvertedResidual if norm_layer is None: norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.01) layers: List[nn.Module] = [] # building first layer firstconv_output_c = inverted_residual_setting[0].input_c layers.append(ConvBNActivation(3, firstconv_output_c, kernel_size=3, stride=2, norm_layer=norm_layer, activation_layer=nn.Hardswish)) # building inverted residual blocks for cnf in inverted_residual_setting: layers.append(block(cnf, norm_layer)) # building last several layers lastconv_input_c = inverted_residual_setting[-1].out_c lastconv_output_c = 6 * lastconv_input_c layers.append(ConvBNActivation(lastconv_input_c, lastconv_output_c, kernel_size=1, norm_layer=norm_layer, activation_layer=nn.Hardswish)) self.features = nn.Sequential(*layers) self.avgpool = nn.AdaptiveAvgPool2d(1) self.classifier = nn.Sequential(nn.Linear(lastconv_output_c, last_channel), nn.Hardswish(inplace=True), nn.Dropout(p=0.2, inplace=True), nn.Linear(last_channel, num_classes)) # initial weights for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode="fan_out") if m.bias is not None: nn.init.zeros_(m.bias) elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): nn.init.ones_(m.weight) nn.init.zeros_(m.bias) elif isinstance(m, nn.Linear): nn.init.normal_(m.weight, 0, 0.01) nn.init.zeros_(m.bias) def _forward_impl(self, x: Tensor) -> Tensor: x = self.features(x) x = self.avgpool(x) x = torch.flatten(x, 1) x = self.classifier(x) return x def forward(self, x: Tensor) -> Tensor: return self._forward_impl(x) def mobilenet_v3_large(num_classes: int = 1000, reduced_tail: bool = False) -> MobileNetV3: """ Constructs a large MobileNetV3 architecture from "Searching for MobileNetV3" . weights_link: https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth Args: num_classes (int): number of classes reduced_tail (bool): If True, reduces the channel counts of all feature layers between C4 and C5 by 2. 
def mobilenet_v3_small(num_classes: int = 1000,
                       reduced_tail: bool = False) -> MobileNetV3:
    """
    Constructs a small MobileNetV3 architecture from
    "Searching for MobileNetV3" .

    weights_link:
    https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth

    Args:
        num_classes (int): number of classes
        reduced_tail (bool): If True, reduces the channel counts of all feature layers
            between C4 and C5 by 2. It is used to reduce the channel redundancy in the
            backbone for Detection and Segmentation.

    Returns:
        MobileNetV3: model built from the small-variant block table below.
    """
    width_multi = 1.0
    bneck_conf = partial(InvertedResidualConfig, width_multi=width_multi)
    adjust_channels = partial(InvertedResidualConfig.adjust_channels, width_multi=width_multi)

    # Channel counts of the tail (C4 onwards) are halved when reduced_tail is set.
    reduce_divider = 2 if reduced_tail else 1

    inverted_residual_setting = [
        # input_c, kernel, expanded_c, out_c, use_se, activation, stride
        bneck_conf(16, 3, 16, 16, True, "RE", 2),  # C1
        bneck_conf(16, 3, 72, 24, False, "RE", 2),  # C2
        bneck_conf(24, 3, 88, 24, False, "RE", 1),
        bneck_conf(24, 5, 96, 40, True, "HS", 2),  # C3
        bneck_conf(40, 5, 240, 40, True, "HS", 1),
        bneck_conf(40, 5, 240, 40, True, "HS", 1),
        bneck_conf(40, 5, 120, 48, True, "HS", 1),
        bneck_conf(48, 5, 144, 48, True, "HS", 1),
        bneck_conf(48, 5, 288, 96 // reduce_divider, True, "HS", 2),  # C4
        bneck_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, "HS", 1),
        bneck_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, "HS", 1)
    ]
    last_channel = adjust_channels(1024 // reduce_divider)  # C5

    return MobileNetV3(inverted_residual_setting=inverted_residual_setting,
                       last_channel=last_channel,
                       num_classes=num_classes)
'stat_requests_number': 2, 'eval_requests_number': 2 }) dataset_config = { 'data_source': data_path } algorithms = [ { 'name': 'DefaultQuantization', 'params': { 'target_device': 'CPU', 'preset': 'performance', 'stat_subset_size': 300 } } ] # Steps 1-7: Model optimization # Step 1: Load the model. model = load_model(model_config) # Step 2: Initialize the data loader. _, _, val_images_path, val_images_label = read_split_data(data_path, val_rate=0.2) data_loader = MyDataLoader(dataset_config, val_images_path, val_images_label, img_w, img_h) # Step 3 (Optional. Required for AccuracyAwareQuantization): Initialize the metric. metric = Accuracy(top_k=1) # Step 4: Initialize the engine for metric calculation and statistics collection. engine = IEEngine(engine_config, data_loader, metric) # Step 5: Create a pipeline of compression algorithms. pipeline = create_pipeline(algorithms, engine) # Step 6: Execute the pipeline. compressed_model = pipeline.run(model) # Step 7 (Optional): Compress model weights quantized precision # in order to reduce the size of final .bin file. compress_model_weights(compressed_model) # Step 8: Save the compressed model to the desired path. compressed_model_paths = save_model(model=compressed_model, save_path=save_dir, model_name=model_name) # Step 9: Compare accuracy of the original and quantized models. 
def read_split_data(root: str, val_rate: float = 0.2):
    """Split an image-folder dataset into training and validation lists.

    Every sub-directory of ``root`` is treated as one class. The class
    index -> class name mapping is written to ``class_indices.json`` in the
    current working directory.

    Args:
        root: dataset root directory containing one sub-directory per class.
        val_rate: fraction of each class that is sampled for validation
            (``int(len(images) * val_rate)`` images per class).

    Returns:
        A 4-tuple ``(train_images_path, train_images_label,
        val_images_path, val_images_label)`` of parallel lists.

    Raises:
        AssertionError: if ``root`` does not exist.
    """
    random.seed(0)  # fixed seed so the sampled validation subset is repeatable
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # One folder per class; sorted so the class indices are stable.
    flower_class = sorted(cla for cla in os.listdir(root)
                          if os.path.isdir(os.path.join(root, cla)))
    # Class name -> numeric index, and the inverse mapping dumped to disk.
    class_indices = {name: idx for idx, name in enumerate(flower_class)}
    json_str = json.dumps({idx: name for name, idx in class_indices.items()}, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []   # paths of all training images
    train_images_label = []  # class index of every training image
    val_images_path = []     # paths of all validation images
    val_images_label = []    # class index of every validation image
    total_images = 0         # number of supported images found
    supported = (".jpg", ".JPG", ".png", ".PNG")  # accepted file suffixes

    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # os.listdir() order is filesystem dependent; sort so that the seeded
        # sampling below picks the same files on every machine.
        images = sorted(os.path.join(cla_path, name) for name in os.listdir(cla_path)
                        if os.path.splitext(name)[-1] in supported)
        image_class = class_indices[cla]
        total_images += len(images)

        # A set gives O(1) membership tests in the loop below.
        val_selected = set(random.sample(images, k=int(len(images) * val_rate)))
        for img_path in images:
            if img_path in val_selected:
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.".format(total_images))
    print("{} images for training.".format(len(train_images_path)))
    print("{} images for validation.".format(len(val_images_path)))

    return train_images_path, train_images_label, val_images_path, val_images_label
class MyDataLoader(DataLoader):
    """Data loader that feeds labelled images to the quantization engine.

    Each image is resized so its shorter side equals ``min(img_h, img_w)``,
    centre-cropped to ``(img_h, img_w)`` and returned in NCHW layout.
    Pixel values are not scaled or mean/std normalised here
    (NOTE(review): presumably that is baked into the IR model - confirm).
    """

    def __init__(self, cfg, images_path: list, images_label: list, img_w: int = 224, img_h: int = 224):
        """
        :param cfg: configuration forwarded to the base ``DataLoader``.
        :param images_path: list of image file paths.
        :param images_label: list of class indices, parallel to ``images_path``.
        :param img_w: network input width.
        :param img_h: network input height.
        """
        super().__init__(cfg)
        self.images_path = images_path
        self.images_label = images_label
        self.image_w = img_w
        self.image_h = img_h
        self.transforms = transforms.Compose([
            transforms.Resize(min(img_h, img_w)),
            transforms.CenterCrop((img_h, img_w))
        ])

    def __len__(self):
        """Return the number of samples."""
        return len(self.images_label)

    def __getitem__(self, index):
        """
        Return one sample of index, label and picture.
        :param index: index of the taken sample.
        :return: ``((index, label), image)`` where image has layout [1, c, h, w].
        :raises IndexError: if ``index`` is past the last sample.
        """
        if index >= len(self):
            raise IndexError

        # Force 3 channels: grayscale / palette / RGBA files (PNG is an accepted
        # suffix) would otherwise break the [h, w, c] transpose below or produce
        # a channel count the network does not expect. The context manager
        # releases the file handle as soon as the pixels are decoded.
        with Image.open(self.images_path[index]) as raw_img:
            img = raw_img.convert("RGB")
        img = self.transforms(img)

        # Convert the resized images to network input shape
        # [h, w, c] -> [c, h, w] -> [1, c, h, w]
        img = np.expand_dims(np.transpose(np.array(img), (2, 0, 1)), 0)

        return (index, self.images_label[index]), img
def normalize(image: np.ndarray) -> np.ndarray:
    """
    Return a float32 copy of ``image`` with every value divided by 255.

    No mean/std normalisation is applied; the input array is left untouched.
    """
    as_float = image.astype(np.float32)
    return as_float / 255.0
def plot_fps(v: dict):
    """Draw a bar chart of FPS per model format, save it, then display it.

    The chart is written to ``fps_vs.jpg`` in the current working directory.

    Args:
        v: mapping from a model-format label to its measured FPS.
    """
    labels = list(v.keys())
    fps_values = list(v.values())
    positions = range(len(labels))

    plt.bar(positions, fps_values, align='center')
    plt.xticks(positions, labels)
    # Print the numeric value slightly above every bar.
    for pos, fps in enumerate(fps_values):
        plt.text(x=pos, y=fps + 0.5, s=f"{fps:.2f}", ha='center')

    plt.xlabel('model format')
    plt.ylabel('fps')
    plt.title('FPS comparison')
    # Save before show(): once the window opened by show() is closed the
    # figure is released, and a later savefig() would write an empty canvas.
    plt.savefig('fps_vs.jpg')
    plt.show()
def ir_inference(ir_path: str, image: np.ndarray):
    """Run a single CPU inference of an OpenVINO IR model.

    Args:
        ir_path: path to the IR ``.xml`` file.
        image: input array passed to the compiled model as its only input.

    Returns:
        The result belonging to the model's first output.
    """
    # Load the network in Inference Engine
    ie = Core()
    model_ir = ie.read_model(model=ir_path)
    compiled_model_ir = ie.compile_model(model=model_ir, device_name="CPU")

    # Only the output handle is needed: the input is passed positionally below.
    output_layer_ir = next(iter(compiled_model_ir.outputs))

    # Run inference on the input image
    return compiled_model_ir([image])[output_layer_ir]
normalized_input_image) ir_res = ir_inference(ir_path, input_image) np.testing.assert_allclose(onnx_res, ir_res, rtol=1e-03, atol=1e-05) print("Exported model has been tested with OpenvinoRuntime, and the result looks good!") if __name__ == '__main__': main() ================================================ FILE: deploying_service/deploying_pytorch/convert_openvino/convert_yolov5/draw_box_utils.py ================================================ from PIL.Image import Image, fromarray import PIL.ImageDraw as ImageDraw import PIL.ImageFont as ImageFont from PIL import ImageColor import numpy as np STANDARD_COLORS = [ 'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque', 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite', 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan', 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange', 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet', 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite', 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod', 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki', 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue', 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey', 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue', 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime', 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid', 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen', 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin', 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed', 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed', 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple', 'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 
def _text_size(font, text: str):
    """Return ``(width, height)`` of ``text``, working across Pillow versions."""
    if hasattr(font, "getsize"):
        # Older Pillow: keep using the original API so results are unchanged.
        return font.getsize(text)
    # Pillow >= 10 removed getsize(); the right/bottom edges of the bounding
    # box are the values getsize() used to report.
    _, _, right, bottom = font.getbbox(text)
    return right, bottom


def draw_text(draw,
              box: list,
              cls: int,
              score: float,
              category_index: dict,
              color: str,
              font: str = 'arial.ttf',
              font_size: int = 24):
    """
    Draw the class name and score of one detection next to its bounding box.

    Args:
        draw: drawing object providing ``rectangle`` and ``text``.
        box: ``[left, top, right, bottom]`` of the detection.
        cls: class id; looked up in ``category_index`` as ``str(cls)``.
        score: confidence, rendered as an integer percentage.
        category_index: mapping from class-id string to class name.
        color: fill colour of the label background.
        font: TrueType font file; the default font is used if it cannot be loaded.
        font_size: font size passed to the TrueType loader.
    """
    try:
        font = ImageFont.truetype(font, font_size)
    except IOError:
        font = ImageFont.load_default()

    left, top, right, bottom = box
    # If the total height of the display strings added to the top of the bounding
    # box exceeds the top of the image, stack the strings below the bounding box
    # instead of above.
    display_str = f"{category_index[str(cls)]}: {int(100 * score)}%"
    # The label is measured and drawn one character at a time.
    display_str_heights = [_text_size(font, ds)[1] for ds in display_str]
    # Each display_str has a top and bottom margin of 0.05x.
    display_str_height = (1 + 2 * 0.05) * max(display_str_heights)

    if top > display_str_height:
        text_top = top - display_str_height
        text_bottom = top
    else:
        text_top = bottom
        text_bottom = bottom + display_str_height

    for ds in display_str:
        text_width, _ = _text_size(font, ds)
        margin = np.ceil(0.05 * text_width)
        draw.rectangle([(left, text_top),
                        (left + text_width + 2 * margin, text_bottom)], fill=color)
        draw.text((left + margin, text_top),
                  ds,
                  fill='black',
                  font=font)
        left += text_width
box_thresh: float = 0.1, mask_thresh: float = 0.5, line_thickness: int = 8, font: str = 'arial.ttf', font_size: int = 24, draw_boxes_on_image: bool = True, draw_masks_on_image: bool = False): """ 将目标边界框信息,类别信息,mask信息绘制在图片上 Args: image: 需要绘制的图片 boxes: 目标边界框信息 classes: 目标类别信息 scores: 目标概率信息 masks: 目标mask信息 category_index: 类别与名称字典 box_thresh: 过滤的概率阈值 mask_thresh: line_thickness: 边界框宽度 font: 字体类型 font_size: 字体大小 draw_boxes_on_image: draw_masks_on_image: Returns: """ # 过滤掉低概率的目标 idxs = np.greater(scores, box_thresh) boxes = boxes[idxs] classes = classes[idxs] scores = scores[idxs] if masks is not None: masks = masks[idxs] if len(boxes) == 0: return image colors = [ImageColor.getrgb(STANDARD_COLORS[cls % len(STANDARD_COLORS)]) for cls in classes] if draw_boxes_on_image: # Draw all boxes onto image. draw = ImageDraw.Draw(image) for box, cls, score, color in zip(boxes, classes, scores, colors): left, top, right, bottom = box # 绘制目标边界框 draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=line_thickness, fill=color) # 绘制类别和概率信息 draw_text(draw, box.tolist(), int(cls), float(score), category_index, color, font, font_size) if draw_masks_on_image and (masks is not None): # Draw all mask onto image. 
def main():
    """Evaluate the quantized YOLOv5s IR model on the COCO 2017 validation split."""
    coco_root = "/data/coco2017"
    model_xml = "quant_ir_output/quantized_yolov5s.xml"
    input_hw = (640, 640)  # (height, width)

    loader = MyDataLoader(coco_root, "val", size=input_hw)
    metrics = EvalCOCOMetric(coco=loader.coco,
                             classes_mapping=loader.coco_id80_to_id91)

    # Read and compile the network for CPU execution.
    core = Core()
    compiled = core.compile_model(model=core.read_model(model=model_xml),
                                  device_name="CPU")
    input_ports = compiled.inputs
    output_ports = compiled.outputs

    infer_request = compiled.create_infer_request()
    for sample_idx in tqdm(range(len(loader))):
        ann, img, info = loader[sample_idx]
        ann = ann + (info,)

        infer_request.infer(inputs={input_ports[0]: img})
        raw_output = infer_request.get_output_tensor(output_ports[0].index).data

        # Post-process: keep the NMS result of the (single) image in the batch.
        detections = non_max_suppression(torch.Tensor(raw_output),
                                         conf_thres=0.001,
                                         iou_thres=0.6,
                                         multi_label=True)[0]
        det_boxes = detections[:, :4].numpy()
        det_scores = detections[:, 4].numpy()
        det_classes = detections[:, 5].numpy().astype(int)
        metrics.update(ann, [det_boxes, det_classes, det_scores])

    metrics.evaluate()
def main():
    """Quantize the YOLOv5s IR model to INT8 with POT and report the metric before and after."""
    coco_root = "/data/coco2017"
    input_w = 640
    input_h = 640

    model_config = Dict({
        'model_name': 'yolov5s',
        'model': "ir_output/yolov5s.xml",
        'weights': "ir_output/yolov5s.bin",
        'inputs': 'images',
        'outputs': 'output'
    })
    engine_config = Dict({'device': 'CPU'})

    quantization_params = {
        'target_device': 'CPU',
        'preset': 'performance',
        'stat_subset_size': 300
    }
    algorithms = [{'name': 'DefaultQuantization', 'params': quantization_params}]

    # Model, data loader, metric, engine and pipeline, created in that order.
    float_model = load_model(model_config)
    loader = MyDataLoader(coco_root, "val", (input_h, input_w))
    # A metric is optional for DefaultQuantization; it is used for the
    # evaluation at the end of this function.
    map_metric = MAPMetric(map_value="map")
    engine = IEEngine(config=engine_config, data_loader=loader, metric=map_metric)
    pipeline = create_pipeline(algorithms, engine)

    # Run the quantization and time it.
    algo_name = pipeline.algo_seq[0].name
    spinner_text = f"Executing POT pipeline on {model_config['model']} with {algo_name}"
    with yaspin(text=spinner_text) as sp:
        started = time.perf_counter()
        int8_model = pipeline.run(float_model)
        finished = time.perf_counter()
        sp.ok("✔")
        print(f"Quantization finished in {finished - started:.2f} seconds")

    # Store weights in quantized precision to shrink the final .bin file.
    compress_model_weights(int8_model)

    saved_paths = save_model(
        model=int8_model,
        save_path="quant_ir_output",
        model_name="quantized_yolov5s",
    )
    print("The quantized model is stored at", saved_paths[0]["model"])

    # Evaluate a freshly loaded copy of the original IR, then the quantized model.
    reference_model = load_model(model_config=model_config)
    evaluation_pipeline = create_pipeline(algo_config=dict(), engine=engine)

    with yaspin(text="Evaluating original IR model"):
        original_metric = evaluation_pipeline.evaluate(reference_model)
    if original_metric:
        for metric_name, metric_value in original_metric.items():
            print(f"The {metric_name} score of the original model is {metric_value:.5f}")

    with yaspin(text="Evaluating quantized IR model"):
        quantized_metric = pipeline.evaluate(int8_model)
    if quantized_metric:
        for metric_name, metric_value in quantized_metric.items():
            print(f"The {metric_name} score of the quantized INT8 model is {metric_value:.5f}")
def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
                        labels=(), max_det=300):
    """Runs Non-Maximum Suppression (NMS) on inference results

    Args:
        prediction: tensor indexed as (image, box, 5 + nc); per box the values
            are [center x, center y, width, height, objectness, class scores...].
            NOTE: the width-height constraint below writes into this tensor.
        conf_thres: confidence threshold, must be within [0, 1].
        iou_thres: IoU threshold handed to torchvision.ops.nms, within [0, 1].
        classes: optional class ids; detections of other classes are dropped.
        agnostic: if True, boxes of different classes suppress each other.
        multi_label: allow several labels per box (only effective when nc > 1).
        labels: optional per-image apriori labels with rows [cls, x, y, w, h].
        max_det: maximum number of detections kept per image.

    Returns:
         list of detections, one (n,6) tensor per image [xyxy, conf, cls]
    """

    nc = prediction.shape[2] - 5  # number of classes
    # Candidate mask is computed once, before the width-height constraint below.
    xc = prediction[..., 4] > conf_thres  # candidates

    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

    # Settings
    min_wh, max_wh = 2, 7680  # (pixels) minimum and maximum box width and height
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    # Every slot initially refers to the same empty (0, 6) tensor; slots are
    # replaced (never modified in place) for images that yield detections.
    output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # Zero the objectness of boxes whose width or height is out of range.
        x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            lb = labels[xi]
            v = torch.zeros((len(lb), nc + 5), device=x.device)
            v[:, :4] = lb[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(lb)), lb[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            # One output row per (box, class) pair whose combined score passes the threshold.
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        elif n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence

        # Batched NMS
        # Shifting every box by class_id * max_wh keeps boxes of different
        # classes apart, so a single nms() call acts per class.
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        # merge is hard-coded to False above, so this branch is currently never taken.
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        # On timeout the remaining images keep the initial empty tensor.
        if (time.time() - t) > time_limit:
            print(f'WARNING: NMS time limit {time_limit}s exceeded')
            break  # time limit exceeded

    return output
:param output: model output :param target: annotations for model output """ targetboxes = [] targetlabels = [] predboxes = [] predlabels = [] scores = [] for single_target in target[0]: txmin, tymin, txmax, tymax = single_target["bbox"] category = single_target["category_id"] targetbox = [round(txmin), round(tymin), round(txmax), round(tymax)] targetboxes.append(targetbox) targetlabels.append(category) output = torch.Tensor(output[0]).float() output = non_max_suppression(output, conf_thres=self.conf_thres, iou_thres=self.iou_thres, multi_label=True) for single_output in output: for pred in single_output.numpy(): xmin, ymin, xmax, ymax, conf, label = pred predbox = [round(xmin), round(ymin), round(xmax), round(ymax)] predboxes.append(predbox) predlabels.append(label) scores.append(conf) preds = [ dict( boxes=torch.Tensor(predboxes).float(), labels=torch.Tensor(predlabels).short(), scores=torch.Tensor(scores), ) ] targets = [ dict( boxes=torch.Tensor(targetboxes).float(), labels=torch.Tensor(targetlabels).short(), ) ] self.metric.update(preds, targets) def reset(self): """ Resets metric """ self.metric.reset() def get_attributes(self): """ Returns a dictionary of metric attributes {metric_name: {attribute_name: value}}. 
def clip_coords(boxes, shape):
    """Clamp xyxy boxes in place to the image bounds.

    Args:
        boxes: torch tensor or numpy array whose columns 0..3 are x1, y1, x2, y2.
        shape: image shape; ``shape[0]`` is the height and ``shape[1]`` the width.
    """
    max_y, max_x = shape[0], shape[1]

    if not isinstance(boxes, torch.Tensor):
        # numpy array: handle both x columns, then both y columns, at once
        x_cols, y_cols = [0, 2], [1, 3]
        boxes[:, x_cols] = boxes[:, x_cols].clip(0, max_x)
        boxes[:, y_cols] = boxes[:, y_cols].clip(0, max_y)
        return

    # torch tensor: clamp every column individually, in order x1, y1, x2, y2
    for col, upper in enumerate((max_x, max_y, max_x, max_y)):
        boxes[:, col].clamp_(0, upper)
letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): # Resize and pad image while meeting stride-multiple constraints shape = im.shape[:2] # current shape [height, width] if isinstance(new_shape, int): new_shape = (new_shape, new_shape) # Scale ratio (new / old) r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) if not scaleup: # only scale down, do not scale up (for better val mAP) r = min(r, 1.0) # Compute padding ratio = r, r # width, height ratios new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding if auto: # minimum rectangle dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding elif scaleFill: # stretch dw, dh = 0.0, 0.0 new_unpad = (new_shape[1], new_shape[0]) ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios dw /= 2 # divide padding into 2 sides dh /= 2 if shape[::-1] != new_unpad: # resize im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border return im, ratio, (left, top) class MyDataLoader(DataLoader): """`MS Coco Detection `_ Dataset. Args: root (string): Root directory where images are downloaded to. dataset (string): "train" or "val. 
def parse_targets(self,
                  coco_targets: list,
                  w: int = None,
                  h: int = None,
                  ratio: tuple = None,
                  pad: tuple = None):
    """Convert raw COCO annotations of one image into xyxy targets.

    :param coco_targets: annotation dicts with ``iscrowd``, ``bbox``
        (``[xmin, ymin, w, h]``) and ``category_id`` keys.
    :param w: image width used as the upper clipping bound for x; must be > 0.
    :param h: image height used as the upper clipping bound for y; must be > 0.
    :param ratio: optional ``(width_ratio, height_ratio)`` multiplied onto
        the boxes after clipping.
    :param pad: optional ``(dw, dh)`` offsets added after scaling.
    :return: list of ``{"category_id": int, "bbox": [xmin, ymin, xmax, ymax]}``
        where ``category_id`` is the index into ``self.coco80_classes``.
    """
    assert w > 0
    assert h > 0

    # Crowd regions are ignored; only individual objects are kept.
    objects = [target for target in coco_targets if target['iscrowd'] == 0]

    # reshape(-1, 4) keeps the array 2-D even when there are no boxes.
    xyxy = np.array([target["bbox"] for target in objects], dtype=np.float32).reshape(-1, 4)
    # [xmin, ymin, w, h] -> [xmin, ymin, xmax, ymax]
    xyxy[:, 2:] += xyxy[:, :2]
    xyxy[:, 0::2] = np.clip(xyxy[:, 0::2], a_min=0, a_max=w)
    xyxy[:, 1::2] = np.clip(xyxy[:, 1::2], a_min=0, a_max=h)

    # Map the dataset category id to its position in the 80-class name list.
    labels = np.array(
        [self.coco80_classes.index(self.coco91_id2classes[target["category_id"]])
         for target in objects],
        dtype=int)

    # Keep only boxes with positive width and height after clipping.
    valid = (xyxy[:, 3] > xyxy[:, 1]) & (xyxy[:, 2] > xyxy[:, 0])
    xyxy, labels = xyxy[valid], labels[valid]

    if ratio is not None:
        # width, height ratios
        xyxy[:, 0::2] *= ratio[0]
        xyxy[:, 1::2] *= ratio[1]

    if pad is not None:
        # dw, dh padding
        dw, dh = pad
        xyxy[:, 0::2] += dw
        xyxy[:, 1::2] += dh

    return [{"category_id": int(label), "bbox": box.tolist()}
            for label, box in zip(labels, xyxy)]
self.coco_evaluator = None assert iou_type in ["bbox"] self.iou_type = iou_type self.results_file_name = results_file_name def prepare_for_coco_detection(self, ann, output): """将预测的结果转换成COCOeval指定的格式,针对目标检测任务""" # 遍历每张图像的预测结果 if len(output[0]) == 0: return img_id = ann[2]["img_id"] per_image_boxes = output[0] per_image_boxes = scale_coords(img1_shape=ann[2]["shape"], coords=per_image_boxes, img0_shape=ann[2]["origin_shape"], ratio_pad=ann[2]["ratio_pad"]) # 对于coco_eval, 需要的每个box的数据格式为[x_min, y_min, w, h] # 而我们预测的box格式是[x_min, y_min, x_max, y_max],所以需要转下格式 per_image_boxes[:, 2:] -= per_image_boxes[:, :2] per_image_classes = output[1].tolist() per_image_scores = output[2].tolist() # 遍历每个目标的信息 for object_score, object_class, object_box in zip( per_image_scores, per_image_classes, per_image_boxes): object_score = float(object_score) class_idx = int(object_class) if self.classes_mapping is not None: class_idx = self.classes_mapping[class_idx] # We recommend rounding coordinates to the nearest tenth of a pixel # to reduce resulting JSON file size. 
def normalize(image: np.ndarray) -> np.ndarray:
    """Scale an image to [0, 1] and standardise it per channel.

    The input is copied to float32, divided by 255, then the per-channel
    mean is subtracted and the result divided by the per-channel std. The
    constants broadcast over the last axis, so that axis must have length 3.

    :param image: array whose last axis holds the three channels.
    :return: new float32 array of the same shape; the input is not modified.
    """
    channel_mean = (0.485, 0.456, 0.406)
    channel_std = (0.229, 0.224, 0.225)

    # astype() returns a copy, so the in-place ufuncs below never touch the input.
    result = image.astype(np.float32)
    np.divide(result, 255.0, out=result)
    np.subtract(result, channel_mean, out=result)
    np.divide(result, channel_std, out=result)
    return result
trt.Runtime(trt_logger) as runtime: engine = runtime.deserialize_cuda_engine(f.read()) with engine.create_execution_context() as context: # Set input shape based on image dimensions for inference context.set_binding_shape(engine.get_binding_index("input"), (1, 3, image.shape[-2], image.shape[-1])) # Allocate host and device buffers bindings = [] for binding in engine: binding_idx = engine.get_binding_index(binding) size = trt.volume(context.get_binding_shape(binding_idx)) dtype = trt.nptype(engine.get_binding_dtype(binding)) if engine.binding_is_input(binding): input_buffer = np.ascontiguousarray(image) input_memory = cuda.mem_alloc(image.nbytes) bindings.append(int(input_memory)) else: output_buffer = cuda.pagelocked_empty(size, dtype) output_memory = cuda.mem_alloc(output_buffer.nbytes) bindings.append(int(output_memory)) stream = cuda.Stream() # Transfer input data to the GPU. cuda.memcpy_htod_async(input_memory, input_buffer, stream) # Run inference context.execute_async_v2(bindings=bindings, stream_handle=stream.handle) # Transfer prediction output from the GPU. 
def to_numpy(tensor):
    """Return the tensor's data as a numpy array on the CPU.

    Tensors that require grad are detached first, because ``.numpy()``
    cannot be called on a tensor that is still part of the autograd graph.
    """
    if tensor.requires_grad:
        tensor = tensor.detach()
    return tensor.cpu().numpy()
class MyDataSet(Dataset):
    """Custom image-classification dataset built from parallel path/label lists."""

    def __init__(self, images_path: list, images_class: list, transform=None):
        # images_path[i] is the file opened for sample i, images_class[i] its label.
        self.images_path = images_path
        self.images_class = images_class
        # Optional callable applied to the opened PIL image.
        self.transform = transform

    def __len__(self):
        return len(self.images_path)

    def __getitem__(self, item):
        """Open sample ``item`` and return ``(image, label)``.

        :raises ValueError: if the opened image is not in 'RGB' mode.
        """
        path = self.images_path[item]
        img = Image.open(path)
        # 'RGB' is a colour image, 'L' is greyscale; only RGB is accepted.
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(path))
        label = self.images_class[item]

        if self.transform is None:
            return img, label
        return self.transform(img), label

    @staticmethod
    def collate_fn(batch):
        """Stack a list of ``(image, label)`` samples into batch tensors."""
        # The official default_collate implementation can be found at
        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        images, labels = zip(*batch)
        return torch.stack(images, dim=0), torch.as_tensor(labels)
def collect_stats(model, data_loader, num_batches):
    """Feed data to the network and collect calibration statistics.

    Every ``quant_nn.TensorQuantizer`` that owns a calibrator is switched to
    calibration mode (quantization off, calibration on); quantizers without
    a calibrator are disabled. At most ``num_batches`` batches are then
    forwarded through the model, after which the quantizers are switched
    back (quantization on, calibration off / re-enabled).

    :param model: module whose ``named_modules()`` are scanned for quantizers.
    :param data_loader: iterable yielding ``(images, _)`` pairs; ``images``
        is moved to CUDA before the forward pass.
    :param num_batches: maximum number of batches used for calibration.
    """
    # Enable calibrators
    for name, module in model.named_modules():
        if isinstance(module, quant_nn.TensorQuantizer):
            if module._calibrator is not None:
                module.disable_quant()
                module.enable_calib()
            else:
                module.disable()

    for i, (images, _) in tqdm(enumerate(data_loader), total=num_batches):
        # Check the budget BEFORE the forward pass: testing it afterwards
        # would calibrate on num_batches + 1 batches.
        if i >= num_batches:
            break
        model(images.cuda())

    # Disable calibrators
    for name, module in model.named_modules():
        if isinstance(module, quant_nn.TensorQuantizer):
            if module._calibrator is not None:
                module.enable_quant()
                module.disable_calib()
            else:
                module.enable()
isinstance(module._calibrator, calib.MaxCalibrator): module.load_calib_amax() else: module.load_calib_amax(**kwargs) print(f"{name:40}: {module}") model.cuda() def main(args): quant_modules.initialize() assert torch.cuda.is_available(), "only support GPU!" train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path) data_transform = { "train": transforms.Compose([transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]), "val": transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])} # 实例化训练数据集 train_dataset = MyDataSet(images_path=train_images_path, images_class=train_images_label, transform=data_transform["train"]) # 实例化验证数据集 val_dataset = MyDataSet(images_path=val_images_path, images_class=val_images_label, transform=data_transform["val"]) batch_size = args.batch_size nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]) # number of workers print('Using {} dataloader workers every process'.format(nw)) train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=nw, collate_fn=train_dataset.collate_fn) val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True, num_workers=nw, collate_fn=val_dataset.collate_fn) # ########################## # # Post Training Quantization # # ########################## # # We will use histogram based calibration for activations and the default max calibration for weights. 
if __name__ == '__main__':
    def _str2bool(value):
        """Parse a command-line boolean such as ``true``/``false``.

        ``type=bool`` must not be used with argparse: ``bool("False")`` is
        True, so every non-empty value would enable the flag.
        """
        if isinstance(value, bool):
            return value
        lowered = value.strip().lower()
        if lowered in ("true", "t", "yes", "y", "1"):
            return True
        if lowered in ("false", "f", "no", "n", "0"):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got '{}'".format(value))

    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--epochs', type=int, default=5)
    parser.add_argument('--batch-size', type=int, default=8)
    parser.add_argument('--lr', type=float, default=0.0001)
    parser.add_argument('--lrf', type=float, default=0.01)

    # Root directory of the dataset
    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
    parser.add_argument('--data-path', type=str,
                        default="/data/flower_photos")

    # Path of the trained weights
    parser.add_argument('--weights', type=str, default='./resNet(flower).pth',
                        help='trained weights path')
    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')

    parser.add_argument('--onnx-filename', default='resnet34.onnx', help='save onnx model filename')
    # type=int so a value given on the command line is an int, like the default.
    parser.add_argument('--onnx-bs', type=int, default=1, help='save onnx model batch size')
    # Accepts e.g. "--qat False"; with type=bool that still enabled QAT.
    parser.add_argument('--qat', type=_str2bool, default=True, help='whether use quantization aware training')

    opt = parser.parse_args()

    main(opt)
def read_pickle(file_name: str) -> list:
    """Load and return the object stored in the pickle file ``file_name``.

    The file is opened in binary mode and closed before returning.
    """
    # NOTE: unpickling can execute arbitrary code, so only use this on
    # files this project wrote itself, never on untrusted input.
    with open(file_name, 'rb') as stream:
        return pickle.load(stream)
# Module-level service setup: create the Flask app, load the model weights
# and the class-index mapping once at import time.
app = Flask(__name__)
CORS(app)  # allow cross-origin requests

weights_path = "./MobileNetV2(flower).pth"
class_json_path = "./class_indices.json"
assert os.path.exists(weights_path), "weights path does not exist..."
assert os.path.exists(class_json_path), "class json path does not exist..."

# select device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
# create model
model = MobileNetV2(num_classes=5).to(device)
# load model weights
model.load_state_dict(torch.load(weights_path, map_location=device))
model.eval()

# load class info; the with-block closes the file, which was previously
# left open for the lifetime of the process
with open(class_json_path, 'rb') as json_file:
    class_indict = json.load(json_file)
class ConvBNReLU(nn.Sequential):
    """Bias-free Conv2d followed by BatchNorm2d and an in-place ReLU6.

    Padding is ``(kernel_size - 1) // 2``. The three layers are registered
    positionally, so they are reachable as indices 0, 1 and 2.
    """

    def __init__(self, in_channel, out_channel, kernel_size=3, stride=1, groups=1):
        pad = (kernel_size - 1) // 2
        conv = nn.Conv2d(in_channel, out_channel, kernel_size, stride, pad,
                         groups=groups, bias=False)
        norm = nn.BatchNorm2d(out_channel)
        activation = nn.ReLU6(inplace=True)
        super().__init__(conv, norm, activation)
building inverted residual residual blockes for t, c, n, s in inverted_residual_setting: output_channel = _make_divisible(c * alpha, round_nearest) for i in range(n): stride = s if i == 0 else 1 features.append(block(input_channel, output_channel, stride, expand_ratio=t)) input_channel = output_channel # building last several layers features.append(ConvBNReLU(input_channel, last_channel, 1)) # combine feature layers self.features = nn.Sequential(*features) # building classifier self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) self.classifier = nn.Sequential( nn.Dropout(0.2), nn.Linear(last_channel, num_classes) ) # weight initialization for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out') if m.bias is not None: nn.init.zeros_(m.bias) elif isinstance(m, nn.BatchNorm2d): nn.init.ones_(m.weight) nn.init.zeros_(m.bias) elif isinstance(m, nn.Linear): nn.init.normal_(m.weight, 0, 0.01) nn.init.zeros_(m.bias) def forward(self, x): x = self.features(x) x = self.avgpool(x) x = torch.flatten(x, 1) x = self.classifier(x) return x ================================================ FILE: deploying_service/deploying_pytorch/pytorch_flask_service/requirements.txt ================================================ Flask==2.2.5 Flask_Cors==3.0.9 Pillow ================================================ FILE: deploying_service/deploying_pytorch/pytorch_flask_service/templates/up.html ================================================ HTML5上传图片并预览 ================================================ FILE: deploying_service/pruning_model_pytorch/class_indices.json ================================================ { "0": "daisy", "1": "dandelion", "2": "roses", "3": "sunflowers", "4": "tulips" } ================================================ FILE: deploying_service/pruning_model_pytorch/main.py ================================================ import os import torch from torchvision import transforms, datasets import torch.nn.utils.prune as prune 
def count_sparsity(model: torch.nn.Module, p=True):
    """Report the share of exactly-zero weights in the Conv2d layers of ``model``.

    Args:
        model: module whose ``torch.nn.Conv2d`` sub-modules are inspected.
        p: when truthy, also print the sparsity of each individual layer.

    Returns:
        The global sparsity as a percentage (0-100). ``0.0`` is returned when
        the model holds no convolution weights, instead of dividing by zero.
    """
    zeros_total = 0
    weights_total = 0
    for name, module in model.named_modules():
        if not isinstance(module, torch.nn.Conv2d):
            continue
        layer_total = module.weight.numel()
        if layer_total == 0:
            # nothing to count, and the per-layer ratio would divide by zero
            continue
        layer_zeros = torch.sum(torch.eq(module.weight, 0)).item()
        zeros_total += layer_zeros
        weights_total += layer_total
        if p:
            print("Sparsity in {}.weights {:.2f}%".format(name, 100 * layer_zeros / layer_total))

    global_sparsity = 100 * zeros_total / weights_total if weights_total else 0.0
    print("Global sparsity: {:.2f}%".format(global_sparsity))
    return global_sparsity
class BasicBlock(nn.Module):
    """Two 3x3 conv/BN layers plus a shortcut (the block used by ``resnet34``).

    Args:
        in_channel: channels entering the block.
        out_channel: channels produced by both convolutions.
        stride: stride of the first convolution.
        downsample: optional module applied to the input before the addition.
    """
    expansion = 1

    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample

    def forward(self, x):
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        out += shortcut
        return self.relu(out)


class Bottleneck(nn.Module):
    """1x1 squeeze, 3x3, 1x1 expand block plus a shortcut (used by ``resnet101``).

    Args:
        in_channel: channels entering the block.
        out_channel: width of the first two convolutions; the block outputs
            ``out_channel * expansion`` channels.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input before the addition.
    """
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        super().__init__()
        wide = out_channel * self.expansion
        # squeeze channels
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        # spatial 3x3, carries the stride
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3,
                               stride=stride, bias=False, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channel)
        # unsqueeze channels
        self.conv3 = nn.Conv2d(out_channel, wide, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(wide)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out += shortcut
        return self.relu(out)


class ResNet(nn.Module):
    """ResNet backbone with an optional average-pool + linear classification head.

    Args:
        block: block class (``BasicBlock`` or ``Bottleneck``); must expose ``expansion``.
        blocks_num: number of blocks in each of the four stages.
        num_classes: output size of the final linear layer.
        include_top: when False the pooling and linear layers are not created
            and ``forward`` returns the feature map of the last stage.
    """

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True):
        super().__init__()
        self.include_top = include_top
        self.in_channel = 64  # running input width, updated by _make_layer

        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # layer1..layer4: (stage width, stride of the first block)
        stage_plan = ((64, 1), (128, 2), (256, 2), (512, 2))
        for idx, (width, stride) in enumerate(stage_plan):
            stage = self._make_layer(block, width, blocks_num[idx], stride=stride)
            setattr(self, "layer{}".format(idx + 1), stage)

        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` blocks; only the first may downsample."""
        out_width = channel * block.expansion
        shortcut = None
        if stride != 1 or self.in_channel != out_width:
            # projection shortcut so the identity matches the block output
            shortcut = nn.Sequential(
                nn.Conv2d(self.in_channel, out_width, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_width))

        stages = [block(self.in_channel, channel, downsample=shortcut, stride=stride)]
        self.in_channel = out_width
        for _ in range(1, block_num):
            stages.append(block(self.in_channel, channel))
        return nn.Sequential(*stages)

    def forward(self, x):
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        if self.include_top:
            x = self.avgpool(x)
            x = torch.flatten(x, 1)
            x = self.fc(x)
        return x


def resnet34(num_classes=1000, include_top=True):
    """Return a ``ResNet`` of ``BasicBlock`` stages with depths [3, 4, 6, 3]."""
    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)


def resnet101(num_classes=1000, include_top=True):
    """Return a ``ResNet`` of ``Bottleneck`` stages with depths [3, 4, 23, 3]."""
    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, include_top=include_top)
Exception as e: print(e) exit(-1) # create model model = resnet34(num_classes=5) # load model weights model_weight_path = "./resNet34.pth" model.load_state_dict(torch.load(model_weight_path, map_location=device)) model.eval() with torch.no_grad(): # predict class output = torch.squeeze(model(img)) predict = torch.softmax(output, dim=0) predict_cla = torch.argmax(predict).numpy() print(class_indict[str(predict_cla)], predict[predict_cla].numpy()) plt.show() ================================================ FILE: deploying_service/pruning_model_pytorch/train.py ================================================ import torch import torch.nn as nn from torchvision import transforms, datasets import json import matplotlib.pyplot as plt import os import torch.optim as optim from model import resnet34, resnet101 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") print(device) data_transform = { "train": transforms.Compose([transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]), "val": transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])} data_root = os.path.abspath(os.path.join(os.getcwd(), "../..")) # get data root path image_path = data_root + "/data_set/flower_data/" # flower data set path train_dataset = datasets.ImageFolder(root=image_path+"train", transform=data_transform["train"]) train_num = len(train_dataset) # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4} flower_list = train_dataset.class_to_idx cla_dict = dict((val, key) for key, val in flower_list.items()) # write dict into json file json_str = json.dumps(cla_dict, indent=4) with open('class_indices.json', 'w') as json_file: json_file.write(json_str) batch_size = 16 train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, 
num_workers=0) validate_dataset = datasets.ImageFolder(root=image_path + "val", transform=data_transform["val"]) val_num = len(validate_dataset) validate_loader = torch.utils.data.DataLoader(validate_dataset, batch_size=batch_size, shuffle=False, num_workers=0) net = resnet34() # load pretrain weights model_weight_path = "./resnet34-pre.pth" missing_keys, unexpected_keys = net.load_state_dict(torch.load(model_weight_path), strict=False) # for param in net.parameters(): # param.requires_grad = False # change fc layer structure inchannel = net.fc.in_features net.fc = nn.Linear(inchannel, 5) net.to(device) loss_function = nn.CrossEntropyLoss() optimizer = optim.Adam(net.parameters(), lr=0.0001) best_acc = 0.0 save_path = './resNet34.pth' for epoch in range(3): # train net.train() running_loss = 0.0 for step, data in enumerate(train_loader, start=0): images, labels = data optimizer.zero_grad() logits = net(images.to(device)) loss = loss_function(logits, labels.to(device)) loss.backward() optimizer.step() # print statistics running_loss += loss.item() # print train process rate = (step+1)/len(train_loader) a = "*" * int(rate * 50) b = "." 
def dilated_conv_one_pixel(center: (int, int),
                           feature_map: np.ndarray,
                           k: int = 3,
                           r: int = 1,
                           v: int = 1):
    """Add ``v`` to every pixel touched by a dilated kernel centred on ``center``.

    The map is modified in place. Taps that fall outside the map are ignored,
    so a kernel near a border neither wraps around to the opposite edge nor
    raises an ``IndexError``.

    Args:
        center: ``(x, y)`` coordinate of the kernel centre.
        feature_map: map recording how often each pixel is used.
        k: kernel size of the dilated convolution; must be odd.
        r: dilation rate.
        v: increment added at every touched pixel.

    Raises:
        ValueError: if ``k`` is even, because such a kernel has no centre pixel.
    """
    if k % 2 != 1:
        raise ValueError("kernel size k must be odd, got {}".format(k))

    reach = ((k - 1) // 2) * r
    # left-top: (x, y)
    left, top = center[0] - reach, center[1] - reach
    height, width = feature_map.shape[:2]
    for i in range(k):
        y = top + i * r
        if not 0 <= y < height:
            continue
        for j in range(k):
            x = left + j * r
            if 0 <= x < width:
                feature_map[y][x] += v


def dilated_conv_all_map(dilated_map: np.ndarray,
                         k: int = 3,
                         r: int = 1):
    """Propagate pixel-usage counts of an output map back to the input map.

    Every pixel of ``dilated_map`` with a positive count contributes that count
    to all input pixels its dilated kernel reads.

    Args:
        dilated_map: usage count of every pixel of the output feature map.
        k: kernel size of the dilated convolution.
        r: dilation rate.

    Returns:
        A new array shaped like ``dilated_map`` with the input usage counts.
    """
    new_map = np.zeros_like(dilated_map)
    # np.nonzero walks the map in row-major order, like a nested row/column loop
    for i, j in zip(*np.nonzero(dilated_map > 0)):
        dilated_conv_one_pixel((j, i), new_map, k=k, r=r, v=dilated_map[i][j])
    return new_map
def anchor_fitness(k: np.ndarray, wh: np.ndarray, thr: float):
    """Score a set of anchors against box sizes (used as the mutation fitness).

    For every box/anchor pair the worse of the width and height ratios is
    taken (each ratio folded into (0, 1] via ``min(r, 1 / r)``); a box is then
    scored by its best anchor.

    Args:
        k: anchors, indexed as ``[anchor, 2]``.
        wh: box sizes, indexed as ``[box, 2]``.
        thr: a box counts as matched when its best score exceeds this value.

    Returns:
        ``(fitness, bpr)``: the mean best score with unmatched boxes counted
        as 0, and the fraction of matched boxes (best possible recall).
    """
    ratio = wh[:, None] / k[None]
    worst_side = np.minimum(ratio, 1. / ratio).min(2)  # ratio metric
    # x = wh_iou(wh, k)  # iou metric
    best = worst_side.max(1)
    matched = (best > thr).astype(np.float32)
    fitness = (best * matched).mean()
    recall = matched.mean()
    return fitness, recall
def distances(data, clusters):
    """Return the squared Euclidean distance from every point to every cluster.

    Args:
        data: points, indexed as ``[N, 2]``.
        clusters: cluster centres, indexed as ``[M, 2]``.

    Returns:
        Array indexed as ``[N, M]`` holding the squared distances.
    """
    xy1 = data[:, None]  # [N,1,2]
    xy2 = clusters[None]  # [1,M,2]
    return ((xy2 - xy1) ** 2).sum(axis=-1)


def k_means(data, k, dist=np.mean):
    """Cluster ``data`` with k-means, plotting the state after every step.

    Args:
        data: the data to cluster.
        k: number of clusters.
        dist: reduction used to update a cluster centre from its members.

    Returns:
        The final cluster centres.
    """
    data_number = data.shape[0]
    # None means "no previous assignment yet". A zero-filled array would look
    # converged whenever the very first assignment is all cluster 0 (e.g. k=1),
    # returning the random initial centres untouched.
    last_nearest = None

    # init k clusters
    clusters = data[np.random.choice(data_number, k, replace=False)]
    print(f"random cluster: \n {clusters}")
    # plot
    plot_clusters(data, None, clusters, "random clusters")

    step = 0
    while True:
        d = distances(data, clusters)
        current_nearest = np.argmin(d, axis=1)

        # plot
        plot_clusters(data, current_nearest, clusters, f"step {step}")

        if last_nearest is not None and np.array_equal(last_nearest, current_nearest):
            break  # clusters won't change
        for cluster in range(k):
            members = data[current_nearest == cluster]
            if len(members) == 0:
                # reducing an empty selection yields NaN; keep the old centre
                continue
            clusters[cluster] = dist(members, axis=0)  # update clusters
        last_nearest = current_nearest
        step += 1

    return clusters
class VOCDataSet(object):
    """Collects image sizes and relative box sizes from PASCAL VOC annotations.

    Args:
        voc_root: directory that contains ``VOCdevkit``.
        year: ``"2007"`` or ``"2012"``; selects ``VOCdevkit/VOC<year>``.
        txt_name: split file inside ``ImageSets/Main`` listing annotation stems.

    Raises:
        AssertionError: if the year is unsupported, the split file is missing
            or empty, or a listed annotation file does not exist.
    """

    def __init__(self, voc_root, year="2012", txt_name: str = "train.txt"):
        assert year in ["2007", "2012"], "year must be in ['2007', '2012']"
        self.root = os.path.join(voc_root, "VOCdevkit", f"VOC{year}")
        self.annotations_root = os.path.join(self.root, "Annotations")

        # read train.txt or val.txt file
        txt_path = os.path.join(self.root, "ImageSets", "Main", txt_name)
        assert os.path.exists(txt_path), "not found {} file.".format(txt_name)

        with open(txt_path) as read:
            stems = [line.strip() for line in read]
        self.xml_list = [os.path.join(self.annotations_root, stem + ".xml")
                         for stem in stems if stem]

        # check file
        assert len(self.xml_list) > 0, "in '{}' file does not find any information.".format(txt_path)
        for xml_path in self.xml_list:
            assert os.path.exists(xml_path), "not found '{}' file.".format(xml_path)

    def __len__(self):
        return len(self.xml_list)

    def parse_xml_to_dict(self, xml):
        """Recursively convert an XML element into nested dictionaries.

        Modelled on tensorflow's ``recursive_parse_xml_to_dict``.

        Args:
            xml: xml tree obtained by parsing XML file contents using lxml.etree

        Returns:
            Python dictionary holding XML contents. ``object`` children are
            gathered into a list because an annotation may hold several.
        """
        if len(xml) == 0:  # leaf element: return its text directly
            return {xml.tag: xml.text}

        result = {}
        for child in xml:
            child_result = self.parse_xml_to_dict(child)  # recurse into the child
            if child.tag != 'object':
                result[child.tag] = child_result[child.tag]
            else:
                result.setdefault(child.tag, []).append(child_result[child.tag])
        return {xml.tag: result}

    @staticmethod
    def _boxes_wh(data):
        """Return ``([width, height], boxes)`` with box sizes relative to the image.

        ``data`` is the ``annotation`` dictionary produced by
        ``parse_xml_to_dict``. An annotation without any ``object`` entry
        yields an empty box list instead of raising ``KeyError``.
        """
        im_width = int(data["size"]["width"])
        im_height = int(data["size"]["height"])

        boxes = []
        for obj in data.get("object", []):
            bndbox = obj["bndbox"]
            xmin = float(bndbox["xmin"])
            xmax = float(bndbox["xmax"])
            ymin = float(bndbox["ymin"])
            ymax = float(bndbox["ymax"])
            boxes.append([(xmax - xmin) / im_width, (ymax - ymin) / im_height])
        return [im_width, im_height], boxes

    def get_info(self):
        """Read every annotation and gather image and box sizes.

        Returns:
            ``(im_wh_list, boxes_wh_list)``: per image ``[width, height]`` and
            the list of its ``[w, h]`` box sizes divided by the image size.
            Images without boxes are skipped in both lists.
        """
        im_wh_list = []
        boxes_wh_list = []
        for xml_path in tqdm(self.xml_list, desc="read data info."):
            # read xml as bytes: lxml rejects str input that carries an
            # encoding declaration, while bytes are decoded by the parser
            with open(xml_path, "rb") as fid:
                xml = etree.fromstring(fid.read())
            data = self.parse_xml_to_dict(xml)["annotation"]

            im_wh, wh = self._boxes_wh(data)
            if len(wh) == 0:
                continue

            im_wh_list.append(im_wh)
            boxes_wh_list.append(wh)

        return im_wh_list, boxes_wh_list
def wh_iou(wh1, wh2):
    """Return the n x m IoU matrix of boxes compared by width/height only.

    Args:
        wh1: array indexed as ``[n, 2]`` of (w, h).
        wh2: array indexed as ``[m, 2]`` of (w, h).

    Returns:
        Array indexed as ``[n, m]`` with ``inter / (area1 + area2 - inter)``.
    """
    wh1 = wh1[:, None]  # [N,1,2]
    wh2 = wh2[None]  # [1,M,2]
    inter = np.minimum(wh1, wh2).prod(2)  # [N,M]
    return inter / (wh1.prod(2) + wh2.prod(2) - inter)  # iou = inter / (area1 + area2 - inter)


def k_means(boxes, k, dist=np.median):
    """Cluster box sizes with k-means under the ``1 - IoU`` distance.

    refer: https://github.com/qqwweee/keras-yolo3/blob/master/kmeans.py

    Args:
        boxes: the bboxes to cluster, indexed as ``[N, 2]`` of (w, h).
        k: number of clusters.
        dist: reduction used to update a cluster from its members (median by
            default, which the original author found slightly better than mean).

    Returns:
        Array of ``k`` cluster sizes.
    """
    box_number = boxes.shape[0]
    # None means "no previous assignment yet". A zero-filled array would look
    # converged whenever the very first assignment is all cluster 0 (e.g. k=1),
    # returning the random initial boxes untouched.
    last_nearest = None
    # np.random.seed(0)  # uncomment to fix the random seed
    # init k clusters
    clusters = boxes[np.random.choice(box_number, k, replace=False)]

    while True:
        distances = 1 - wh_iou(boxes, clusters)
        current_nearest = np.argmin(distances, axis=1)
        if last_nearest is not None and np.array_equal(last_nearest, current_nearest):
            break  # clusters won't change
        for cluster in range(k):
            members = boxes[current_nearest == cluster]
            if len(members) == 0:
                # duplicate boxes can leave a cluster empty; reducing an empty
                # selection yields NaN and poisons later assignments
                continue
            clusters[cluster] = dist(members, axis=0)  # update clusters
        last_nearest = current_nearest

    return clusters
def check_path_exist(path):
    """Assert that ``path`` exists on disk."""
    found = os.path.exists(path)
    assert found, "{} does not exist...".format(path)


def to_numpy(tensor):
    """Convert a torch tensor to a numpy array, detaching it first if it tracks gradients."""
    if tensor.requires_grad:
        tensor = tensor.detach()
    return tensor.cpu().numpy()


def openvino_model_speed(data_loader, val_num, xml_path, bin_path):
    """Run an OpenVINO IR model over ``data_loader`` and print its fps and accuracy.

    Args:
        data_loader: iterable yielding ``(images, labels)`` batches of tensors.
        val_num: number of samples, used as the denominator for accuracy and fps.
        xml_path: path to the IR ``.xml`` file.
        bin_path: path to the IR ``.bin`` file.

    Raises:
        AssertionError: if either model file is missing.
        ValueError: if the CPU device does not support some layers of the network.
    """
    device = "CPU"
    check_path_exist(xml_path)
    check_path_exist(bin_path)

    # inference engine
    ie = IECore()

    # read IR
    net = ie.read_network(model=xml_path, weights=bin_path)
    # load model
    exec_net = ie.load_network(network=net, device_name=device)

    # check supported layers for device
    if device == "CPU":
        supported = ie.query_network(net, "CPU")
        unsupported = [layer for layer in net.layers.keys() if layer not in supported]
        if unsupported:
            print("Please try to specify cpu extensions library path in sample's command line parameters using -l "
                  "or --cpu_extension command line argument")
            raise ValueError("device {} not support layers:\n {}".format(device,
                                                                         ",".join(unsupported)))

    # get input and output name
    input_blob = next(iter(net.input_info))
    output_blob = next(iter(net.outputs))

    # set batch size
    net.batch_size = 1

    # n, c, h, w = net.input_info[input_blob].input_data.shape

    forward_time = 0
    correct = 0.0  # accumulate accurate number / epoch
    for images, labels in tqdm(data_loader, desc="Running onnx model..."):
        feed = {input_blob: to_numpy(images)}

        # start sync inference; only the infer call is timed
        started = time.time()
        res = exec_net.infer(inputs=feed)
        finished = time.time()
        forward_time += (finished - started)

        predict_y = np.argmax(res[output_blob], axis=1)
        correct += (predict_y == to_numpy(labels)).sum()

    val_accurate = correct / val_num
    fps = round(val_num / forward_time, 1)
    print("openvino info:\nfps: {}/s accuracy: {}\n".format(fps, val_accurate))
def main():
    """Classify every ``*.jpg`` in the working directory with an OpenVINO IR model.

    Loads ``./resnet34.xml`` / ``./resnet34.bin`` and the index-to-name map in
    ``./class_indices.json``, then for each image: resizes it to the network
    input size, converts BGR to RGB, applies the mean/std normalization below,
    runs synchronous inference and prints the top-1 class with its softmax
    probability rounded to two decimals.

    Raises:
        AssertionError: when a model file, the class json or any ``.jpg`` is
            missing.
        SystemExit: when the CPU plugin reports unsupported layers.
    """
    device = "CPU"
    model_xml_path = "./resnet34.xml"
    model_bin_path = "./resnet34.bin"
    image_path = "./"
    class_json_path = './class_indices.json'

    # set log format
    log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)

    assert os.path.exists(model_xml_path), ".xml file does not exist..."
    assert os.path.exists(model_bin_path), ".bin file does not exist..."

    # search *.jpg files
    image_list = glob.glob(os.path.join(image_path, "*.jpg"))
    assert len(image_list) > 0, "no image(.jpg) be found..."

    # load class label
    assert os.path.exists(class_json_path), "class_json_path does not exist..."
    with open(class_json_path, 'r') as json_file:
        class_indict = json.load(json_file)

    # inference engine
    ie = IECore()

    # read IR
    net = ie.read_network(model=model_xml_path, weights=model_bin_path)

    # check supported layers for device before compiling the network
    if device == "CPU":
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
        if len(not_supported_layers) > 0:
            log.error("device {} not support layers:\n {}".format(device, ",".join(not_supported_layers)))
            log.error("Please try to specify cpu extensions library path in sample's command line parameters using -l "
                      "or --cpu_extension command line argument")
            sys.exit(1)

    # get input and output name
    input_blob = next(iter(net.input_info))
    output_blob = next(iter(net.outputs))

    # set batch size before load_network so the compiled network uses it
    batch_size = 1
    net.batch_size = batch_size

    # load model
    exec_net = ie.load_network(network=net, device_name=device)

    # network input layout is (n, c, h, w); only h and w are needed here
    _, _, h, w = net.input_info[input_blob].input_data.shape

    # float32 constants keep the normalized image in float32
    # (plain Python lists would promote the result to float64)
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

    # inference every image
    for img_file in image_list:
        image = cv2.imread(img_file)
        if image is None:
            # cv2.imread returns None for unreadable/corrupt files
            log.warning("failed to read image '{}', skip.".format(img_file))
            continue
        if image.shape[:-1] != (h, w):
            image = cv2.resize(image, (w, h))

        # bgr(opencv default format) -> rgb
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # pre-process
        image = (image / 255.).astype(np.float32)
        image = (image - mean) / std

        # change data from HWC to CHW
        image = image.transpose((2, 0, 1))
        # add batch dimension
        image = np.expand_dims(image, axis=0)

        # start sync inference
        res = exec_net.infer(inputs={input_blob: image})
        prediction = np.squeeze(res[output_blob])

        # numerically stable softmax: subtract the max before exponentiating
        prediction -= np.max(prediction, keepdims=True)
        exp_prediction = np.exp(prediction)
        prediction = exp_prediction / np.sum(exp_prediction, keepdims=True)
        class_index = np.argmax(prediction, axis=0)
        print("prediction: '{}'\nclass:{}  probability:{}\n".format(img_file,
                                                                    class_indict[str(class_index)],
                                                                    np.around(prediction[class_index], 2)))
class ResNet(nn.Module):
    """ResNet backbone assembled from a residual ``block`` class.

    Args:
        block: residual block class exposing an ``expansion`` attribute and
            accepting ``(in_channel, out_channel, stride=, downsample=)``.
        blocks_num: four integers, the number of blocks in each stage.
        num_classes: output size of the final linear layer.
        include_top: when False the pooling/linear head is not created and
            ``forward`` returns the last stage's feature map.
    """

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True):
        super().__init__()
        self.include_top = include_top
        self.in_channel = 64

        # stem: 7x7 stride-2 convolution followed by 3x3 stride-2 max pooling
        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # four residual stages, registered as layer1..layer4 in this order
        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
        for position, (width, stride) in enumerate(stage_specs):
            stage = self._make_layer(block, width, blocks_num[position], stride=stride)
            setattr(self, "layer{}".format(position + 1), stage)

        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        # re-initialise every convolution weight with Kaiming-normal (fan_out)
        conv_modules = (m for m in self.modules() if isinstance(m, nn.Conv2d))
        for conv in conv_modules:
            nn.init.kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` blocks and update ``self.in_channel``."""
        out_channel = channel * block.expansion

        # a 1x1 projection shortcut is needed whenever the first block changes
        # the spatial size or the channel count
        needs_projection = stride != 1 or self.in_channel != out_channel
        if needs_projection:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channel, out_channel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channel))
        else:
            downsample = None

        stage_blocks = [block(self.in_channel, channel, downsample=downsample, stride=stride)]
        self.in_channel = out_channel

        remaining = block_num - 1
        while remaining > 0:
            stage_blocks.append(block(self.in_channel, channel))
            remaining -= 1

        return nn.Sequential(*stage_blocks)

    def forward(self, x):
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        if not self.include_top:
            return x

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        return self.fc(x)
def openvino_model_speed(data_loader, val_num):
    """Benchmark the OpenVINO IR model ``./resnet34r.xml`` on a validation loader.

    Runs synchronous inference on every batch yielded by ``data_loader``,
    accumulates only the time spent inside ``exec_net.infer`` and prints the
    resulting throughput and top-1 accuracy.

    Args:
        data_loader: iterable yielding ``(images, labels)`` tensor pairs.
        val_num: number of validation samples; denominator for both the
            accuracy and the fps figure.

    Returns:
        ``(fps, val_accurate, "OpenVINO")``, mirroring the triple returned by
        ``pytorch_model_speed`` and ``onnx_model_speed``.

    Raises:
        AssertionError: via ``check_path_exist`` when an IR file is missing.
        ValueError: when the CPU plugin reports unsupported layers.
    """
    device = "CPU"
    model_xml_path = "./resnet34r.xml"
    model_bin_path = "./resnet34r.bin"
    check_path_exist(model_xml_path)
    check_path_exist(model_bin_path)

    # inference engine
    ie = IECore()

    # read IR
    net = ie.read_network(model=model_xml_path, weights=model_bin_path)

    # check supported layers for device *before* compiling the network, so an
    # unsupported model fails with the explicit message below
    if device == "CPU":
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
        if len(not_supported_layers) > 0:
            print("Please try to specify cpu extensions library path in sample's command line parameters using -l "
                  "or --cpu_extension command line argument")
            raise ValueError("device {} not support layers:\n {}".format(device, ",".join(not_supported_layers)))

    # get input and output name
    input_blob = next(iter(net.input_info))
    output_blob = next(iter(net.outputs))

    # set batch size; this must happen before load_network, because the
    # executable network is compiled from the state of `net` at load time
    batch_size = 1
    net.batch_size = batch_size

    # load model
    exec_net = ie.load_network(network=net, device_name=device)

    forward_time = 0
    acc = 0.0  # accumulate accurate number / epoch
    for val_data in tqdm(data_loader, desc="Running openvino model..."):
        val_images, val_labels = val_data
        input_dict = {input_blob: to_numpy(val_images)}
        # start sync inference; only the infer call itself is timed
        t1 = time.perf_counter()
        res = exec_net.infer(inputs=input_dict)
        t2 = time.perf_counter()
        forward_time += (t2 - t1)
        outputs = res[output_blob]
        predict_y = np.argmax(outputs, axis=1)
        acc += (predict_y == to_numpy(val_labels)).sum()
    val_accurate = acc / val_num
    fps = round(val_num / forward_time, 1)
    print("openvino info:\nfps: {}/s accuracy: {}\n".format(fps, val_accurate))

    return fps, val_accurate, "OpenVINO"
================================================ 该项目用于读取冻结后的pb文件并进行预测 使用步骤: (1)准备好需要使用的pb冻结文件,pbtxt标签文件,测试用的图片 (2)修改info.config文件中的相关信息 ![Example image](https://tensorflowob/raw/master/object_detection/readPbFile/example1.jpg) ![Example image](https://tensorflowob/raw/master/object_detection/readPbFile/example2.jpg) ================================================ FILE: others_project/readPbFile/pascal_label_map.pbtxt ================================================ item { id: 1 name: 'aeroplane' } item { id: 2 name: 'bicycle' } item { id: 3 name: 'bird' } item { id: 4 name: 'boat' } item { id: 5 name: 'bottle' } item { id: 6 name: 'bus' } item { id: 7 name: 'car' } item { id: 8 name: 'cat' } item { id: 9 name: 'chair' } item { id: 10 name: 'cow' } item { id: 11 name: 'diningtable' } item { id: 12 name: 'dog' } item { id: 13 name: 'horse' } item { id: 14 name: 'motorbike' } item { id: 15 name: 'person' } item { id: 16 name: 'pottedplant' } item { id: 17 name: 'sheep' } item { id: 18 name: 'sofa' } item { id: 19 name: 'train' } item { id: 20 name: 'tvmonitor' } ================================================ FILE: others_project/readPbFile/readPb.py ================================================ import tensorflow as tf import configparser from distutils.version import StrictVersion import cv2 import glob from using_function import draw_box, read_pbtxt, get_inAndout_tensor, convert_type, read_image if StrictVersion(tf.__version__) < StrictVersion('1.12.0'): raise ImportError('Please upgrade your TensorFlow installation to v1.12.*.') # 读取参数配置文件 conf = configparser.ConfigParser() conf.read('info.config') path_to_frozen_graph = conf.get('tensorflow', 'path_to_frozen_graph') path_to_labels = conf.get('tensorflow', 'path_to_labels') path_to_images = conf.get('tensorflow', 'path_to_images') probability_thresh = float(conf.get('tensorflow', 'probability_thresh')) # 读取pbtxt标签信息 category_index = read_pbtxt(path_to_labels) detection_graph = tf.Graph() with 
detection_graph.as_default(): od_graph_def = tf.GraphDef() with tf.gfile.GFile(path_to_frozen_graph, 'rb') as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) tf.import_graph_def(od_graph_def, name='') with detection_graph.as_default(): with tf.Session() as sess: # Get handles to input and output tensors tensor_dict, image_tensor = get_inAndout_tensor() test_image_paths = glob.glob(path_to_images) for image_path in test_image_paths: image_BGR, image_np_expanded = read_image(image_path) # Run inference output_dict = sess.run(tensor_dict, feed_dict={image_tensor: image_np_expanded}) # all outputs are float32 numpy arrays, so convert types as appropriate convert_type(output_dict) draw_box(image_BGR, output_dict['detection_boxes'], output_dict['detection_classes'], output_dict['detection_scores'], category_index, thresh=probability_thresh, line_thickness=5) cv2.namedWindow("prediction", cv2.WINDOW_AUTOSIZE) cv2.imshow("prediction", image_BGR) cv2.waitKey(0) ================================================ FILE: others_project/readPbFile/test_images/image_info.txt ================================================ Image provenance: image1.jpg: https://commons.wikimedia.org/wiki/File:Baegle_dwa.jpg image2.jpg: Michael Miley, https://www.flickr.com/photos/mike_miley/4678754542/in/photolist-88rQHL-88oBVp-88oC2B-88rS6J-88rSqm-88oBLv-88oBC4 ================================================ FILE: others_project/readPbFile/using_function.py ================================================ import collections import six import PIL.Image as Image import PIL.ImageDraw as ImageDraw import PIL.ImageFont as ImageFont import numpy as np import tensorflow as tf import cv2 STANDARD_COLORS = [ 'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque', 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite', 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan', 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 
def _text_size(font, text):
    """Return ``(width, height)`` of ``text`` for ``font`` on old and new Pillow."""
    if hasattr(font, "getsize"):
        return font.getsize(text)
    # Pillow >= 10 removed getsize(); getbbox() returns (left, top, right, bottom)
    bbox = font.getbbox(text)
    return bbox[2], bbox[3]


def draw_text(draw, box_to_display_str_map, box, left, right, top, bottom, color):
    """Draw the label strings of one box as filled, stacked text banners.

    Args:
        draw: drawing object providing ``rectangle`` and ``text``.
        box_to_display_str_map: mapping from box key to its list of label strings.
        box: key of the box whose labels are drawn.
        left, right, top, bottom: box edges in pixel coordinates (``right`` is
            accepted for signature symmetry but not used).
        color: fill colour of the banner behind each label.
    """
    try:
        font = ImageFont.truetype('arial.ttf', 24)
    except IOError:
        # arial.ttf is not available on this system: use the built-in font
        font = ImageFont.load_default()

    display_strs = box_to_display_str_map[box]

    # If the total height of the display strings added to the top of the bounding
    # box exceeds the top of the image, stack the strings below the bounding box
    # instead of above.
    display_str_heights = [_text_size(font, ds)[1] for ds in display_strs]
    # Each display_str has a top and bottom margin of 0.05x.
    total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)

    if top > total_display_str_height:
        text_bottom = top
    else:
        text_bottom = bottom + total_display_str_height

    # Reverse list and print from bottom to top.
    for display_str in display_strs[::-1]:
        text_width, text_height = _text_size(font, display_str)
        margin = np.ceil(0.05 * text_height)
        draw.rectangle([(left, text_bottom - text_height - 2 * margin),
                        (left + text_width, text_bottom)], fill=color)
        draw.text((left + margin, text_bottom - text_height - margin),
                  display_str,
                  fill='black',
                  font=font)
        # the banner just drawn is text_height + 2 * margin tall, so the next
        # one must start exactly that far up, otherwise banners overlap
        text_bottom -= text_height + 2 * margin
def read_pbtxt(filename):
    """Parse a label-map ``.pbtxt`` file into a category index.

    Each ``item { id: N  name: '...' }`` entry becomes
    ``category_index[N] = {'id': N, 'name': '...'}``. Entries are keyed by
    their declared id, so non-contiguous ids map correctly. The ``id`` and
    ``name`` fields may appear in either order, and names may contain spaces.

    Args:
        filename: path of the pbtxt label map.

    Returns:
        dict mapping integer class id to ``{'id': int, 'name': str}``.

    Raises:
        ValueError: when an item lacks an ``id`` or a quoted ``name`` field.
    """
    import re  # local import keeps this function self-contained

    item_pattern = re.compile(r"item\s*\{(.*?)\}", re.DOTALL)
    id_pattern = re.compile(r"\bid\s*:\s*(\d+)")
    # \b stops this from matching the tail of a `display_name` field
    name_pattern = re.compile(r"""\bname\s*:\s*(['"])(.*?)\1""")

    with open(filename, 'r') as reader:
        txt = reader.read()

    category_index = {}
    for item_body in item_pattern.findall(txt):
        id_match = id_pattern.search(item_body)
        name_match = name_pattern.search(item_body)
        if id_match is None or name_match is None:
            raise ValueError("malformed item in {}: {!r}".format(filename, item_body.strip()))
        class_id = int(id_match.group(1))
        category_index[class_id] = {'id': class_id, 'name': name_match.group(2)}

    return category_index
def content2idList(content, word2id_dict):
    """
    Convert a text into the list of ids of its characters.
    content: input text, iterated character by character
    word2id_dict: lookup table from character to id; characters that are
                  not in the table are dropped
    """
    return [word2id_dict[char] for char in content if char in word2id_dict]
def text_cnn(seq_length, vocab_size, embedding_dim, num_cla, kernelNum):
    """
    Build and compile a TextCNN classifier.

    :param seq_length: length of the input token-id sequence
    :param vocab_size: size of the vocabulary
    :param embedding_dim: feature dimension of the generated embeddings
    :param num_cla: number of target classes
    :param kernelNum: number of filters in each convolution branch
    :return: keras model
    """
    layers = keras.layers

    token_ids = layers.Input(shape=(seq_length,), dtype='int32')
    embedded = layers.Embedding(vocab_size, embedding_dim, input_length=seq_length)(token_ids)

    # one convolution branch per window size (3, 4, 5); each branch is
    # max-pooled over its whole remaining length
    pooled_branches = []
    for window in (3, 4, 5):
        feature_map = layers.Conv1D(kernelNum, window, padding='valid', strides=1,
                                    activation='relu')(embedded)
        pooled = layers.MaxPool1D(pool_size=int(feature_map.shape[1]))(feature_map)
        pooled_branches.append(pooled)

    # merge the outputs of the three branches
    merged = layers.Concatenate(axis=-1)(pooled_branches)
    hidden = layers.Flatten()(merged)
    hidden = layers.Dense(128)(hidden)
    hidden = layers.Dropout(0.25)(hidden)
    hidden = layers.ReLU()(hidden)
    probabilities = layers.Dense(num_cla, activation='softmax')(hidden)

    model = keras.models.Model(inputs=token_ids, outputs=probabilities)
    # set the loss, the optimizer and the metric, then compile
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model
class XMLGenerator(object):
    """Small helper around ``xml.dom.minidom`` for building annotation files.

    The document is assembled in memory through the ``create_*`` methods and
    written to ``xml_name`` by ``save_xml``.
    """

    # bounding-box fields, written under <bndbox> in this order
    _BOX_KEYS = ("xmin", "ymin", "xmax", "ymax")

    def __init__(self, xml_name: str):
        self.doc = dom.Document()
        self.xml_name = xml_name

    def create_append_node(self, node_name, root_node=None):
        """Create a new element and append it to ``root_node``.

        When ``root_node`` is None the element is appended to the document
        itself. Returns the new element.
        """
        new_node = self.doc.createElement(node_name)
        parent = self.doc if root_node is None else root_node
        parent.appendChild(new_node)
        return new_node

    def create_text_node(self, node_name, node_value, root_node):
        """Create ``<node_name>node_value</node_name>`` and append it to ``root_node``.

        ``node_value`` is converted with ``str`` so numeric values are
        accepted as well as strings.
        """
        new_node = self.doc.createElement(node_name)
        node_data = self.doc.createTextNode(str(node_value))
        new_node.appendChild(node_data)
        root_node.appendChild(new_node)

    def create_object_node(self, info_dict: dict = None, root_node: dom.Node = None):
        """Append an ``<object>`` element described by ``info_dict`` to ``root_node``.

        The ``xmin``/``ymin``/``xmax``/``ymax`` entries are written under a
        ``<bndbox>`` child; every other entry becomes a direct child of
        ``<object>``. ``info_dict`` is not modified. Nothing is written when
        either argument is None.

        Raises:
            KeyError: when one of the four bounding-box keys is missing.
        """
        if (info_dict is None) or (root_node is None):
            return

        object_node = self.create_append_node('object', root_node)
        box_node = self.create_append_node('bndbox', object_node)
        for key in self._BOX_KEYS:
            self.create_text_node(key, info_dict[key], box_node)

        # remaining attributes, read without popping from the caller's dict
        for k, v in info_dict.items():
            if k not in self._BOX_KEYS:
                self.create_text_node(k, v, object_node)

    def save_xml(self):
        """Write the document to ``self.xml_name``, tab-indented."""
        # the context manager closes the file even when writexml raises
        with open(self.xml_name, "w") as f:
            self.doc.writexml(f, addindent="\t", newl="\n")
def create_xml_test():
    """Smoke-test ``create_pascal_voc_xml`` with two identical 'person' objects.

    Writes the annotation for a fictitious 500x700 (width x height) image
    named ``test.jpg`` using ``create_pascal_voc_xml``'s default save location.
    """
    ob = {'name': 'person', 'pose': 'Unspecified', 'truncated': '0', 'difficult': '0',
          'xmin': '174', 'ymin': '101', 'xmax': '349', 'ymax': '351'}
    # the second entry is a deep copy so the two dicts are independent objects
    objects = [ob, copy.deepcopy(ob)]

    years = 'VOC2012'
    filename = 'test.jpg'
    source_dict = {'database': 'The VOC2007 Database', 'annotation': 'PASCAL VOC2007', 'image': 'flickr'}
    im_width = '500'
    im_height = '700'
    im_depth = '3'
    # create_pascal_voc_xml reads im_shape as (height, width, depth):
    # index 0 is written to <height> and index 1 to <width>
    im_shape = (im_height, im_width, im_depth)
    create_pascal_voc_xml(filename=filename, years=years,
                          source_dict=source_dict, objects_list=objects,
                          im_shape=im_shape)
'WIDERFACE'} img_full_path = os.path.join(img_root, img_path) if os.path.exists(img_full_path): im = cv2.imread(img_full_path) im_shape = im.shape else: print(f"Warning: {img_path} does not exist, can't read image shape.") im_shape = (0, 0, 0) ob_list = [] for ob in labels: if ob[7] == '1': # invalid face image, skip continue if int(ob[2]) <= 0 or int(ob[3]) <= 0: print(f"Warning: find bbox w or h <= 0, in {img_path}, skip.") continue ob_dict = {'name': 'face', 'truncated': '0' if ob[8] == '0' else '1', 'difficult': '1' if ob[4] == '2' or ob[8] == '2' else '0', 'xmin': ob[0], 'ymin': ob[1], 'xmax': str(int(ob[0]) + int(ob[2])), 'ymax': str(int(ob[1]) + int(ob[3])), 'blur': ob[4], 'expression': ob[5], 'illumination': ob[6], 'invalid': ob[7], 'occlusion': ob[8], 'pose': ob[9]} # if ob[7] == '1': # cv2.rectangle(im, (int(ob_dict['xmin']), int(ob_dict['ymin'])), # (int(ob_dict['xmax']), int(ob_dict['ymax'])), # (0, 0, 255)) # cv2.imshow("s", im) # cv2.waitKey(0) ob_list.append(ob_dict) if len(ob_list) == 0: print(f"in {img_path}, no object, skip.") return False create_pascal_voc_xml(filename=img_path, years="WIDERFACE2017", source_dict=source_dict, objects_list=ob_list, im_shape=im_shape, save_root=save_root) return True def parse_wider_txt(data_root: str, split: str, save_root: str): """ refer to: torchvision.dataset.widerface.py :param data_root: :param split: :param save_root: :return: """ assert split in ['train', 'val'], f"split must be in ['train', 'val'], got {split}" if os.path.exists(save_root) is False: os.makedirs(save_root) txt_path = os.path.join(data_root, 'wider_face_split', f'wider_face_{split}_bbx_gt.txt') img_root = os.path.join(data_root, f'WIDER_{split}', 'images') with open(txt_path, "r") as f: lines = f.readlines() file_name_line, num_boxes_line, box_annotation_line = True, False, False num_boxes, box_counter, idx = 0, 0, 0 labels = [] xml_list = [] progress_bar = tqdm(lines) for line in progress_bar: line = line.rstrip() if file_name_line: 
class ConfusionMatrix(object):
    """Accumulate a confusion matrix and report / plot metrics derived from it.

    Rows of ``self.matrix`` are indexed by the predicted class and columns by
    the true class (see ``update``).

    Note: if the plotted figure is cut off, that is a matplotlib version
    issue; this example draws correctly with matplotlib-3.2.1 (windows and
    ubuntu). The ``prettytable`` package has to be installed separately.
    """

    def __init__(self, num_classes: int, labels: list):
        """
        :param num_classes: number of classes, i.e. the side of the matrix
        :param labels: class names, used for table rows and axis ticks
        """
        self.matrix = np.zeros((num_classes, num_classes))
        self.num_classes = num_classes
        self.labels = labels

    def update(self, preds, labels):
        """Count one batch: ``matrix[pred, true] += 1`` for every sample."""
        for p, t in zip(preds, labels):
            self.matrix[p, t] += 1

    def summary(self):
        """Print the overall accuracy and a per-class metric table."""
        # calculate accuracy
        sum_TP = 0
        for i in range(self.num_classes):
            sum_TP += self.matrix[i, i]
        total = np.sum(self.matrix)
        # nothing recorded yet: report 0 instead of computing 0 / 0 (nan + warning)
        acc = sum_TP / total if total != 0 else 0.
        print("the model accuracy is ", acc)

        # precision, recall, specificity
        table = PrettyTable()
        table.field_names = ["", "Precision", "Recall", "Specificity"]
        for i in range(self.num_classes):
            TP = self.matrix[i, i]
            FP = np.sum(self.matrix[i, :]) - TP  # predicted i, true class differs
            FN = np.sum(self.matrix[:, i]) - TP  # true class i, predicted otherwise
            TN = total - TP - FP - FN
            Precision = round(TP / (TP + FP), 3) if TP + FP != 0 else 0.
            Recall = round(TP / (TP + FN), 3) if TP + FN != 0 else 0.
            Specificity = round(TN / (TN + FP), 3) if TN + FP != 0 else 0.
            table.add_row([self.labels[i], Precision, Recall, Specificity])
        print(table)

    def plot(self):
        """Print the raw matrix and show it as an annotated heat map."""
        matrix = self.matrix
        print(matrix)
        plt.imshow(matrix, cmap=plt.cm.Blues)

        # tick labels of the x axis
        plt.xticks(range(self.num_classes), self.labels, rotation=45)
        # tick labels of the y axis
        plt.yticks(range(self.num_classes), self.labels)
        # show the colorbar
        plt.colorbar()
        plt.xlabel('True Labels')
        plt.ylabel('Predicted Labels')
        plt.title('Confusion matrix')

        # write the count into every cell
        thresh = matrix.max() / 2
        for x in range(self.num_classes):
            for y in range(self.num_classes):
                # note: matrix[y, x], not matrix[x, y] (x is the column on screen)
                info = int(matrix[y, x])
                plt.text(x, y, info,
                         verticalalignment='center',
                         horizontalalignment='center',
                         color="white" if info > thresh else "black")
        plt.tight_layout()
        plt.show()
shuffle=False, num_workers=2) net = MobileNetV2(num_classes=5) # load pretrain weights model_weight_path = "./MobileNetV2.pth" assert os.path.exists(model_weight_path), "cannot find {} file".format(model_weight_path) net.load_state_dict(torch.load(model_weight_path, map_location=device)) net.to(device) # read class_indict json_label_path = './class_indices.json' assert os.path.exists(json_label_path), "cannot find {} file".format(json_label_path) json_file = open(json_label_path, 'r') class_indict = json.load(json_file) labels = [label for _, label in class_indict.items()] confusion = ConfusionMatrix(num_classes=5, labels=labels) net.eval() with torch.no_grad(): for val_data in tqdm(validate_loader): val_images, val_labels = val_data outputs = net(val_images.to(device)) outputs = torch.softmax(outputs, dim=1) outputs = torch.argmax(outputs, dim=1) confusion.update(outputs.to("cpu").numpy(), val_labels.to("cpu").numpy()) confusion.plot() confusion.summary() ================================================ FILE: pytorch_classification/ConfusionMatrix/model.py ================================================ from torch import nn import torch def _make_divisible(ch, divisor=8, min_ch=None): """ This function is taken from the original tf repo. It ensures that all layers have a channel number that is divisible by 8 It can be seen here: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py """ if min_ch is None: min_ch = divisor new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor) # Make sure that round down does not go down by more than 10%. 
if new_ch < 0.9 * ch: new_ch += divisor return new_ch class ConvBNReLU(nn.Sequential): def __init__(self, in_channel, out_channel, kernel_size=3, stride=1, groups=1): padding = (kernel_size - 1) // 2 super(ConvBNReLU, self).__init__( nn.Conv2d(in_channel, out_channel, kernel_size, stride, padding, groups=groups, bias=False), nn.BatchNorm2d(out_channel), nn.ReLU6(inplace=True) ) class InvertedResidual(nn.Module): def __init__(self, in_channel, out_channel, stride, expand_ratio): super(InvertedResidual, self).__init__() hidden_channel = in_channel * expand_ratio self.use_shortcut = stride == 1 and in_channel == out_channel layers = [] if expand_ratio != 1: # 1x1 pointwise conv layers.append(ConvBNReLU(in_channel, hidden_channel, kernel_size=1)) layers.extend([ # 3x3 depthwise conv ConvBNReLU(hidden_channel, hidden_channel, stride=stride, groups=hidden_channel), # 1x1 pointwise conv(linear) nn.Conv2d(hidden_channel, out_channel, kernel_size=1, bias=False), nn.BatchNorm2d(out_channel), ]) self.conv = nn.Sequential(*layers) def forward(self, x): if self.use_shortcut: return x + self.conv(x) else: return self.conv(x) class MobileNetV2(nn.Module): def __init__(self, num_classes=1000, alpha=1.0, round_nearest=8): super(MobileNetV2, self).__init__() block = InvertedResidual input_channel = _make_divisible(32 * alpha, round_nearest) last_channel = _make_divisible(1280 * alpha, round_nearest) inverted_residual_setting = [ # t, c, n, s [1, 16, 1, 1], [6, 24, 2, 2], [6, 32, 3, 2], [6, 64, 4, 2], [6, 96, 3, 1], [6, 160, 3, 2], [6, 320, 1, 1], ] features = [] # conv1 layer features.append(ConvBNReLU(3, input_channel, stride=2)) # building inverted residual residual blockes for t, c, n, s in inverted_residual_setting: output_channel = _make_divisible(c * alpha, round_nearest) for i in range(n): stride = s if i == 0 else 1 features.append(block(input_channel, output_channel, stride, expand_ratio=t)) input_channel = output_channel # building last several layers 
features.append(ConvBNReLU(input_channel, last_channel, 1)) # combine feature layers self.features = nn.Sequential(*features) # building classifier self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) self.classifier = nn.Sequential( nn.Dropout(0.2), nn.Linear(last_channel, num_classes) ) # weight initialization for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out') if m.bias is not None: nn.init.zeros_(m.bias) elif isinstance(m, nn.BatchNorm2d): nn.init.ones_(m.weight) nn.init.zeros_(m.bias) elif isinstance(m, nn.Linear): nn.init.normal_(m.weight, 0, 0.01) nn.init.zeros_(m.bias) def forward(self, x): x = self.features(x) x = self.avgpool(x) x = torch.flatten(x, 1) x = self.classifier(x) return x ================================================ FILE: pytorch_classification/ConvNeXt/README.md ================================================ ## 代码使用简介 1. 下载好数据集,代码中默认使用的是花分类数据集,下载地址: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz), 如果下载不了的话可以通过百度云链接下载: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg 提取码:58p0 2. 在`train.py`脚本中将`--data-path`设置成解压后的`flower_photos`文件夹绝对路径 3. 下载预训练权重,在`model.py`文件中每个模型都有提供预训练权重的下载地址,根据自己使用的模型下载对应预训练权重 4. 在`train.py`脚本中将`--weights`参数设成下载好的预训练权重路径 5. 设置好数据集的路径`--data-path`以及预训练权重的路径`--weights`就能使用`train.py`脚本开始训练了(训练过程中会自动生成`class_indices.json`文件) 6. 在`predict.py`脚本中导入和训练脚本中同样的模型,并将`model_weight_path`设置成训练好的模型权重路径(默认保存在weights文件夹下) 7. 在`predict.py`脚本中将`img_path`设置成你自己需要预测的图片绝对路径 8. 设置好权重路径`model_weight_path`和预测的图片路径`img_path`就能使用`predict.py`脚本进行预测了 9. 
def drop_path(x, drop_prob: float = 0., training: bool = False):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Whole samples of ``x`` are zeroed with probability ``drop_prob`` and the
    survivors are divided by ``1 - drop_prob``. The input is returned
    untouched when not training or when ``drop_prob`` is 0 / None.

    This is the same as the DropConnect impl created for EfficientNet, etc
    networks; the name 'drop path' is used because 'Drop Connect' is a
    different form of dropout in a separate paper. See discussion:
    https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956
    """
    # "not drop_prob" also covers None, which is DropPath's default
    if not drop_prob or not training:
        return x
    keep_prob = 1 - drop_prob
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
    random_tensor.floor_()  # binarize: 1 with probability keep_prob, else 0
    output = x.div(keep_prob) * random_tensor
    return output


class DropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
    """
    def __init__(self, drop_prob=None):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        return drop_path(x, self.drop_prob, self.training)


class LayerNorm(nn.Module):
    r""" LayerNorm that supports two data formats: channels_last (default) or channels_first.
    The ordering of the dimensions in the inputs. channels_last corresponds to inputs with
    shape (batch_size, height, width, channels) while channels_first corresponds to inputs
    with shape (batch_size, channels, height, width).

    Raises:
        ValueError: if ``data_format`` is neither of the two supported values.
    """

    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(normalized_shape), requires_grad=True)
        self.bias = nn.Parameter(torch.zeros(normalized_shape), requires_grad=True)
        self.eps = eps
        self.data_format = data_format
        if self.data_format not in ["channels_last", "channels_first"]:
            raise ValueError(f"not support data format '{self.data_format}'")
        self.normalized_shape = (normalized_shape,)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if self.data_format == "channels_last":
            return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)

        # channels_first, [batch_size, channels, height, width]:
        # normalize over dim 1 by hand because F.layer_norm works on trailing dims
        mean = x.mean(1, keepdim=True)
        var = (x - mean).pow(2).mean(1, keepdim=True)
        x = (x - mean) / torch.sqrt(var + self.eps)
        x = self.weight[:, None, None] * x + self.bias[:, None, None]
        return x


class Block(nn.Module):
    r""" ConvNeXt Block. There are two equivalent implementations:
    (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
    (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
    We use (2) as we find it slightly faster in PyTorch

    Args:
        dim (int): Number of input channels.
        drop_rate (float): Stochastic depth rate. Default: 0.0
        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
    """
    def __init__(self, dim, drop_rate=0., layer_scale_init_value=1e-6):
        super().__init__()
        self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim)  # depthwise conv
        self.norm = LayerNorm(dim, eps=1e-6, data_format="channels_last")
        self.pwconv1 = nn.Linear(dim, 4 * dim)  # pointwise/1x1 convs, implemented with linear layers
        self.act = nn.GELU()
        self.pwconv2 = nn.Linear(4 * dim, dim)
        # layer scale is switched off entirely for a non-positive init value
        self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim,)),
                                  requires_grad=True) if layer_scale_init_value > 0 else None
        self.drop_path = DropPath(drop_rate) if drop_rate > 0. else nn.Identity()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        shortcut = x
        x = self.dwconv(x)
        x = x.permute(0, 2, 3, 1)  # [N, C, H, W] -> [N, H, W, C]
        x = self.norm(x)
        x = self.pwconv1(x)
        x = self.act(x)
        x = self.pwconv2(x)
        if self.gamma is not None:
            x = self.gamma * x
        x = x.permute(0, 3, 1, 2)  # [N, H, W, C] -> [N, C, H, W]

        x = shortcut + self.drop_path(x)
        return x


class ConvNeXt(nn.Module):
    r""" ConvNeXt
        A PyTorch impl of : `A ConvNet for the 2020s`  -
          https://arxiv.org/pdf/2201.03545.pdf
    Args:
        in_chans (int): Number of input image channels. Default: 3
        num_classes (int): Number of classes for classification head. Default: 1000
        depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3]
        dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768]
        drop_path_rate (float): Stochastic depth rate. Default: 0.
        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
        head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1.
    """
    def __init__(self, in_chans: int = 3, num_classes: int = 1000, depths: list = None,
                 dims: list = None, drop_path_rate: float = 0., layer_scale_init_value: float = 1e-6,
                 head_init_scale: float = 1.):
        super().__init__()
        # None means "use the documented default"; without this the default
        # call crashed on dims[0]
        depths = [3, 3, 9, 3] if depths is None else list(depths)
        dims = [96, 192, 384, 768] if dims is None else list(dims)

        self.downsample_layers = nn.ModuleList()  # stem and 3 intermediate downsampling conv layers
        stem = nn.Sequential(nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4),
                             LayerNorm(dims[0], eps=1e-6, data_format="channels_first"))
        self.downsample_layers.append(stem)

        # the three downsample layers placed in front of stage2-stage4
        for i in range(3):
            downsample_layer = nn.Sequential(LayerNorm(dims[i], eps=1e-6, data_format="channels_first"),
                                             nn.Conv2d(dims[i], dims[i+1], kernel_size=2, stride=2))
            self.downsample_layers.append(downsample_layer)

        self.stages = nn.ModuleList()  # 4 feature resolution stages, each consisting of multiple blocks
        # stochastic depth rate grows linearly from 0 to drop_path_rate over all blocks
        dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]
        cur = 0
        # build the stacked blocks of every stage
        for i in range(4):
            stage = nn.Sequential(
                *[Block(dim=dims[i], drop_rate=dp_rates[cur + j], layer_scale_init_value=layer_scale_init_value)
                  for j in range(depths[i])]
            )
            self.stages.append(stage)
            cur += depths[i]

        self.norm = nn.LayerNorm(dims[-1], eps=1e-6)  # final norm layer
        self.head = nn.Linear(dims[-1], num_classes)
        self.apply(self._init_weights)
        self.head.weight.data.mul_(head_init_scale)
        self.head.bias.data.mul_(head_init_scale)

    def _init_weights(self, m):
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            # std=.02 is what the reference facebookresearch implementation uses
            nn.init.trunc_normal_(m.weight, std=0.02)
            nn.init.constant_(m.bias, 0)

    def forward_features(self, x: torch.Tensor) -> torch.Tensor:
        for i in range(4):
            x = self.downsample_layers[i](x)
            x = self.stages[i](x)

        return self.norm(x.mean([-2, -1]))  # global average pooling, (N, C, H, W) -> (N, C)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.forward_features(x)
        x = self.head(x)
        return x
class MyDataSet(Dataset):
    """Custom dataset backed by a list of image paths and a parallel list of class indices."""

    def __init__(self, images_path: list, images_class: list, transform=None):
        self.images_path = images_path
        self.images_class = images_class
        self.transform = transform

    def __len__(self):
        return len(self.images_path)

    def __getitem__(self, item):
        path = self.images_path[item]
        img = Image.open(path)
        # 'RGB' is a colour image, 'L' a grayscale one; only colour is accepted
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(path))
        label = self.images_class[item]

        if self.transform is None:
            return img, label
        return self.transform(img), label

    @staticmethod
    def collate_fn(batch):
        # for the official default_collate see
        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        images, labels = zip(*batch)
        stacked = torch.stack(images, dim=0)
        targets = torch.as_tensor(labels)
        return stacked, targets
def main(args):
    """Train the ConvNeXt classifier described by ``args`` and log to tensorboard.

    Reads ``device``, ``data_path``, ``batch_size``, ``num_classes``,
    ``weights``, ``freeze_layers``, ``wd``, ``lr`` and ``epochs`` from
    ``args``. The weights with the best validation accuracy so far are saved
    to ``./weights/best_model.pth``.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    print(f"using {device} device.")

    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs("./weights", exist_ok=True)

    tb_writer = SummaryWriter()
    try:
        _run_training(args, device, tb_writer)
    finally:
        # close the writer even when training is interrupted or fails
        tb_writer.close()


def _run_training(args, device, tb_writer):
    """Build data loaders, model and optimizer, then run the epoch loop."""
    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)

    img_size = 224
    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(img_size),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(int(img_size * 1.143)),
                                   transforms.CenterCrop(img_size),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # training dataset
    train_dataset = MyDataSet(images_path=train_images_path,
                              images_class=train_images_label,
                              transform=data_transform["train"])

    # validation dataset
    val_dataset = MyDataSet(images_path=val_images_path,
                            images_class=val_images_label,
                            transform=data_transform["val"])

    batch_size = args.batch_size
    # os.cpu_count() may return None, which min() can't compare with ints
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_dataset.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_dataset.collate_fn)

    model = create_model(num_classes=args.num_classes).to(device)

    if args.weights != "":
        assert os.path.exists(args.weights), "weights file: '{}' not exist.".format(args.weights)
        weights_dict = torch.load(args.weights, map_location=device)["model"]
        # drop the classification head weights, their shape depends on the class count
        for k in list(weights_dict.keys()):
            if "head" in k:
                del weights_dict[k]
        print(model.load_state_dict(weights_dict, strict=False))

    if args.freeze_layers:
        for name, para in model.named_parameters():
            # freeze everything except the head
            if "head" not in name:
                para.requires_grad_(False)
            else:
                print("training {}".format(name))

    pg = get_params_groups(model, weight_decay=args.wd)
    optimizer = optim.AdamW(pg, lr=args.lr, weight_decay=args.wd)
    lr_scheduler = create_lr_scheduler(optimizer, len(train_loader), args.epochs,
                                       warmup=True, warmup_epochs=1)

    best_acc = 0.
    for epoch in range(args.epochs):
        # train
        train_loss, train_acc = train_one_epoch(model=model,
                                                optimizer=optimizer,
                                                data_loader=train_loader,
                                                device=device,
                                                epoch=epoch,
                                                lr_scheduler=lr_scheduler)

        # validate
        val_loss, val_acc = evaluate(model=model,
                                     data_loader=val_loader,
                                     device=device,
                                     epoch=epoch)

        tags = ["train_loss", "train_acc", "val_loss", "val_acc", "learning_rate"]
        tb_writer.add_scalar(tags[0], train_loss, epoch)
        tb_writer.add_scalar(tags[1], train_acc, epoch)
        tb_writer.add_scalar(tags[2], val_loss, epoch)
        tb_writer.add_scalar(tags[3], val_acc, epoch)
        tb_writer.add_scalar(tags[4], optimizer.param_groups[0]["lr"], epoch)

        if best_acc < val_acc:
            torch.save(model.state_dict(), "./weights/best_model.pth")
            best_acc = val_acc
def read_split_data(root: str, val_rate: float = 0.2):
    """Split an image-folder dataset (one sub-folder per class) into train / val lists.

    Class indices follow the sorted sub-folder names and the index -> name
    mapping is written to ``class_indices.json`` in the current working
    directory. ``random.seed(0)`` is set so the split is reproducible.

    :param root: dataset root directory
    :param val_rate: fraction of every class sampled into the validation set
    :return: (train_images_path, train_images_label, val_images_path, val_images_label)
    :raises AssertionError: if ``root`` does not exist or either split ends up empty
    """
    random.seed(0)  # make the random split reproducible
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # every sub-folder is one class; sort so the order is the same on every platform
    flower_class = sorted(cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla)))
    # class name -> numeric index
    class_indices = {name: index for index, name in enumerate(flower_class)}
    json_str = json.dumps({index: name for name, index in class_indices.items()}, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of every training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of every validation image
    every_class_num = []  # number of samples per class
    supported = [".jpg", ".JPG", ".png", ".PNG"]  # accepted file suffixes
    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # collect every file with a supported suffix
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        # sort so the order is the same on every platform
        images.sort()
        image_class = class_indices[cla]
        every_class_num.append(len(images))
        # sample the validation images; a set keeps the membership test below O(1)
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:  # sampled -> validation set
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:  # everything else -> training set
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.".format(sum(every_class_num)))
    print("{} images for training.".format(len(train_images_path)))
    print("{} images for validation.".format(len(val_images_path)))
    assert len(train_images_path) > 0, "number of training images must greater than 0."
    assert len(val_images_path) > 0, "number of validation images must greater than 0."

    # developer toggle: flip to True to show the class distribution
    plot_image = False
    if plot_image:
        # bar chart of the number of images per class
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # replace the ticks 0,1,2,3,4 with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # write the count above every bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        plt.xlabel('image class')
        plt.ylabel('number of images')
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label
def write_pickle(list_info: list, file_name: str):
    """Serialize ``list_info`` into ``file_name`` with pickle."""
    with open(file_name, 'wb') as handle:
        pickle.dump(list_info, handle)


def read_pickle(file_name: str) -> list:
    """Load and return the object pickled in ``file_name``."""
    # NOTE: pickle.load must only be used on files from a trusted source
    with open(file_name, 'rb') as handle:
        return pickle.load(handle)


def train_one_epoch(model, optimizer, data_loader, device, epoch, lr_scheduler):
    """Train ``model`` for one pass over ``data_loader``.

    The optimizer and the lr scheduler are stepped once per batch. The
    process exits with status 1 as soon as a non-finite loss shows up.

    :return: (mean loss per batch, accuracy over all seen samples)
    """
    model.train()
    criterion = torch.nn.CrossEntropyLoss()
    loss_sum = torch.zeros(1).to(device)  # accumulated loss
    correct_sum = torch.zeros(1).to(device)  # accumulated number of correct predictions
    optimizer.zero_grad()

    sample_num = 0
    data_loader = tqdm(data_loader, file=sys.stdout)
    for step, (images, labels) in enumerate(data_loader):
        sample_num += images.shape[0]
        targets = labels.to(device)

        pred = model(images.to(device))
        pred_classes = torch.max(pred, dim=1)[1]
        correct_sum += torch.eq(pred_classes, targets).sum()

        loss = criterion(pred, targets)
        loss.backward()
        loss_sum += loss.detach()

        mean_loss = loss_sum.item() / (step + 1)
        accuracy = correct_sum.item() / sample_num
        current_lr = optimizer.param_groups[0]["lr"]
        data_loader.desc = "[train epoch {}] loss: {:.3f}, acc: {:.3f}, lr: {:.5f}".format(
            epoch, mean_loss, accuracy, current_lr
        )

        if not torch.isfinite(loss):
            print('WARNING: non-finite loss, ending training ', loss)
            sys.exit(1)

        optimizer.step()
        optimizer.zero_grad()
        # update lr
        lr_scheduler.step()

    return loss_sum.item() / (step + 1), correct_sum.item() / sample_num
def create_lr_scheduler(optimizer,
                        num_step: int,
                        epochs: int,
                        warmup=True,
                        warmup_epochs=1,
                        warmup_factor=1e-3,
                        end_factor=1e-6):
    """Build a per-step LambdaLR schedule: linear warmup followed by cosine decay.

    :param optimizer: optimizer whose learning rate is scheduled
    :param num_step: number of scheduler steps (batches) per epoch
    :param epochs: total number of epochs
    :param warmup: enable the warmup phase (must be exactly ``True``)
    :param warmup_epochs: length of the warmup phase in epochs
    :param warmup_factor: lr multiplier at the very first step of warmup
    :param end_factor: lr multiplier reached at the end of the cosine phase
    :return: a ``torch.optim.lr_scheduler.LambdaLR`` instance
    """
    assert num_step > 0 and epochs > 0
    if warmup is False:
        warmup_epochs = 0

    # loop invariants, computed once instead of on every scheduler step
    warmup_steps = warmup_epochs * num_step
    cosine_steps = (epochs - warmup_epochs) * num_step

    def f(x):
        """
        Return the lr multiplier for step ``x``.
        Note that pytorch calls lr_scheduler.step() once before training starts.
        """
        # warmup_steps > 0: with warmup_epochs=0 there is nothing to warm up and
        # the division below would raise ZeroDivisionError at step 0
        if warmup is True and warmup_steps > 0 and x <= warmup_steps:
            alpha = float(x) / warmup_steps
            # during warmup the multiplier goes warmup_factor -> 1
            return warmup_factor * (1 - alpha) + alpha

        if cosine_steps == 0:
            # no cosine phase configured (warmup covers every epoch)
            return end_factor
        current_step = x - warmup_steps
        # after warmup the multiplier goes 1 -> end_factor
        return ((1 + math.cos(current_step * math.pi / cosine_steps)) / 2) * (1 - end_factor) + end_factor

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)
def make_divisible(
    v: Union[float, int],
    divisor: Optional[int] = 8,
    min_value: Optional[Union[float, int]] = None,
) -> Union[float, int]:
    """Round ``v`` to a nearby multiple of ``divisor``.

    Adapted from the TensorFlow MobileNet reference implementation:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py

    :param v: value (typically a channel count) to round.
    :param divisor: the result is built from multiples of this number.
    :param min_value: lower bound for the result; defaults to ``divisor``.
    :return: ``v`` rounded to the nearest multiple of ``divisor`` (halves
        round up), clamped to at least ``min_value``, and bumped up by one
        ``divisor`` if rounding would otherwise lose more than 10% of ``v``.
    """
    lower_bound = divisor if min_value is None else min_value

    # Round to the nearest multiple, then apply the lower bound.
    nearest_multiple = int(v + divisor / 2) // divisor * divisor
    result = max(lower_bound, nearest_multiple)

    # Make sure that rounding down does not go down by more than 10%.
    if result < 0.9 * v:
        result += divisor
    return result
""" def __init__( self, in_channels: int, out_channels: int, kernel_size: Union[int, Tuple[int, int]], stride: Optional[Union[int, Tuple[int, int]]] = 1, groups: Optional[int] = 1, bias: Optional[bool] = False, use_norm: Optional[bool] = True, use_act: Optional[bool] = True, ) -> None: super().__init__() if isinstance(kernel_size, int): kernel_size = (kernel_size, kernel_size) if isinstance(stride, int): stride = (stride, stride) assert isinstance(kernel_size, Tuple) assert isinstance(stride, Tuple) padding = ( int((kernel_size[0] - 1) / 2), int((kernel_size[1] - 1) / 2), ) block = nn.Sequential() conv_layer = nn.Conv2d( in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, groups=groups, padding=padding, bias=bias ) block.add_module(name="conv", module=conv_layer) if use_norm: norm_layer = nn.BatchNorm2d(num_features=out_channels, momentum=0.1) block.add_module(name="norm", module=norm_layer) if use_act: act_layer = nn.SiLU() block.add_module(name="act", module=act_layer) self.block = block def forward(self, x: Tensor) -> Tensor: return self.block(x) class InvertedResidual(nn.Module): """ This class implements the inverted residual block, as described in `MobileNetv2 `_ paper Args: in_channels (int): :math:`C_{in}` from an expected input of size :math:`(N, C_{in}, H_{in}, W_{in})` out_channels (int): :math:`C_{out}` from an expected output of size :math:`(N, C_{out}, H_{out}, W_{out)` stride (int): Use convolutions with a stride. Default: 1 expand_ratio (Union[int, float]): Expand the input channels by this factor in depth-wise conv skip_connection (Optional[bool]): Use skip-connection. Default: True Shape: - Input: :math:`(N, C_{in}, H_{in}, W_{in})` - Output: :math:`(N, C_{out}, H_{out}, W_{out})` .. note:: If `in_channels =! 
def __init__(
    self,
    in_channels: int,
    out_channels: int,
    stride: int,
    expand_ratio: Union[int, float],
    skip_connection: Optional[bool] = True,
) -> None:
    """Build the expand -> depth-wise 3x3 -> project stack of the block.

    The 1x1 expansion is omitted when ``expand_ratio`` equals 1. The residual
    shortcut is enabled only when ``stride`` is 1, the input and output
    channel counts match, and ``skip_connection`` is truthy.
    """
    assert stride in [1, 2]
    hidden_dim = make_divisible(int(round(in_channels * expand_ratio)), 8)

    super().__init__()

    # Layers are created in execution order so that parameter registration
    # (and therefore state_dict keys and random initialisation order) is stable.
    named_layers = []
    if expand_ratio != 1:
        expand = ConvLayer(
            in_channels=in_channels,
            out_channels=hidden_dim,
            kernel_size=1
        )
        named_layers.append(("exp_1x1", expand))

    depthwise = ConvLayer(
        in_channels=hidden_dim,
        out_channels=hidden_dim,
        stride=stride,
        kernel_size=3,
        groups=hidden_dim
    )
    named_layers.append(("conv_3x3", depthwise))

    # Linear bottleneck: normalised but without an activation.
    project = ConvLayer(
        in_channels=hidden_dim,
        out_channels=out_channels,
        kernel_size=1,
        use_act=False,
        use_norm=True,
    )
    named_layers.append(("red_1x1", project))

    block = nn.Sequential()
    for layer_name, layer in named_layers:
        block.add_module(name=layer_name, module=layer)

    self.block = block
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.exp = expand_ratio
    self.stride = stride
    self.use_res_connect = (
        stride == 1 and in_channels == out_channels and skip_connection
    )
Default: 8 transformer_norm_layer (Optional[str]): Normalization layer in the transformer block. Default: layer_norm conv_ksize (int): Kernel size to learn local representations in MobileViT block. Default: 3 no_fusion (Optional[bool]): Do not combine the input and output feature maps. Default: False """ def __init__( self, in_channels: int, transformer_dim: int, ffn_dim: int, n_transformer_blocks: int = 2, head_dim: int = 32, attn_dropout: float = 0.0, dropout: float = 0.0, ffn_dropout: float = 0.0, patch_h: int = 8, patch_w: int = 8, conv_ksize: Optional[int] = 3, *args, **kwargs ) -> None: super().__init__() conv_3x3_in = ConvLayer( in_channels=in_channels, out_channels=in_channels, kernel_size=conv_ksize, stride=1 ) conv_1x1_in = ConvLayer( in_channels=in_channels, out_channels=transformer_dim, kernel_size=1, stride=1, use_norm=False, use_act=False ) conv_1x1_out = ConvLayer( in_channels=transformer_dim, out_channels=in_channels, kernel_size=1, stride=1 ) conv_3x3_out = ConvLayer( in_channels=2 * in_channels, out_channels=in_channels, kernel_size=conv_ksize, stride=1 ) self.local_rep = nn.Sequential() self.local_rep.add_module(name="conv_3x3", module=conv_3x3_in) self.local_rep.add_module(name="conv_1x1", module=conv_1x1_in) assert transformer_dim % head_dim == 0 num_heads = transformer_dim // head_dim global_rep = [ TransformerEncoder( embed_dim=transformer_dim, ffn_latent_dim=ffn_dim, num_heads=num_heads, attn_dropout=attn_dropout, dropout=dropout, ffn_dropout=ffn_dropout ) for _ in range(n_transformer_blocks) ] global_rep.append(nn.LayerNorm(transformer_dim)) self.global_rep = nn.Sequential(*global_rep) self.conv_proj = conv_1x1_out self.fusion = conv_3x3_out self.patch_h = patch_h self.patch_w = patch_w self.patch_area = self.patch_w * self.patch_h self.cnn_in_dim = in_channels self.cnn_out_dim = transformer_dim self.n_heads = num_heads self.ffn_dim = ffn_dim self.dropout = dropout self.attn_dropout = attn_dropout self.ffn_dropout = ffn_dropout 
def unfolding(self, x: Tensor) -> Tuple[Tensor, Dict]:
    """Rearrange a feature map into per-pixel-position patch sequences.

    The input ``[B, C, H, W]`` is cut into non-overlapping ``patch_h x patch_w``
    patches. If ``H`` or ``W`` is not a multiple of the patch size, the map is
    first bilinearly resized up to the next multiple.

    Returns:
        A tuple ``(patches, info)`` where ``patches`` has shape ``[B * P, N, C]``
        (``P`` = pixels per patch, ``N`` = number of patches) and ``info`` holds
        what ``folding`` needs to restore the original layout.
    """
    p_h, p_w = self.patch_h, self.patch_w
    pixels_per_patch = p_w * p_h
    bsz, channels, height, width = x.shape

    # Round each spatial size up to the nearest multiple of the patch size.
    padded_h = int(math.ceil(height / self.patch_h) * self.patch_h)
    padded_w = int(math.ceil(width / self.patch_w) * self.patch_w)

    needs_resize = padded_w != width or padded_h != height
    if needs_resize:
        # Note: Padding can be done, but then it needs to be handled in attention function.
        x = F.interpolate(x, size=(padded_h, padded_w), mode="bilinear", align_corners=False)

    n_w = padded_w // p_w  # patches along the width
    n_h = padded_h // p_h  # patches along the height
    n_patches = n_h * n_w  # N

    # [B, C, H, W] -> [B, C, n_h, p_h, n_w, p_w]
    x = x.reshape(bsz, channels, n_h, p_h, n_w, p_w)
    # [B, C, n_h, p_h, n_w, p_w] -> [B, C, n_h, n_w, p_h, p_w]
    x = x.transpose(3, 4)
    # [B, C, n_h, n_w, p_h, p_w] -> [B, C, N, P]
    x = x.reshape(bsz, channels, n_patches, pixels_per_patch)
    # [B, C, N, P] -> [B, P, N, C] -> [BP, N, C]
    x = x.transpose(1, 3).reshape(bsz * pixels_per_patch, n_patches, -1)

    info_dict = {
        "orig_size": (height, width),
        "batch_size": bsz,
        "interpolate": needs_resize,
        "total_patches": n_patches,
        "num_patches_w": n_w,
        "num_patches_h": n_h,
    }
    return x, info_dict
def forward(self, x: Tensor) -> Tensor:
    """Apply the block: local convs, transformer over patches, then fusion.

    The input is kept as a shortcut, concatenated channel-wise with the
    projected transformer output, and passed through the fusion convolution.
    """
    shortcut = x

    # Local representation learned with convolutions.
    local_features = self.local_rep(x)

    # Convert the feature map to patch sequences.
    tokens, fold_info = self.unfolding(local_features)

    # Learn global representations, one layer of `global_rep` at a time.
    for layer in self.global_rep:
        tokens = layer(tokens)

    # Convert the patch sequences back into a [B, C, H, W] feature map.
    global_features = self.folding(x=tokens, info_dict=fold_info)
    projected = self.conv_proj(global_features)

    # Fuse the shortcut with the global features along the channel axis.
    fused_input = torch.cat((shortcut, projected), dim=1)
    return self.fusion(fused_input)
@staticmethod
def _make_mobilenet_layer(input_channel: int, cfg: Dict) -> Tuple[nn.Sequential, int]:
    """Stack ``num_blocks`` inverted-residual blocks described by ``cfg``.

    Only the first block uses the configured stride; the remaining blocks use
    stride 1. Returns the stage and the channel count it outputs (which is
    ``input_channel`` unchanged when ``num_blocks`` is 0).
    """
    out_ch = cfg.get("out_channels")
    n_blocks = cfg.get("num_blocks", 2)
    ratio = cfg.get("expand_ratio", 4)

    layers = []
    current_ch = input_channel
    for idx in range(n_blocks):
        if idx == 0:
            block_stride = cfg.get("stride", 1)
        else:
            block_stride = 1

        layers.append(
            InvertedResidual(
                in_channels=current_ch,
                out_channels=out_ch,
                stride=block_stride,
                expand_ratio=ratio
            )
        )
        # Every block after the first one consumes the stage's output width.
        current_ch = out_ch

    return nn.Sequential(*layers), current_ch
cfg.get("num_heads", 4) head_dim = transformer_dim // num_heads if transformer_dim % head_dim != 0: raise ValueError("Transformer input dimension should be divisible by head dimension. " "Got {} and {}.".format(transformer_dim, head_dim)) block.append(MobileViTBlock( in_channels=input_channel, transformer_dim=transformer_dim, ffn_dim=ffn_dim, n_transformer_blocks=cfg.get("transformer_blocks", 1), patch_h=cfg.get("patch_h", 2), patch_w=cfg.get("patch_w", 2), dropout=cfg.get("dropout", 0.1), ffn_dropout=cfg.get("ffn_dropout", 0.0), attn_dropout=cfg.get("attn_dropout", 0.1), head_dim=head_dim, conv_ksize=3 )) return nn.Sequential(*block), input_channel @staticmethod def init_parameters(m): if isinstance(m, nn.Conv2d): if m.weight is not None: nn.init.kaiming_normal_(m.weight, mode="fan_out") if m.bias is not None: nn.init.zeros_(m.bias) elif isinstance(m, (nn.LayerNorm, nn.BatchNorm2d)): if m.weight is not None: nn.init.ones_(m.weight) if m.bias is not None: nn.init.zeros_(m.bias) elif isinstance(m, (nn.Linear,)): if m.weight is not None: nn.init.trunc_normal_(m.weight, mean=0.0, std=0.02) if m.bias is not None: nn.init.zeros_(m.bias) else: pass def forward(self, x: Tensor) -> Tensor: x = self.conv_1(x) x = self.layer_1(x) x = self.layer_2(x) x = self.layer_3(x) x = self.layer_4(x) x = self.layer_5(x) x = self.conv_1x1_exp(x) x = self.classifier(x) return x def mobile_vit_xx_small(num_classes: int = 1000): # pretrain weight link # https://docs-assets.developer.apple.com/ml-research/models/cvnets/classification/mobilevit_xxs.pt config = get_config("xx_small") m = MobileViT(config, num_classes=num_classes) return m def mobile_vit_x_small(num_classes: int = 1000): # pretrain weight link # https://docs-assets.developer.apple.com/ml-research/models/cvnets/classification/mobilevit_xs.pt config = get_config("x_small") m = MobileViT(config, num_classes=num_classes) return m def mobile_vit_small(num_classes: int = 1000): # pretrain weight link # 
def get_config(mode: str = "xxs") -> dict:
    """Return the layer configuration of a MobileViT variant.

    Args:
        mode: model size. One of ``"xx_small"``, ``"x_small"`` or ``"small"``;
            the short aliases ``"xxs"``, ``"xs"`` and ``"s"`` are accepted too,
            so the default value now resolves to the ``xx_small`` model
            instead of raising.

    Returns:
        A freshly built dict with the keys ``"layer1"`` .. ``"layer5"``
        (per-stage settings, each including ``dropout``, ``ffn_dropout`` and
        ``attn_dropout``), ``"last_layer_exp_factor"`` and ``"cls_dropout"``.

    Raises:
        NotImplementedError: if ``mode`` is not a known model size.
    """
    aliases = {"xxs": "xx_small", "xs": "x_small", "s": "small"}
    mode = aliases.get(mode, mode)

    # mode -> (MobileNetV2 expand ratio,
    #          layer1 out_channels,
    #          layer2 out_channels,
    #          [(out_channels, transformer_channels, ffn_dim, transformer_blocks)
    #           for layer3, layer4, layer5])
    variants = {
        "xx_small": (2, 16, 24, [(48, 64, 128, 2), (64, 80, 160, 4), (80, 96, 192, 3)]),
        "x_small": (4, 32, 48, [(64, 96, 192, 2), (80, 120, 240, 4), (96, 144, 288, 3)]),
        "small": (4, 32, 64, [(96, 144, 288, 2), (128, 192, 384, 4), (160, 240, 480, 3)]),
    }
    if mode not in variants:
        raise NotImplementedError(
            "unknown MobileViT config '{}', expected one of {}".format(
                mode, sorted(variants) + sorted(aliases)
            )
        )

    mv2_exp_mult, layer1_out, layer2_out, mit_stages = variants[mode]

    def mv2_stage(out_channels: int, num_blocks: int, stride: int) -> dict:
        """Settings of a stage made of MobileNetV2 inverted-residual blocks."""
        return {
            "out_channels": out_channels,
            "expand_ratio": mv2_exp_mult,
            "num_blocks": num_blocks,
            "stride": stride,
            "block_type": "mv2",
        }

    def mit_stage(out_channels: int, transformer_channels: int,
                  ffn_dim: int, transformer_blocks: int) -> dict:
        """Settings of a stage made of a strided MV2 block plus a MobileViT block."""
        return {
            "out_channels": out_channels,
            "transformer_channels": transformer_channels,
            "ffn_dim": ffn_dim,
            "transformer_blocks": transformer_blocks,
            "patch_h": 2,
            "patch_w": 2,
            "stride": 2,
            "mv_expand_ratio": mv2_exp_mult,
            "num_heads": 4,
            "block_type": "mobilevit",
        }

    config = {
        "layer1": mv2_stage(layer1_out, num_blocks=1, stride=1),
        "layer2": mv2_stage(layer2_out, num_blocks=3, stride=2),
        "layer3": mit_stage(*mit_stages[0]),  # 28x28
        "layer4": mit_stage(*mit_stages[1]),  # 14x14
        "layer5": mit_stage(*mit_stages[2]),  # 7x7
        "last_layer_exp_factor": 4,
        "cls_dropout": 0.1
    }

    # Dropout settings are shared by every stage of every variant.
    for k in ["layer1", "layer2", "layer3", "layer4", "layer5"]:
        config[k].update({"dropout": 0.1, "ffn_dropout": 0.0, "attn_dropout": 0.0})

    return config
images_path self.images_class = images_class self.transform = transform def __len__(self): return len(self.images_path) def __getitem__(self, item): img = Image.open(self.images_path[item]) # RGB为彩色图片,L为灰度图片 if img.mode != 'RGB': raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item])) label = self.images_class[item] if self.transform is not None: img = self.transform(img) return img, label @staticmethod def collate_fn(batch): # 官方实现的default_collate可以参考 # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py images, labels = tuple(zip(*batch)) images = torch.stack(images, dim=0) labels = torch.as_tensor(labels) return images, labels ================================================ FILE: pytorch_classification/MobileViT/predict.py ================================================ import os import json import torch from PIL import Image from torchvision import transforms import matplotlib.pyplot as plt from model import mobile_vit_xx_small as create_model def main(): device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") img_size = 224 data_transform = transforms.Compose( [transforms.Resize(int(img_size * 1.14)), transforms.CenterCrop(img_size), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) # load image img_path = "../tulip.jpg" assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path) img = Image.open(img_path) plt.imshow(img) # [N, C, H, W] img = data_transform(img) # expand batch dimension img = torch.unsqueeze(img, dim=0) # read class_indict json_path = './class_indices.json' assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path) with open(json_path, "r") as f: class_indict = json.load(f) # create model model = create_model(num_classes=5).to(device) # load model weights model_weight_path = "./weights/best_model.pth" model.load_state_dict(torch.load(model_weight_path, 
def main(args):
    """Train a MobileViT classifier and save the best and latest weights.

    Args:
        args: parsed command-line namespace providing ``device``,
            ``data_path``, ``batch_size``, ``num_classes``, ``weights``,
            ``freeze_layers``, ``lr`` and ``epochs``.

    Side effects:
        Creates ``./weights`` and writes ``best_model.pth`` /
        ``latest_model.pth`` there, and logs scalars through a TensorBoard
        ``SummaryWriter`` that is closed when training ends or fails.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs("./weights", exist_ok=True)

    tb_writer = SummaryWriter()

    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)

    img_size = 224
    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(img_size),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(int(img_size * 1.143)),
                                   transforms.CenterCrop(img_size),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # Training dataset
    train_dataset = MyDataSet(images_path=train_images_path,
                              images_class=train_images_label,
                              transform=data_transform["train"])

    # Validation dataset
    val_dataset = MyDataSet(images_path=val_images_path,
                            images_class=val_images_label,
                            transform=data_transform["val"])

    batch_size = args.batch_size
    # os.cpu_count() returns None when the count cannot be determined, which
    # would make min() raise; fall back to a single core in that case.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_dataset.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_dataset.collate_fn)

    model = create_model(num_classes=args.num_classes).to(device)

    if args.weights != "":
        assert os.path.exists(args.weights), "weights file: '{}' not exist.".format(args.weights)
        weights_dict = torch.load(args.weights, map_location=device)
        # Some checkpoints wrap the state dict under a "model" key.
        weights_dict = weights_dict["model"] if "model" in weights_dict else weights_dict
        # Drop the classification head: its shape depends on the class count.
        for k in list(weights_dict.keys()):
            if "classifier" in k:
                del weights_dict[k]
        print(model.load_state_dict(weights_dict, strict=False))

    if args.freeze_layers:
        for name, para in model.named_parameters():
            # Freeze everything except the classification head.
            if "classifier" not in name:
                para.requires_grad_(False)
            else:
                print("training {}".format(name))

    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.AdamW(pg, lr=args.lr, weight_decay=1E-2)

    best_acc = 0.
    try:
        for epoch in range(args.epochs):
            # train
            train_loss, train_acc = train_one_epoch(model=model,
                                                    optimizer=optimizer,
                                                    data_loader=train_loader,
                                                    device=device,
                                                    epoch=epoch)

            # validate
            val_loss, val_acc = evaluate(model=model,
                                         data_loader=val_loader,
                                         device=device,
                                         epoch=epoch)

            tags = ["train_loss", "train_acc", "val_loss", "val_acc", "learning_rate"]
            tb_writer.add_scalar(tags[0], train_loss, epoch)
            tb_writer.add_scalar(tags[1], train_acc, epoch)
            tb_writer.add_scalar(tags[2], val_loss, epoch)
            tb_writer.add_scalar(tags[3], val_acc, epoch)
            tb_writer.add_scalar(tags[4], optimizer.param_groups[0]["lr"], epoch)

            if val_acc > best_acc:
                best_acc = val_acc
                torch.save(model.state_dict(), "./weights/best_model.pth")

            torch.save(model.state_dict(), "./weights/latest_model.pth")
    finally:
        # Flush pending events and release the log file even if training fails.
        tb_writer.close()
class MultiHeadAttention(nn.Module):
    """
    Multi-head self-attention as described in the "Attention is all you need" paper.

    Args:
        embed_dim (int): :math:`C_{in}` from an expected input of size :math:`(N, P, C_{in})`
        num_heads (int): Number of attention heads; must divide ``embed_dim``
        attn_dropout (float): Dropout applied to the attention weights. Default: 0.0
        bias (bool): Use bias in the projections or not. Default: ``True``

    Shape:
        - Input: :math:`(N, P, C_{in})` where :math:`N` is batch size, :math:`P` is number of patches,
        and :math:`C_{in}` is input embedding dim
        - Output: same shape as the input
    """

    def __init__(
        self,
        embed_dim: int,
        num_heads: int,
        attn_dropout: float = 0.0,
        bias: bool = True,
        *args,
        **kwargs
    ) -> None:
        super().__init__()
        if embed_dim % num_heads != 0:
            message = "Embedding dim must be divisible by number of heads in {}. Got: embed_dim={} and num_heads={}".format(
                self.__class__.__name__, embed_dim, num_heads
            )
            raise ValueError(message)

        # A single linear layer produces query, key and value at once.
        self.qkv_proj = nn.Linear(in_features=embed_dim, out_features=3 * embed_dim, bias=bias)

        self.attn_dropout = nn.Dropout(p=attn_dropout)
        self.out_proj = nn.Linear(in_features=embed_dim, out_features=embed_dim, bias=bias)

        self.head_dim = embed_dim // num_heads
        # Scaled dot-product attention: scale by 1 / sqrt(head_dim).
        self.scaling = self.head_dim ** -0.5
        self.softmax = nn.Softmax(dim=-1)
        self.num_heads = num_heads
        self.embed_dim = embed_dim

    def forward(self, x_q: Tensor) -> Tensor:
        """Apply self-attention to ``x_q`` of shape ``[N, P, C]``."""
        batch, n_tokens, _ = x_q.shape

        # [N, P, C] -> [N, P, 3C] -> [N, P, 3, h, c] where C = h * c
        fused = self.qkv_proj(x_q).reshape(batch, n_tokens, 3, self.num_heads, -1)
        # [N, P, 3, h, c] -> [N, h, 3, P, c]
        fused = fused.transpose(1, 3).contiguous()
        # [N, h, 3, P, c] -> three tensors of shape [N, h, P, c]
        query, key, value = fused.unbind(dim=2)

        # Scaled Q K^T: [N, h, P, c] x [N, h, c, P] -> [N, h, P, P]
        scores = torch.matmul(query * self.scaling, key.transpose(-1, -2))
        weights = self.attn_dropout(self.softmax(scores))

        # Weighted sum of values: [N, h, P, P] x [N, h, P, c] -> [N, h, P, c]
        context = torch.matmul(weights, value)

        # [N, h, P, c] -> [N, P, h, c] -> [N, P, C]
        merged = context.transpose(1, 2).reshape(batch, n_tokens, -1)
        return self.out_proj(merged)
Default: 0.0 Shape: - Input: :math:`(N, P, C_{in})` where :math:`N` is batch size, :math:`P` is number of patches, and :math:`C_{in}` is input embedding dim - Output: same shape as the input """ def __init__( self, embed_dim: int, ffn_latent_dim: int, num_heads: Optional[int] = 8, attn_dropout: Optional[float] = 0.0, dropout: Optional[float] = 0.0, ffn_dropout: Optional[float] = 0.0, *args, **kwargs ) -> None: super().__init__() attn_unit = MultiHeadAttention( embed_dim, num_heads, attn_dropout=attn_dropout, bias=True ) self.pre_norm_mha = nn.Sequential( nn.LayerNorm(embed_dim), attn_unit, nn.Dropout(p=dropout) ) self.pre_norm_ffn = nn.Sequential( nn.LayerNorm(embed_dim), nn.Linear(in_features=embed_dim, out_features=ffn_latent_dim, bias=True), nn.SiLU(), nn.Dropout(p=ffn_dropout), nn.Linear(in_features=ffn_latent_dim, out_features=embed_dim, bias=True), nn.Dropout(p=dropout) ) self.embed_dim = embed_dim self.ffn_dim = ffn_latent_dim self.ffn_dropout = ffn_dropout self.std_dropout = dropout def forward(self, x: Tensor) -> Tensor: # multi-head attention res = x x = self.pre_norm_mha(x) x = x + res # feed forward network x = x + self.pre_norm_ffn(x) return x ================================================ FILE: pytorch_classification/MobileViT/unfold_test.py ================================================ import time import torch batch_size = 8 in_channels = 32 patch_h = 2 patch_w = 2 num_patch_h = 16 num_patch_w = 16 num_patches = num_patch_h * num_patch_w patch_area = patch_h * patch_w def official(x: torch.Tensor): # [B, C, H, W] -> [B * C * n_h, p_h, n_w, p_w] x = x.reshape(batch_size * in_channels * num_patch_h, patch_h, num_patch_w, patch_w) # [B * C * n_h, p_h, n_w, p_w] -> [B * C * n_h, n_w, p_h, p_w] x = x.transpose(1, 2) # [B * C * n_h, n_w, p_h, p_w] -> [B, C, N, P] where P = p_h * p_w and N = n_h * n_w x = x.reshape(batch_size, in_channels, num_patches, patch_area) # [B, C, N, P] -> [B, P, N, C] x = x.transpose(1, 3) # [B, P, N, C] -> [BP, N, C] x = 
def read_split_data(root: str, val_rate: float = 0.2, plot_image: bool = False):
    """Split an image-folder dataset into training and validation lists.

    Every sub-directory of ``root`` is treated as one class. Class names are
    sorted and numbered from 0, and the ``index -> class name`` mapping is
    written to ``class_indices.json`` in the current working directory.

    Args:
        root: Dataset root directory containing one folder per class.
        val_rate: Fraction of each class's images sampled into the validation set.
        plot_image: When True, show a bar chart of the number of images per
            class (uses the module's ``plt``).

    Returns:
        A tuple ``(train_images_path, train_images_label,
        val_images_path, val_images_label)`` of parallel lists.

    Raises:
        AssertionError: If ``root`` does not exist, or if either the training
            or the validation split ends up empty.
    """
    random.seed(0)  # fixed seed so the split is reproducible
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # One folder per class; sort so the class order is the same on every platform.
    flower_class = sorted(cla for cla in os.listdir(root)
                          if os.path.isdir(os.path.join(root, cla)))
    # Map class name -> numeric index, and persist the inverse mapping.
    class_indices = {name: index for index, name in enumerate(flower_class)}
    json_str = json.dumps({index: name for name, index in class_indices.items()}, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of each training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of each validation image
    every_class_num = []  # number of images found per class
    supported = (".jpg", ".JPG", ".png", ".PNG")  # accepted file extensions

    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # Collect the supported files; sort so the order is the same on every platform.
        images = sorted(os.path.join(root, cla, i) for i in os.listdir(cla_path)
                        if os.path.splitext(i)[-1] in supported)
        image_class = class_indices[cla]
        every_class_num.append(len(images))

        # Sample the validation images from the sorted list (so the result does
        # not depend on set ordering), then keep them in a set: the membership
        # test below is O(1) instead of scanning a list for every image.
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.".format(sum(every_class_num)))
    print("{} images for training.".format(len(train_images_path)))
    print("{} images for validation.".format(len(val_images_path)))
    assert len(train_images_path) > 0, "number of training images must greater than 0."
    assert len(val_images_path) > 0, "number of validation images must greater than 0."

    if plot_image:
        # Bar chart with one bar per class.
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # Replace the numeric x ticks 0,1,2,... with the class names.
        plt.xticks(range(len(flower_class)), flower_class)
        # Write the count above each bar.
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        plt.xlabel('image class')
        plt.ylabel('number of images')
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label
@torch.no_grad()
def evaluate(model, data_loader, device, epoch):
    """Run one validation pass and report mean loss and accuracy.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        data_loader: Iterable yielding ``(images, labels)`` batches.
        device: Device the batches are moved to before the forward pass.
        epoch: Epoch number, used only in the progress-bar description.

    Returns:
        ``(mean_loss, accuracy)``: the summed per-batch cross-entropy loss
        divided by the number of batches, and the number of correct
        predictions divided by the number of samples seen.
    """
    model.eval()
    criterion = torch.nn.CrossEntropyLoss()

    correct_total = torch.zeros(1).to(device)  # running count of correct predictions
    loss_total = torch.zeros(1).to(device)  # running sum of per-batch losses
    sample_num = 0

    data_loader = tqdm(data_loader, file=sys.stdout)
    for step, (images, labels) in enumerate(data_loader):
        sample_num += images.shape[0]
        labels = labels.to(device)

        pred = model(images.to(device))
        # index of the largest logit along the class dimension
        pred_classes = torch.max(pred, dim=1).indices
        correct_total += torch.eq(pred_classes, labels).sum()

        loss_total += criterion(pred, labels)

        # show the running averages on the progress bar
        data_loader.desc = "[valid epoch {}] loss: {:.3f}, acc: {:.3f}".format(
            epoch,
            loss_total.item() / (step + 1),
            correct_total.item() / sample_num)

    return loss_total.item() / (step + 1), correct_total.item() / sample_num
如果要使用自己的数据集,请按照花分类数据集的文件结构进行摆放(即一个类别对应一个文件夹),并且将训练以及预测脚本中的`num_classes`设置成你自己数据的类别数 ================================================ FILE: pytorch_classification/Test10_regnet/model.py ================================================ from typing import Optional import numpy as np import torch import torch.nn as nn from torch import Tensor def _make_divisible(ch, divisor=8, min_ch=None): """ This function is taken from the original tf repo. It ensures that all layers have a channel number that is divisible by 8 It can be seen here: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py """ if min_ch is None: min_ch = divisor new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor) # Make sure that round down does not go down by more than 10%. if new_ch < 0.9 * ch: new_ch += divisor return new_ch def _mcfg(**kwargs): cfg = dict(se_ratio=0., bottle_ratio=1., stem_width=32) cfg.update(**kwargs) return cfg model_cfgs = { "regnetx_200mf": _mcfg(w0=24, wa=36.44, wm=2.49, group_w=8, depth=13), "regnetx_400mf": _mcfg(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22), "regnetx_600mf": _mcfg(w0=48, wa=36.97, wm=2.24, group_w=24, depth=16), "regnetx_800mf": _mcfg(w0=56, wa=35.73, wm=2.28, group_w=16, depth=16), "regnetx_1.6gf": _mcfg(w0=80, wa=34.01, wm=2.25, group_w=24, depth=18), "regnetx_3.2gf": _mcfg(w0=88, wa=26.31, wm=2.25, group_w=48, depth=25), "regnetx_4.0gf": _mcfg(w0=96, wa=38.65, wm=2.43, group_w=40, depth=23), "regnetx_6.4gf": _mcfg(w0=184, wa=60.83, wm=2.07, group_w=56, depth=17), "regnetx_8.0gf": _mcfg(w0=80, wa=49.56, wm=2.88, group_w=120, depth=23), "regnetx_12gf": _mcfg(w0=168, wa=73.36, wm=2.37, group_w=112, depth=19), "regnetx_16gf": _mcfg(w0=216, wa=55.59, wm=2.1, group_w=128, depth=22), "regnetx_32gf": _mcfg(w0=320, wa=69.86, wm=2.0, group_w=168, depth=23), "regnety_200mf": _mcfg(w0=24, wa=36.44, wm=2.49, group_w=8, depth=13, se_ratio=0.25), "regnety_400mf": _mcfg(w0=48, wa=27.89, wm=2.09, group_w=8, depth=16, 
def generate_width_depth(wa, w0, wm, depth, q=8):
    """Generate the per-block widths from the RegNet parameters.

    Args:
        wa: Slope of the linear width function (must be > 0).
        w0: Initial width (must be > 0 and a multiple of ``q``).
        wm: Width multiplier used for quantization (must be > 1).
        depth: Number of blocks.
        q: Widths are rounded to a multiple of this value.

    Returns:
        ``(widths, num_stages)``: a list with one integer width per block and
        the number of distinct widths (stages).

    Raises:
        AssertionError: If the parameters are out of range, or if they do not
            produce exactly 4 stages.
    """
    assert wa > 0 and w0 > 0 and wm > 1 and w0 % q == 0

    # linear width for every block index
    block_index = np.arange(depth)
    linear_widths = block_index * wa + w0

    # snap each width to an integer power of wm (relative to w0)
    exponents = np.round(np.log(linear_widths / w0) / np.log(wm))
    quantized = w0 * np.power(wm, exponents)

    # round to a multiple of q
    quantized = np.round(quantized / q) * q

    num_stages = len(np.unique(quantized))
    max_stage = exponents.max() + 1
    assert num_stages == int(max_stage)
    assert num_stages == 4

    return quantized.astype(int).tolist(), num_stages
class SqueezeExcitation(nn.Module):
    """Squeeze-and-Excitation channel gate.

    The input is averaged over its spatial dimensions, passed through a
    1x1 conv -> ReLU -> 1x1 conv -> Sigmoid bottleneck, and the resulting
    per-channel scale is multiplied back onto the input.

    Args:
        input_c: Channel count used to size the bottleneck
            (``int(input_c * se_ratio)`` squeeze channels).
        expand_c: Number of channels of the tensor passed to ``forward``.
        se_ratio: Squeeze ratio applied to ``input_c``.
    """

    def __init__(self, input_c: int, expand_c: int, se_ratio: float = 0.25):
        super().__init__()
        hidden_c = int(input_c * se_ratio)

        # reduce: expand_c -> hidden_c
        self.fc1 = nn.Conv2d(in_channels=expand_c, out_channels=hidden_c, kernel_size=1)
        self.ac1 = nn.ReLU(inplace=True)
        # restore: hidden_c -> expand_c
        self.fc2 = nn.Conv2d(in_channels=hidden_c, out_channels=expand_c, kernel_size=1)
        self.ac2 = nn.Sigmoid()

    def forward(self, x: Tensor) -> Tensor:
        # global average over H and W, keeping the dims so the gate broadcasts
        pooled = x.mean((2, 3), keepdim=True)
        hidden = self.ac1(self.fc1(pooled))
        gate = self.ac2(self.fc2(hidden))
        return gate * x
class RegStage(nn.Module):
    """One RegNet stage: ``depth`` Bottleneck blocks applied in sequence.

    The blocks are registered as ``b1``, ``b2``, ... Only the first block
    changes the channel count (``in_c -> out_c``) and uses stride 2; every
    later block maps ``out_c -> out_c`` with stride 1.

    Args:
        in_c: Channels entering the stage.
        out_c: Channels produced by every block of the stage.
        depth: Number of Bottleneck blocks.
        group_width: Group width forwarded to each Bottleneck.
        se_ratio: Squeeze-and-Excitation ratio forwarded to each Bottleneck.
    """

    def __init__(self,
                 in_c: int,
                 out_c: int,
                 depth: int,
                 group_width: int,
                 se_ratio: float):
        super().__init__()
        for index in range(depth):
            is_first = index == 0
            block = Bottleneck(in_c=in_c if is_first else out_c,
                               out_c=out_c,
                               stride=2 if is_first else 1,
                               group_width=group_width,
                               se_ratio=se_ratio)
            self.add_module("b{}".format(index + 1), block)

    def forward(self, x: Tensor) -> Tensor:
        # children() yields the blocks in registration order
        out = x
        for block in self.children():
            out = block(out)
        return out
def create_regnet(model_name="RegNetX_200MF", num_classes=1000):
    """Build a RegNet from one of the configurations in ``model_cfgs``.

    Args:
        model_name: Configuration name; it is lower-cased and ``-`` is
            replaced by ``_`` before the lookup.
        num_classes: Number of output classes of the classification head.

    Returns:
        The constructed ``RegNet`` model.

    Raises:
        KeyError: If the normalized name is not a key of ``model_cfgs``
            (the supported names are printed first).
    """
    key = model_name.lower().replace("-", "_")

    if key in model_cfgs.keys():
        return RegNet(cfg=model_cfgs[key], num_classes=num_classes)

    # unknown name: list what is available, then fail
    supported_names = "\n".join(model_cfgs.keys())
    print("support model name: \n{}".format(supported_names))
    raise KeyError("not support model name: {}".format(key))
def main():
    """Classify a single image with a trained RegNet and display the result.

    Loads ``../tulip.jpg``, the class mapping from ``./class_indices.json`` and
    the weights from ``./weights/model-29.pth``, prints the probability of
    every class and shows the image titled with the top prediction.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # same preprocessing as the validation pipeline: resize, center crop, normalize
    preprocess_steps = [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
    data_transform = transforms.Compose(preprocess_steps)

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    img = Image.open(img_path)
    plt.imshow(img)
    # transform, then add the batch dimension in front
    img = data_transform(img).unsqueeze(0)

    # read the index -> class name mapping
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)
    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create the model and load the trained weights
    model = create_regnet(model_name="RegNetY_400MF", num_classes=5).to(device)
    model_weight_path = "./weights/model-29.pth"
    state_dict = torch.load(model_weight_path, map_location=device)
    model.load_state_dict(state_dict)
    model.eval()

    with torch.no_grad():
        # drop the batch dimension and bring the logits back to the CPU
        logits = model(img.to(device))
        output = torch.squeeze(logits).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = predict.argmax().numpy()

    top_name = class_indict[str(predict_cla)]
    top_prob = predict[predict_cla].numpy()
    print_res = "class: {} prob: {:.3}".format(top_name, top_prob)
    plt.title(print_res)

    for index, prob in enumerate(predict):
        print("class: {:10} prob: {:.3}".format(class_indict[str(index)],
                                                 prob.numpy()))
    plt.show()
def main():
    """Download the pretrained weights selected by ``model_name``.

    The URL is looked up in ``download_links`` and the file is streamed to
    ``./<model_name>.pth`` while a progress line is printed.

    Raises:
        KeyError: If ``model_name`` is not a key of ``download_links``.
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
    """
    model_name = "regnetx_400mf"
    print("download weights name: " + model_name)
    if model_name not in download_links:
        raise KeyError("{} not in download_links".format(model_name))

    headers = {"Content-Type": "application/json",
               "Connection": "close",
               "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0"}
    save_weights = "./" + model_name + ".pth"
    chunk_size = 2048

    req = requests.get(url=download_links[model_name], stream=True, headers=headers, timeout=10)
    try:
        req.raise_for_status()

        # Content-Length may be absent (e.g. chunked transfer); 0 means "unknown".
        total_size = int(req.headers.get("Content-Length", 0))
        accumulate_data = 0
        with open(save_weights, "wb") as f:
            for data in req.iter_content(chunk_size):
                f.write(data)
                # count the bytes actually received: the last chunk is usually
                # shorter than chunk_size, so adding chunk_size overshoots 100%
                accumulate_data += len(data)
                if total_size > 0:
                    print("\rdownload: [{}Mb/{}Mb] {}%".format(int(accumulate_data / 1024 / 1024),
                                                                int(total_size / 1024 / 1024),
                                                                int(accumulate_data / total_size * 100)), end="")
                else:
                    print("\rdownload: [{}Mb]".format(int(accumulate_data / 1024 / 1024)), end="")
    finally:
        # release the connection even when the download fails half-way
        req.close()
    # finish the carriage-return progress line
    print()
if __name__ == '__main__':
    def _str2bool(value):
        """Convert a command-line string to a bool.

        ``type=bool`` must not be used with argparse: ``bool("False")`` is
        True because every non-empty string is truthy, so the flag could never
        be switched off from the command line.
        """
        if isinstance(value, bool):
            return value
        lowered = value.strip().lower()
        if lowered in ("true", "t", "yes", "y", "1"):
            return True
        # the empty string stays False, as it was with type=bool
        if lowered in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError("boolean value expected, got: {!r}".format(value))

    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--epochs', type=int, default=30)
    parser.add_argument('--batch-size', type=int, default=16)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--lrf', type=float, default=0.01)

    # root directory of the dataset
    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
    parser.add_argument('--data-path', type=str,
                        default="/data/flower_photos")
    parser.add_argument('--model-name', default='RegNetY_400MF', help='create model name')

    # download address of the pretrained weights
    # link: https://pan.baidu.com/s/1XTo3walj9ai7ZhWz7jh-YA  password: 8lmu
    parser.add_argument('--weights', type=str, default='regnety_400mf.pth',
                        help='initial weights path')
    # accepts true/false, yes/no, 1/0 (case-insensitive)
    parser.add_argument('--freeze-layers', type=_str2bool, default=False)
    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')

    opt = parser.parse_args()

    main(opt)
def plot_data_loader_image(data_loader):
    """Show up to four images of every batch with their class names.

    The class names are read from ``./class_indices.json``. Each image is
    de-normalized with mean ``[0.485, 0.456, 0.406]`` and std
    ``[0.229, 0.224, 0.225]`` before it is displayed.

    Args:
        data_loader: Loader with a ``batch_size`` attribute that yields
            ``(images, labels)`` batches; each image is converted with
            ``.numpy()`` and transposed from [C, H, W] to [H, W, C].

    Raises:
        AssertionError: If ``./class_indices.json`` does not exist.
    """
    batch_size = data_loader.batch_size
    plot_num = min(batch_size, 4)

    json_path = './class_indices.json'
    assert os.path.exists(json_path), json_path + " does not exist."
    # context manager so the file handle is closed once the mapping is loaded
    with open(json_path, 'r') as json_file:
        class_indices = json.load(json_file)

    for data in data_loader:
        images, labels = data
        # the last batch can hold fewer than plot_num images
        for i in range(min(plot_num, len(images))):
            # [C, H, W] -> [H, W, C]
            img = images[i].numpy().transpose(1, 2, 0)
            # undo the Normalize transform and rescale to 0-255
            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255
            label = labels[i].item()
            plt.subplot(1, plot_num, i+1)
            plt.xlabel(class_indices[str(label)])
            plt.xticks([])  # hide the x-axis ticks
            plt.yticks([])  # hide the y-axis ticks
            plt.imshow(img.astype('uint8'))
        plt.show()
def drop_path(x, drop_prob: float = 0., training: bool = False):
    """
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
    "Deep Networks with Stochastic Depth", https://arxiv.org/pdf/1603.09382.pdf

    This function is taken from the rwightman.
    It can be seen here:
    https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py#L140

    Args:
        x: input tensor; dimension 0 is treated as the sample dimension.
        drop_prob: probability of zeroing a whole sample. ``None`` or ``0``
            disables dropping.
        training: dropping is only applied when this is True.

    Returns:
        ``x`` itself when nothing is dropped, otherwise a new tensor in which
        every sample is either zeroed or scaled by ``1 / (1 - drop_prob)``.
    """
    # ``not drop_prob`` covers both 0. and None (DropPath defaults to None).
    if not drop_prob or not training:
        return x
    keep_prob = 1 - drop_prob
    if keep_prob <= 0.:
        # Every path is dropped; dividing by keep_prob == 0 would give NaN.
        return torch.zeros_like(x)
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
    random_tensor.floor_()  # binarize: 1 with probability keep_prob, else 0
    output = x.div(keep_prob) * random_tensor
    return output
class MBConv(nn.Module):
    """Inverted-residual block: 1x1 expand -> depth-wise conv -> optional SE -> 1x1 project.

    A residual shortcut is used when ``stride == 1`` and ``input_c == out_c``;
    stochastic depth (``DropPath``) is applied on that branch only when
    ``drop_rate > 0``.
    """

    def __init__(self,
                 kernel_size: int,
                 input_c: int,
                 out_c: int,
                 expand_ratio: int,
                 stride: int,
                 se_ratio: float,
                 drop_rate: float,
                 norm_layer: Callable[..., nn.Module]):
        super().__init__()

        if stride not in [1, 2]:
            raise ValueError("illegal stride value.")

        # In EfficientNetV2 an MBConv never has expansion == 1, so the
        # point-wise expansion conv always exists.
        assert expand_ratio != 1

        hidden_c = input_c * expand_ratio
        act_conv_kwargs = dict(norm_layer=norm_layer, activation_layer=nn.SiLU)  # SiLU alias Swish

        # Point-wise expansion
        self.expand_conv = ConvBNAct(input_c, hidden_c, kernel_size=1, **act_conv_kwargs)

        # Depth-wise convolution
        self.dwconv = ConvBNAct(hidden_c,
                                hidden_c,
                                kernel_size=kernel_size,
                                stride=stride,
                                groups=hidden_c,
                                **act_conv_kwargs)

        if se_ratio > 0:
            self.se = SqueezeExcite(input_c, hidden_c, se_ratio)
        else:
            self.se = nn.Identity()

        # Point-wise linear projection: Identity is passed because this conv has no activation
        self.project_conv = ConvBNAct(hidden_c,
                                      out_planes=out_c,
                                      kernel_size=1,
                                      norm_layer=norm_layer,
                                      activation_layer=nn.Identity)

        self.has_shortcut = (stride == 1 and input_c == out_c)
        self.out_channels = out_c
        self.drop_rate = drop_rate

        # The drop-path layer is only created when the shortcut is used
        if self.has_shortcut and drop_rate > 0:
            self.dropout = DropPath(drop_rate)

    def forward(self, x: Tensor) -> Tensor:
        out = self.project_conv(self.se(self.dwconv(self.expand_conv(x))))

        if not self.has_shortcut:
            return out

        if self.drop_rate > 0:
            out = self.dropout(out)
        out += x
        return out
class EfficientNetV2(nn.Module):
    """EfficientNetV2 classifier: stem conv -> stacked (Fused)MBConv stages -> head.

    Every row of ``model_cnf`` describes one stage as
    ``[repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio]``;
    ``operator == 0`` selects ``FusedMBConv``, anything else ``MBConv``.
    The per-block drop-path rate grows linearly with the block index, scaled
    by ``drop_connect_rate``.
    """

    def __init__(self,
                 model_cnf: list,
                 num_classes: int = 1000,
                 num_features: int = 1280,
                 dropout_rate: float = 0.2,
                 drop_connect_rate: float = 0.2):
        super().__init__()

        assert all(len(stage) == 8 for stage in model_cnf)

        bn_layer = partial(nn.BatchNorm2d, eps=1e-3, momentum=0.1)

        # Stem: the activation defaults to SiLU inside ConvBNAct
        self.stem = ConvBNAct(3,
                              model_cnf[0][4],
                              kernel_size=3,
                              stride=2,
                              norm_layer=bn_layer)

        total_blocks = sum(stage[0] for stage in model_cnf)
        layers = []
        for repeats, kernel, stride, expansion, in_c, out_c, operator, se_ratio in model_cnf:
            block_cls = MBConv if operator != 0 else FusedMBConv
            for idx in range(repeats):
                is_first = idx == 0
                # len(layers) is the running block index at this point
                layers.append(block_cls(kernel_size=kernel,
                                        input_c=in_c if is_first else out_c,
                                        out_c=out_c,
                                        expand_ratio=expansion,
                                        stride=stride if is_first else 1,
                                        se_ratio=se_ratio,
                                        drop_rate=drop_connect_rate * len(layers) / total_blocks,
                                        norm_layer=bn_layer))
        self.blocks = nn.Sequential(*layers)

        head_layers = [
            # The activation defaults to SiLU inside ConvBNAct
            ("project_conv", ConvBNAct(model_cnf[-1][-3],
                                       num_features,
                                       kernel_size=1,
                                       norm_layer=bn_layer)),
            ("avgpool", nn.AdaptiveAvgPool2d(1)),
            ("flatten", nn.Flatten()),
        ]
        if dropout_rate > 0:
            head_layers.append(("dropout", nn.Dropout(p=dropout_rate, inplace=True)))
        head_layers.append(("classifier", nn.Linear(num_features, num_classes)))
        self.head = nn.Sequential(OrderedDict(head_layers))

        # initial weights
        for module in self.modules():
            self._init_module(module)

    @staticmethod
    def _init_module(module):
        """Apply the per-layer-type weight initialisation to one sub-module."""
        if isinstance(module, nn.Conv2d):
            nn.init.kaiming_normal_(module.weight, mode="fan_out")
            if module.bias is not None:
                nn.init.zeros_(module.bias)
        elif isinstance(module, nn.BatchNorm2d):
            nn.init.ones_(module.weight)
            nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Linear):
            nn.init.normal_(module.weight, 0, 0.01)
            nn.init.zeros_(module.bias)

    def forward(self, x: Tensor) -> Tensor:
        return self.head(self.blocks(self.stem(x)))
class MyDataSet(Dataset):
    """Custom dataset that pairs image file paths with integer class labels."""

    def __init__(self, images_path: list, images_class: list, transform=None):
        self.transform = transform
        self.images_class = images_class
        self.images_path = images_path

    def __len__(self):
        return len(self.images_path)

    def __getitem__(self, item):
        path = self.images_path[item]
        img = Image.open(path)
        # 'RGB' is a colour image, 'L' a grayscale one; only RGB is accepted
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(path))
        label = self.images_class[item]

        if self.transform is None:
            return img, label
        return self.transform(img), label

    @staticmethod
    def collate_fn(batch):
        # For the official default_collate see
        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        images, labels = zip(*batch)
        return torch.stack(images, dim=0), torch.as_tensor(labels)
def _str2bool(value):
    """Parse a command-line boolean such as ``True``, ``false``, ``1`` or ``no``.

    ``type=bool`` cannot be used with argparse because ``bool("False")`` is
    True: every non-empty string would enable the flag.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ("true", "t", "yes", "y", "1"):
        return True
    # The empty string stays False, as it was under ``type=bool``.
    if lowered in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got '{}'".format(value))


def main(args):
    """Train the model on an image-folder dataset and save a checkpoint per epoch.

    Args:
        args: parsed command-line namespace providing ``device``,
            ``data_path``, ``batch_size``, ``num_classes``, ``weights``,
            ``freeze_layers``, ``lr``, ``lrf`` and ``epochs``.

    Raises:
        FileNotFoundError: if ``args.weights`` is non-empty but does not exist.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")

    print(args)
    print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
    tb_writer = SummaryWriter()
    os.makedirs("./weights", exist_ok=True)

    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)

    img_size = {"s": [300, 384],  # train_size, val_size
                "m": [384, 480],
                "l": [384, 480]}
    num_model = "s"

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(img_size[num_model][0]),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
        "val": transforms.Compose([transforms.Resize(img_size[num_model][1]),
                                   transforms.CenterCrop(img_size[num_model][1]),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])}

    # Instantiate the training dataset
    train_dataset = MyDataSet(images_path=train_images_path,
                              images_class=train_images_label,
                              transform=data_transform["train"])

    # Instantiate the validation dataset
    val_dataset = MyDataSet(images_path=val_images_path,
                            images_class=val_images_label,
                            transform=data_transform["val"])

    batch_size = args.batch_size
    # os.cpu_count() may return None; fall back to 1 so min() cannot fail.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_dataset.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_dataset.collate_fn)

    # Load the pre-trained weights if a path was given
    model = create_model(num_classes=args.num_classes).to(device)
    if args.weights != "":
        if os.path.exists(args.weights):
            weights_dict = torch.load(args.weights, map_location=device)
            # Build the state dict once, and skip checkpoint keys the model
            # does not have instead of failing with KeyError.
            model_state = model.state_dict()
            load_weights_dict = {k: v for k, v in weights_dict.items()
                                 if k in model_state and model_state[k].numel() == v.numel()}
            print(model.load_state_dict(load_weights_dict, strict=False))
        else:
            raise FileNotFoundError("not found weights file: {}".format(args.weights))

    # Optionally freeze weights
    if args.freeze_layers:
        for name, para in model.named_parameters():
            # Freeze everything except the head
            if "head" not in name:
                para.requires_grad_(False)
            else:
                print("training {}".format(name))

    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=1E-4)
    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    tags = ["train_loss", "train_acc", "val_loss", "val_acc", "learning_rate"]
    try:
        for epoch in range(args.epochs):
            # train
            train_loss, train_acc = train_one_epoch(model=model,
                                                    optimizer=optimizer,
                                                    data_loader=train_loader,
                                                    device=device,
                                                    epoch=epoch)

            scheduler.step()

            # validate
            val_loss, val_acc = evaluate(model=model,
                                         data_loader=val_loader,
                                         device=device,
                                         epoch=epoch)

            tb_writer.add_scalar(tags[0], train_loss, epoch)
            tb_writer.add_scalar(tags[1], train_acc, epoch)
            tb_writer.add_scalar(tags[2], val_loss, epoch)
            tb_writer.add_scalar(tags[3], val_acc, epoch)
            tb_writer.add_scalar(tags[4], optimizer.param_groups[0]["lr"], epoch)

            torch.save(model.state_dict(), "./weights/model-{}.pth".format(epoch))
    finally:
        # Flush pending events even if training is interrupted.
        tb_writer.close()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--epochs', type=int, default=30)
    parser.add_argument('--batch-size', type=int, default=8)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--lrf', type=float, default=0.01)

    # Root directory of the dataset
    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
    parser.add_argument('--data-path', type=str,
                        default="/data/flower_photos")

    # download model weights
    # link: https://pan.baidu.com/s/1uZX36rvrfEss-JGj4yfzbQ  password: 5gu1
    parser.add_argument('--weights', type=str, default='./pre_efficientnetv2-s.pth',
                        help='initial weights path')
    # type=bool would treat "False" as True; parse the text explicitly.
    parser.add_argument('--freeze-layers', type=_str2bool, default=True)
    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')

    opt = parser.parse_args()

    main(opt)
blocks_0.conv2d.kernel -> 0 blocks_id = new_name.split(".", maxsplit=1)[0].replace("blocks_", "") new_name = new_name.replace("blocks_{}".format(blocks_id), "blocks.{}".format(blocks_id)) if int(blocks_id) <= stage0_num - 1: # expansion=1 fused_mbconv new_name = new_name.replace("conv2d.kernel", "project_conv.conv.weight") new_name = new_name.replace("tpu_batch_normalization.beta", "project_conv.bn.bias") new_name = new_name.replace("tpu_batch_normalization.gamma", "project_conv.bn.weight") new_name = new_name.replace("tpu_batch_normalization.moving_mean", "project_conv.bn.running_mean") new_name = new_name.replace("tpu_batch_normalization.moving_variance", "project_conv.bn.running_var") else: new_name = new_name.replace("blocks.{}.conv2d.kernel".format(blocks_id), "blocks.{}.expand_conv.conv.weight".format(blocks_id)) new_name = new_name.replace("tpu_batch_normalization.beta", "expand_conv.bn.bias") new_name = new_name.replace("tpu_batch_normalization.gamma", "expand_conv.bn.weight") new_name = new_name.replace("tpu_batch_normalization.moving_mean", "expand_conv.bn.running_mean") new_name = new_name.replace("tpu_batch_normalization.moving_variance", "expand_conv.bn.running_var") if int(blocks_id) <= fused_conv_num - 1: # fused_mbconv new_name = new_name.replace("blocks.{}.conv2d_1.kernel".format(blocks_id), "blocks.{}.project_conv.conv.weight".format(blocks_id)) new_name = new_name.replace("tpu_batch_normalization_1.beta", "project_conv.bn.bias") new_name = new_name.replace("tpu_batch_normalization_1.gamma", "project_conv.bn.weight") new_name = new_name.replace("tpu_batch_normalization_1.moving_mean", "project_conv.bn.running_mean") new_name = new_name.replace("tpu_batch_normalization_1.moving_variance", "project_conv.bn.running_var") else: # mbconv new_name = new_name.replace("blocks.{}.conv2d_1.kernel".format(blocks_id), "blocks.{}.project_conv.conv.weight".format(blocks_id)) new_name = new_name.replace("depthwise_conv2d.depthwise_kernel", "dwconv.conv.weight") 
new_name = new_name.replace("tpu_batch_normalization_1.beta", "dwconv.bn.bias") new_name = new_name.replace("tpu_batch_normalization_1.gamma", "dwconv.bn.weight") new_name = new_name.replace("tpu_batch_normalization_1.moving_mean", "dwconv.bn.running_mean") new_name = new_name.replace("tpu_batch_normalization_1.moving_variance", "dwconv.bn.running_var") new_name = new_name.replace("tpu_batch_normalization_2.beta", "project_conv.bn.bias") new_name = new_name.replace("tpu_batch_normalization_2.gamma", "project_conv.bn.weight") new_name = new_name.replace("tpu_batch_normalization_2.moving_mean", "project_conv.bn.running_mean") new_name = new_name.replace("tpu_batch_normalization_2.moving_variance", "project_conv.bn.running_var") new_name = new_name.replace("se.conv2d.bias", "se.conv_reduce.bias") new_name = new_name.replace("se.conv2d.kernel", "se.conv_reduce.weight") new_name = new_name.replace("se.conv2d_1.bias", "se.conv_expand.bias") new_name = new_name.replace("se.conv2d_1.kernel", "se.conv_expand.weight") else: print("not recognized name: " + v[0]) var = reader.get_tensor(v[0]) new_var = var if "conv" in new_name and "weight" in new_name and "bn" not in new_name and "dw" not in new_name: assert len(var.shape) == 4 # conv kernel [h, w, c, n] -> [n, c, h, w] new_var = np.transpose(var, (3, 2, 0, 1)) elif "bn" in new_name: pass elif "dwconv" in new_name and "weight" in new_name: # dw_kernel [h, w, n, c] -> [n, c, h, w] assert len(var.shape) == 4 new_var = np.transpose(var, (2, 3, 0, 1)) elif "classifier" in new_name and "weight" in new_name: assert len(var.shape) == 2 new_var = np.transpose(var, (1, 0)) new_weights[new_name] = torch.as_tensor(new_var) torch.save(new_weights, "pre_" + model_name + ".pth") if __name__ == '__main__': main(model_name="efficientnetv2-s", tf_weights_path="./efficientnetv2-s/model", stage0_num=2, fused_conv_num=10) # main(model_name="efficientnetv2-m", # tf_weights_path="./efficientnetv2-m/model", # stage0_num=3, # fused_conv_num=13) # 
def read_split_data(root: str, val_rate: float = 0.2, plot_image: bool = False):
    """Split an image-folder dataset into training and validation lists.

    Every sub-directory of ``root`` is one class. Class names are sorted and
    numbered from 0, and the ``index -> name`` mapping is written to
    ``class_indices.json`` in the current working directory. Within each
    class, ``int(len(images) * val_rate)`` files are sampled for validation
    (the random seed is fixed to 0 so the split is reproducible).

    Args:
        root: dataset root directory.
        val_rate: fraction of each class sampled into the validation set.
        plot_image: when True, show a bar chart of the per-class image counts.

    Returns:
        ``(train_images_path, train_images_label, val_images_path,
        val_images_label)`` -- parallel lists of file paths and class indices.

    Raises:
        AssertionError: if ``root`` does not exist or either split is empty.
    """
    random.seed(0)  # make the random split reproducible
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # Walk the folders: one folder per class
    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]
    # Sort so the order is the same on every platform
    flower_class.sort()
    # Map class name -> numeric index, and dump the inverse mapping
    class_indices = {name: index for index, name in enumerate(flower_class)}
    json_str = json.dumps({index: name for name, index in class_indices.items()}, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # all training image paths
    train_images_label = []  # class index of each training image
    val_images_path = []  # all validation image paths
    val_images_label = []  # class index of each validation image
    every_class_num = []  # number of samples per class
    supported = [".jpg", ".JPG", ".png", ".PNG"]  # supported file suffixes
    # Walk the files of every class folder
    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # Collect every file whose suffix is supported
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        # Sort so the order is the same on every platform
        images.sort()
        # Index of this class
        image_class = class_indices[cla]
        # Record the number of samples of this class
        every_class_num.append(len(images))
        # Sample the validation files; a set gives O(1) membership tests
        # below (a list made the split quadratic in the class size).
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:  # sampled -> validation set
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:  # otherwise -> training set
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.".format(sum(every_class_num)))
    print("{} images for training.".format(len(train_images_path)))
    print("{} images for validation.".format(len(val_images_path)))
    assert len(train_images_path) > 0, "number of training images must greater than 0."
    assert len(val_images_path) > 0, "number of validation images must greater than 0."

    if plot_image:
        # Bar chart of the number of images per class
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # Replace the x ticks 0,1,2,3,4 with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # Put the count on top of every bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        # x-axis label
        plt.xlabel('image class')
        # y-axis label
        plt.ylabel('number of images')
        # Chart title
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label
@torch.no_grad()
def evaluate(model, data_loader, device, epoch):
    """Run one validation pass and return ``(mean batch loss, accuracy)``.

    The mean loss is the sum of per-batch cross-entropy losses divided by the
    number of batches; accuracy is correct predictions over samples seen.
    A tqdm progress bar on stdout shows the running values.
    """
    model.eval()
    criterion = torch.nn.CrossEntropyLoss()

    correct_total = torch.zeros(1).to(device)  # running count of correct predictions
    loss_total = torch.zeros(1).to(device)  # running sum of batch losses
    seen = 0

    data_loader = tqdm(data_loader, file=sys.stdout)
    for step, (images, labels) in enumerate(data_loader):
        labels = labels.to(device)
        seen += images.shape[0]

        logits = model(images.to(device))
        predicted = torch.max(logits, dim=1)[1]
        correct_total += torch.eq(predicted, labels).sum()
        loss_total += criterion(logits, labels)

        data_loader.desc = "[valid epoch {}] loss: {:.3f}, acc: {:.3f}".format(epoch,
                                                                               loss_total.item() / (step + 1),
                                                                               correct_total.item() / seen)

    return loss_total.item() / (step + 1), correct_total.item() / seen
def main():
    """Classify ``1.jpg`` with the LeNet weights stored in ``Lenet.pth`` and print the class name."""
    transform = transforms.Compose(
        [transforms.Resize((32, 32)),
         transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    classes = ('plane', 'car', 'bird', 'cat',
               'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

    net = LeNet()
    # map_location lets weights saved on a GPU load on a CPU-only machine.
    net.load_state_dict(torch.load('Lenet.pth', map_location='cpu'))
    net.eval()

    # Force 3 channels: grayscale or RGBA files would otherwise break the
    # 3-channel Normalize and the network's 3-channel first convolution.
    with Image.open('1.jpg') as im:
        im = im.convert('RGB')
    im = transform(im)  # [C, H, W]
    im = torch.unsqueeze(im, dim=0)  # [N, C, H, W]

    with torch.no_grad():
        outputs = net(im)
        # .item() avoids converting a 1-element array through int(), which
        # recent NumPy releases deprecate.
        predict = torch.max(outputs, dim=1)[1].item()
    print(classes[predict])
class AlexNet(nn.Module):
    """AlexNet variant with half of the usual channel widths.

    ``features`` maps a ``[3, 224, 224]`` input to ``[128, 6, 6]``;
    ``classifier`` is three fully connected layers ending in ``num_classes``
    outputs. Pass ``init_weights=True`` to apply the Kaiming / normal
    initialisation instead of PyTorch's defaults.
    """

    def __init__(self, num_classes=1000, init_weights=False):
        super().__init__()

        feature_layers = [
            nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2),  # input[3, 224, 224]  output[48, 55, 55]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[48, 27, 27]
            nn.Conv2d(48, 128, kernel_size=5, padding=2),           # output[128, 27, 27]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[128, 13, 13]
            nn.Conv2d(128, 192, kernel_size=3, padding=1),          # output[192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, padding=1),          # output[192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 128, kernel_size=3, padding=1),          # output[128, 13, 13]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[128, 6, 6]
        ]
        self.features = nn.Sequential(*feature_layers)

        classifier_layers = [
            nn.Dropout(p=0.5),
            nn.Linear(128 * 6 * 6, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, num_classes),
        ]
        self.classifier = nn.Sequential(*classifier_layers)

        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        feature_map = self.features(x)
        flat = torch.flatten(feature_map, start_dim=1)
        return self.classifier(flat)

    def _initialize_weights(self):
        """Kaiming-normal init for convolutions, N(0, 0.01) for linear layers, zero biases."""
        for layer in self.modules():
            if isinstance(layer, nn.Linear):
                nn.init.normal_(layer.weight, 0, 0.01)
                nn.init.constant_(layer.bias, 0)
                continue
            if not isinstance(layer, nn.Conv2d):
                continue
            nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
            if layer.bias is not None:
                nn.init.constant_(layer.bias, 0)
def main():
    """Train AlexNet on the flower dataset and save the best-performing weights.

    The dataset is read from ``<cwd>/../../data_set/flower_data`` with ``train``
    and ``val`` sub-folders in ``ImageFolder`` layout. The index-to-class
    mapping is written to ``class_indices.json`` and the weights with the
    highest validation accuracy are saved to ``./AlexNet.pth``.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        # Resize needs the (224, 224) tuple here: a bare int only rescales the
        # shorter side, which would give images of differing shapes.
        "val": transforms.Compose([transforms.Resize((224, 224)),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # class_to_idx maps class name -> index; invert it so the prediction
    # script can look up a name from a predicted index.
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    # os.cpu_count() may return None when the count cannot be determined;
    # fall back to 1 so min() never compares None with an int.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=4, shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    net = AlexNet(num_classes=5, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0002)

    epochs = 10
    save_path = './AlexNet.pth'
    best_acc = 0.0
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for images, labels in train_bar:
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # Read the scalar once and reuse it for both the running sum and the progress bar.
            step_loss = loss.item()
            running_loss += step_loss

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     step_loss)

        # validate
        net.eval()
        acc = 0.0  # number of correctly classified validation images this epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_images, val_labels in val_bar:
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # Only overwrite the checkpoint when validation accuracy improves.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')
def make_features(cfg: list):
    """Build the VGG convolutional feature extractor described by ``cfg``.

    Each integer entry adds a 3x3 convolution (padding 1) with that many output
    channels followed by an in-place ReLU; each ``"M"`` entry adds a 2x2
    max-pool with stride 2. The input is assumed to have 3 channels.
    """
    modules = []
    prev_channels = 3
    for entry in cfg:
        if entry != "M":
            modules.append(nn.Conv2d(prev_channels, entry, kernel_size=3, padding=1))
            modules.append(nn.ReLU(True))
            prev_channels = entry
            continue
        modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*modules)


# Layer layouts per VGG variant: integers are conv output channels, 'M' is a max-pool.
cfgs = {
    'vgg11': [64, 'M',
              128, 'M',
              256, 256, 'M',
              512, 512, 'M',
              512, 512, 'M'],
    'vgg13': [64, 64, 'M',
              128, 128, 'M',
              256, 256, 'M',
              512, 512, 'M',
              512, 512, 'M'],
    'vgg16': [64, 64, 'M',
              128, 128, 'M',
              256, 256, 256, 'M',
              512, 512, 512, 'M',
              512, 512, 512, 'M'],
    'vgg19': [64, 64, 'M',
              128, 128, 'M',
              256, 256, 256, 256, 'M',
              512, 512, 512, 512, 'M',
              512, 512, 512, 512, 'M'],
}


def vgg(model_name="vgg16", **kwargs):
    """Create a VGG model for ``model_name``; extra keyword arguments go to the ``VGG`` constructor."""
    assert model_name in cfgs, "Warning: model number {} not in cfgs dict!".format(model_name)
    return VGG(make_features(cfgs[model_name]), **kwargs)
def main():
    """Train VGG-16 on the flower dataset and save the best-performing weights.

    The dataset is read from ``<cwd>/../../data_set/flower_data`` with ``train``
    and ``val`` sub-folders in ``ImageFolder`` layout. The index-to-class
    mapping is written to ``class_indices.json`` and the weights with the
    highest validation accuracy are saved to ``./vgg16Net.pth``.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "val": transforms.Compose([transforms.Resize((224, 224)),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # class_to_idx maps class name -> index; invert it so the prediction
    # script can look up a name from a predicted index.
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    # os.cpu_count() may return None when the count cannot be determined;
    # fall back to 1 so min() never compares None with an int.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)
    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    model_name = "vgg16"
    net = vgg(model_name=model_name, num_classes=5, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

    epochs = 30
    best_acc = 0.0
    save_path = './{}Net.pth'.format(model_name)
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for images, labels in train_bar:
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # Read the scalar once and reuse it for both the running sum and the progress bar.
            step_loss = loss.item()
            running_loss += step_loss

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     step_loss)

        # validate
        net.eval()
        acc = 0.0  # number of correctly classified validation images this epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_images, val_labels in val_bar:
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # Only overwrite the checkpoint when validation accuracy improves.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')
class Inception(nn.Module):
    """Inception module: four parallel branches whose outputs are concatenated on the channel axis.

    Every branch preserves the spatial size of its input, so the output has
    ``ch1x1 + ch3x3 + ch5x5 + pool_proj`` channels.
    """

    def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj):
        super().__init__()

        # Branch 1: a single 1x1 convolution.
        self.branch1 = BasicConv2d(in_channels, ch1x1, kernel_size=1)

        # Branch 2: 1x1 channel reduction, then a 3x3 conv (padding 1 keeps the spatial size).
        reduce_3x3 = BasicConv2d(in_channels, ch3x3red, kernel_size=1)
        conv_3x3 = BasicConv2d(ch3x3red, ch3x3, kernel_size=3, padding=1)
        self.branch2 = nn.Sequential(reduce_3x3, conv_3x3)

        # Branch 3: 1x1 channel reduction, then a 5x5 conv (padding 2 keeps the spatial size).
        # The official torchvision implementation actually uses a 3x3 kernel here;
        # see https://github.com/pytorch/vision/issues/906 for details.
        reduce_5x5 = BasicConv2d(in_channels, ch5x5red, kernel_size=1)
        conv_5x5 = BasicConv2d(ch5x5red, ch5x5, kernel_size=5, padding=2)
        self.branch3 = nn.Sequential(reduce_5x5, conv_5x5)

        # Branch 4: size-preserving 3x3 max-pool followed by a 1x1 projection.
        pool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        projection = BasicConv2d(in_channels, pool_proj, kernel_size=1)
        self.branch4 = nn.Sequential(pool, projection)

    def forward(self, x):
        """Apply all four branches to ``x`` and concatenate the results along dim 1."""
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        return torch.cat([branch(x) for branch in branches], 1)


class InceptionAux(nn.Module):
    """Auxiliary classifier head: average-pool, 1x1 conv to 128 channels, two linear layers.

    ``fc1`` expects 2048 input features, i.e. a 4x4 map after pooling
    (which a 14x14 input produces).
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.averagePool = nn.AvgPool2d(kernel_size=5, stride=3)
        self.conv = BasicConv2d(in_channels, 128, kernel_size=1)  # output[batch, 128, 4, 4]

        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Return class logits; dropout (p=0.5) is only active in training mode."""
        # aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14
        pooled = self.averagePool(x)
        # aux1: N x 512 x 4 x 4, aux2: N x 528 x 4 x 4
        reduced = self.conv(pooled)
        # N x 128 x 4 x 4
        flat = torch.flatten(reduced, 1)
        flat = F.dropout(flat, 0.5, training=self.training)
        # N x 2048
        hidden = F.relu(self.fc1(flat), inplace=True)
        hidden = F.dropout(hidden, 0.5, training=self.training)
        # N x 1024
        return self.fc2(hidden)  # N x num_classes


class BasicConv2d(nn.Module):
    """A ``Conv2d`` followed by an in-place ReLU; extra keyword arguments go to ``nn.Conv2d``."""

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        return self.relu(self.conv(x))
def main():
    """Classify ``../tulip.jpg`` with the trained GoogLeNet weights and show the result.

    Reads the index-to-class mapping from ``./class_indices.json`` and the
    weights from ``./googleNet.pth``, prints the probability of every class,
    and displays the image titled with the top prediction.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    # Normalize() above is configured for exactly three channels; converting to
    # RGB keeps grayscale and RGBA inputs from failing inside the transform.
    img = Image.open(img_path).convert("RGB")
    plt.imshow(img)
    # [C, H, W]
    img = data_transform(img)
    # expand batch dimension -> [N, C, H, W]
    img = torch.unsqueeze(img, dim=0)

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = GoogLeNet(num_classes=5, aux_logits=False).to(device)

    # load model weights
    weights_path = "./googleNet.pth"
    assert os.path.exists(weights_path), "file: '{}' dose not exist.".format(weights_path)
    # strict=False because the training script saves a model built with
    # aux_logits=True, so the checkpoint carries aux1/aux2 weights that this
    # inference model does not have.
    missing_keys, unexpected_keys = model.load_state_dict(torch.load(weights_path, map_location=device),
                                                          strict=False)
    # Anything other than the auxiliary heads being skipped means part of the
    # model kept its random initialization; report it instead of hiding it.
    if missing_keys:
        print("[missing_keys]:", *missing_keys, sep="\n")
    non_aux_unexpected = [key for key in unexpected_keys
                          if not key.startswith(("aux1.", "aux2."))]
    if non_aux_unexpected:
        print("[unexpected_keys]:", *non_aux_unexpected, sep="\n")

    model.eval()
    with torch.no_grad():
        # predict class
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).item()

    print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)],
                                               predict[predict_cla].item())
    plt.title(print_res)
    for i, prob in enumerate(predict):
        print("class: {:10} prob: {:.3}".format(class_indict[str(i)],
                                                prob.item()))
    plt.show()
datasets import torch.optim as optim from tqdm import tqdm from model import GoogLeNet def main(): device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") print("using {} device.".format(device)) data_transform = { "train": transforms.Compose([transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]), "val": transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])} data_root = os.path.abspath(os.path.join(os.getcwd(), "../..")) # get data root path image_path = os.path.join(data_root, "data_set", "flower_data") # flower data set path assert os.path.exists(image_path), "{} path does not exist.".format(image_path) train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"), transform=data_transform["train"]) train_num = len(train_dataset) # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4} flower_list = train_dataset.class_to_idx cla_dict = dict((val, key) for key, val in flower_list.items()) # write dict into json file json_str = json.dumps(cla_dict, indent=4) with open('class_indices.json', 'w') as json_file: json_file.write(json_str) batch_size = 32 nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]) # number of workers print('Using {} dataloader workers every process'.format(nw)) train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=nw) validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"), transform=data_transform["val"]) val_num = len(validate_dataset) validate_loader = torch.utils.data.DataLoader(validate_dataset, batch_size=batch_size, shuffle=False, num_workers=nw) print("using {} images for training, {} images for validation.".format(train_num, val_num)) # test_data_iter = iter(validate_loader) # test_image, test_label = test_data_iter.next() net = 
GoogLeNet(num_classes=5, aux_logits=True, init_weights=True) # 如果要使用官方的预训练权重,注意是将权重载入官方的模型,不是我们自己实现的模型 # 官方的模型中使用了bn层以及改了一些参数,不能混用 # import torchvision # net = torchvision.models.googlenet(num_classes=5) # model_dict = net.state_dict() # # 预训练权重下载地址: https://download.pytorch.org/models/googlenet-1378be20.pth # pretrain_model = torch.load("googlenet.pth") # del_list = ["aux1.fc2.weight", "aux1.fc2.bias", # "aux2.fc2.weight", "aux2.fc2.bias", # "fc.weight", "fc.bias"] # pretrain_dict = {k: v for k, v in pretrain_model.items() if k not in del_list} # model_dict.update(pretrain_dict) # net.load_state_dict(model_dict) net.to(device) loss_function = nn.CrossEntropyLoss() optimizer = optim.Adam(net.parameters(), lr=0.0003) epochs = 30 best_acc = 0.0 save_path = './googleNet.pth' train_steps = len(train_loader) for epoch in range(epochs): # train net.train() running_loss = 0.0 train_bar = tqdm(train_loader, file=sys.stdout) for step, data in enumerate(train_bar): images, labels = data optimizer.zero_grad() logits, aux_logits2, aux_logits1 = net(images.to(device)) loss0 = loss_function(logits, labels.to(device)) loss1 = loss_function(aux_logits1, labels.to(device)) loss2 = loss_function(aux_logits2, labels.to(device)) loss = loss0 + loss1 * 0.3 + loss2 * 0.3 loss.backward() optimizer.step() # print statistics running_loss += loss.item() train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1, epochs, loss) # validate net.eval() acc = 0.0 # accumulate accurate number / epoch with torch.no_grad(): val_bar = tqdm(validate_loader, file=sys.stdout) for val_data in val_bar: val_images, val_labels = val_data outputs = net(val_images.to(device)) # eval model only have last output layer predict_y = torch.max(outputs, dim=1)[1] acc += torch.eq(predict_y, val_labels.to(device)).sum().item() val_accurate = acc / val_num print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' % (epoch + 1, running_loss / train_steps, val_accurate)) if val_accurate > best_acc: best_acc = 
def main():
    """Predict the class of every ``.jpg`` image in a folder with ResNet-34, in batches.

    Reads the index-to-class mapping from ``./class_indices.json`` and the
    weights from ``./resNet34.pth``, then prints one line per image with the
    predicted class name and its probability.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose(
        [transforms.Resize(256),
         transforms.CenterCrop(224),
         transforms.ToTensor(),
         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    # load image
    # folder containing the images to predict
    imgs_root = "/data/imgs"
    assert os.path.exists(imgs_root), f"file: '{imgs_root}' dose not exist."
    # collect the paths of all jpg images in that folder
    img_path_list = [os.path.join(imgs_root, i) for i in os.listdir(imgs_root) if i.endswith(".jpg")]

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), f"file: '{json_path}' dose not exist."

    # Use a context manager so the file handle is closed after parsing.
    with open(json_path, "r") as json_file:
        class_indict = json.load(json_file)

    # create model
    model = resnet34(num_classes=5).to(device)

    # load model weights
    weights_path = "./resNet34.pth"
    assert os.path.exists(weights_path), f"file: '{weights_path}' dose not exist."
    model.load_state_dict(torch.load(weights_path, map_location=device))

    # prediction
    model.eval()
    batch_size = 8  # number of images packed into one batch per forward pass
    with torch.no_grad():
        # Step through the list by batch_size so the final, possibly smaller,
        # batch is predicted too (len // batch_size would drop it).
        for start in range(0, len(img_path_list), batch_size):
            batch_paths = img_path_list[start:start + batch_size]
            img_list = []
            for img_path in batch_paths:
                assert os.path.exists(img_path), f"file: '{img_path}' dose not exist."
                # Normalize() above is configured for three channels; force RGB
                # so grayscale/CMYK JPEGs do not fail inside the transform.
                img = Image.open(img_path).convert("RGB")
                img_list.append(data_transform(img))

            # stack all images of this chunk into one [N, C, H, W] batch
            batch_img = torch.stack(img_list, dim=0)
            # predict class
            output = model(batch_img.to(device)).cpu()
            predict = torch.softmax(output, dim=1)
            probs, classes = torch.max(predict, dim=1)

            for img_path, pro, cla in zip(batch_paths, probs, classes):
                print("image: {} class: {} prob: {:.3}".format(img_path,
                                                               class_indict[str(cla.item())],
                                                               pro.item()))
class BasicBlock(nn.Module):
    """Two-layer 3x3 residual block (the block type ``resnet34`` is built from)."""

    expansion = 1  # output channels = out_channel * expansion

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, **kwargs):
        # **kwargs absorbs groups/width_per_group, which ResNet passes to every block type.
        super().__init__()
        conv_args = dict(kernel_size=3, padding=1, bias=False)
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,
                               stride=stride, **conv_args)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,
                               stride=1, **conv_args)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        out += shortcut
        return self.relu(out)


class Bottleneck(nn.Module):
    """Three-layer 1x1 -> 3x3 -> 1x1 residual block.

    Note: in the original paper, on the main branch of the down-sampling
    (dashed-line) residual structure, the first 1x1 convolution has stride 2
    and the 3x3 convolution has stride 1. The official PyTorch implementation
    instead gives the first 1x1 convolution stride 1 and the 3x3 convolution
    stride 2, which improves top-1 accuracy by roughly 0.5%.
    See ResNet v1.5:
    https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch
    """

    expansion = 4  # output channels = out_channel * expansion

    def __init__(self, in_channel, out_channel, stride=1, downsample=None,
                 groups=1, width_per_group=64):
        super().__init__()

        # Inner width of the block; equals out_channel for plain ResNet (groups=1, width 64).
        width = int(out_channel * (width_per_group / 64.)) * groups
        expanded = out_channel * self.expansion

        # squeeze channels
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=width,
                               kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(width)
        # grouped 3x3 convolution; this is the layer that carries the stride
        self.conv2 = nn.Conv2d(in_channels=width, out_channels=width, groups=groups,
                               kernel_size=3, stride=stride, bias=False, padding=1)
        self.bn2 = nn.BatchNorm2d(width)
        # unsqueeze channels
        self.conv3 = nn.Conv2d(in_channels=width, out_channels=expanded,
                               kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(expanded)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out += shortcut
        return self.relu(out)


class ResNet(nn.Module):
    """ResNet / ResNeXt backbone with an optional classification head.

    Args:
        block: residual block class (``BasicBlock`` or ``Bottleneck``).
        blocks_num: number of blocks in each of the four stages.
        num_classes: output size of the final linear layer.
        include_top: when False, the average-pool and linear head are not
            created and ``forward`` returns the stage-4 feature map.
        groups: number of groups, forwarded to each block.
        width_per_group: width per group, forwarded to each block.
    """

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True,
                 groups=1, width_per_group=64):
        super().__init__()
        self.include_top = include_top
        self.in_channel = 64  # running channel count, updated by _make_layer

        self.groups = groups
        self.width_per_group = width_per_group

        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, blocks_num[0])
        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)
        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)
        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)
        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` blocks; only the first block gets ``stride`` and a shortcut projection."""
        out_width = channel * block.expansion

        # A 1x1 conv + BN projection is needed on the shortcut whenever the
        # spatial size or the channel count changes across the first block.
        shortcut = None
        if stride != 1 or self.in_channel != out_width:
            shortcut = nn.Sequential(
                nn.Conv2d(self.in_channel, out_width, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_width))

        group_args = dict(groups=self.groups, width_per_group=self.width_per_group)
        stage = [block(self.in_channel, channel, downsample=shortcut, stride=stride, **group_args)]
        self.in_channel = out_width

        stage.extend(block(self.in_channel, channel, **group_args)
                     for _ in range(block_num - 1))

        return nn.Sequential(*stage)

    def forward(self, x):
        """Run the stem and four stages, then the pooling + linear head when ``include_top`` is set."""
        stem_and_stages = (self.conv1, self.bn1, self.relu, self.maxpool,
                           self.layer1, self.layer2, self.layer3, self.layer4)
        for stage in stem_and_stages:
            x = stage(x)

        if not self.include_top:
            return x

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        return self.fc(x)


def resnet34(num_classes=1000, include_top=True):
    """ResNet-34. Pretrained weights: https://download.pytorch.org/models/resnet34-333f7ec4.pth"""
    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)


def resnet50(num_classes=1000, include_top=True):
    """ResNet-50. Pretrained weights: https://download.pytorch.org/models/resnet50-19c8e357.pth"""
    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)


def resnet101(num_classes=1000, include_top=True):
    """ResNet-101. Pretrained weights: https://download.pytorch.org/models/resnet101-5d3b4d8f.pth"""
    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, include_top=include_top)


def resnext50_32x4d(num_classes=1000, include_top=True):
    """ResNeXt-50 32x4d. Pretrained weights: https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth"""
    return ResNet(Bottleneck, [3, 4, 6, 3],
                  num_classes=num_classes,
                  include_top=include_top,
                  groups=32,
                  width_per_group=4)


def resnext101_32x8d(num_classes=1000, include_top=True):
    """ResNeXt-101 32x8d. Pretrained weights: https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth"""
    return ResNet(Bottleneck, [3, 4, 23, 3],
                  num_classes=num_classes,
                  include_top=include_top,
                  groups=32,
                  width_per_group=8)
def main():
    """Fine-tune a pretrained ResNet-34 on the flower classification dataset.

    Paths are resolved relative to the current working directory:

    * ``../../data_set/flower_data/{train,val}`` must be ``ImageFolder`` trees;
    * ``./resnet34-pre.pth`` must hold the pretrained weights.

    Side effects: writes ``class_indices.json`` (class index -> class name) and
    saves the weights with the best validation accuracy to ``./resNet34.pth``.

    Raises:
        AssertionError: if the dataset directory or the weight file is missing.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # class_to_idx maps name -> index, e.g.
    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4};
    # invert it so the prediction script can map an index back to a name.
    flower_list = train_dataset.class_to_idx
    cla_dict = {val: key for key, val in flower_list.items()}
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 16
    # os.cpu_count() returns None when the CPU count cannot be determined;
    # fall back to 1 so min() does not raise TypeError on a None entry.
    cpu_total = os.cpu_count() or 1
    nw = min([cpu_total, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    net = resnet34()
    # load pretrain weights
    # download url: https://download.pytorch.org/models/resnet34-333f7ec4.pth
    model_weight_path = "./resnet34-pre.pth"
    assert os.path.exists(model_weight_path), "file {} does not exist.".format(model_weight_path)
    net.load_state_dict(torch.load(model_weight_path, map_location='cpu'))
    # To train only the new head, freeze the backbone here:
    # for param in net.parameters():
    #     param.requires_grad = False

    # Replace the head only after loading, so the checkpoint's fc weights
    # still match the default-sized layer during load_state_dict.
    in_channel = net.fc.in_features
    net.fc = nn.Linear(in_channel, 5)
    net.to(device)

    # define loss function
    loss_function = nn.CrossEntropyLoss()

    # construct an optimizer over the trainable parameters only
    params = [p for p in net.parameters() if p.requires_grad]
    optimizer = optim.Adam(params, lr=0.0001)

    epochs = 3
    best_acc = 0.0
    save_path = './resNet34.pth'
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for images, labels in train_bar:
            optimizer.zero_grad()
            logits = net(images.to(device))
            loss = loss_function(logits, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            step_loss = loss.item()
            running_loss += step_loss

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     step_loss)

        # validate
        net.eval()
        acc = 0.0  # number of correct predictions accumulated over the epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_images, val_labels in val_bar:
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

                val_bar.desc = "valid epoch[{}/{}]".format(epoch + 1,
                                                           epochs)

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # keep only the checkpoint with the best validation accuracy so far
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')
class ConvBNReLU(nn.Sequential):
    """Bias-free ``Conv2d`` -> ``BatchNorm2d`` -> ``ReLU6`` stack.

    Args:
        in_channel: input channels of the convolution.
        out_channel: output channels of the convolution and batch norm.
        kernel_size: square kernel size; padding is ``(kernel_size - 1) // 2``.
        stride: convolution stride.
        groups: convolution groups.
    """

    def __init__(self, in_channel, out_channel, kernel_size=3, stride=1, groups=1):
        same_pad = (kernel_size - 1) // 2
        conv = nn.Conv2d(in_channels=in_channel,
                         out_channels=out_channel,
                         kernel_size=kernel_size,
                         stride=stride,
                         padding=same_pad,
                         groups=groups,
                         bias=False)
        norm = nn.BatchNorm2d(out_channel)
        act = nn.ReLU6(inplace=True)
        super().__init__(conv, norm, act)
[] # conv1 layer features.append(ConvBNReLU(3, input_channel, stride=2)) # building inverted residual residual blockes for t, c, n, s in inverted_residual_setting: output_channel = _make_divisible(c * alpha, round_nearest) for i in range(n): stride = s if i == 0 else 1 features.append(block(input_channel, output_channel, stride, expand_ratio=t)) input_channel = output_channel # building last several layers features.append(ConvBNReLU(input_channel, last_channel, 1)) # combine feature layers self.features = nn.Sequential(*features) # building classifier self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) self.classifier = nn.Sequential( nn.Dropout(0.2), nn.Linear(last_channel, num_classes) ) # weight initialization for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out') if m.bias is not None: nn.init.zeros_(m.bias) elif isinstance(m, nn.BatchNorm2d): nn.init.ones_(m.weight) nn.init.zeros_(m.bias) elif isinstance(m, nn.Linear): nn.init.normal_(m.weight, 0, 0.01) nn.init.zeros_(m.bias) def forward(self, x): x = self.features(x) x = self.avgpool(x) x = torch.flatten(x, 1) x = self.classifier(x) return x ================================================ FILE: pytorch_classification/Test6_mobilenet/model_v3.py ================================================ from typing import Callable, List, Optional import torch from torch import nn, Tensor from torch.nn import functional as F from functools import partial def _make_divisible(ch, divisor=8, min_ch=None): """ This function is taken from the original tf repo. It ensures that all layers have a channel number that is divisible by 8 It can be seen here: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py """ if min_ch is None: min_ch = divisor new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor) # Make sure that round down does not go down by more than 10%. 
if new_ch < 0.9 * ch: new_ch += divisor return new_ch class ConvBNActivation(nn.Sequential): def __init__(self, in_planes: int, out_planes: int, kernel_size: int = 3, stride: int = 1, groups: int = 1, norm_layer: Optional[Callable[..., nn.Module]] = None, activation_layer: Optional[Callable[..., nn.Module]] = None): padding = (kernel_size - 1) // 2 if norm_layer is None: norm_layer = nn.BatchNorm2d if activation_layer is None: activation_layer = nn.ReLU6 super(ConvBNActivation, self).__init__(nn.Conv2d(in_channels=in_planes, out_channels=out_planes, kernel_size=kernel_size, stride=stride, padding=padding, groups=groups, bias=False), norm_layer(out_planes), activation_layer(inplace=True)) class SqueezeExcitation(nn.Module): def __init__(self, input_c: int, squeeze_factor: int = 4): super(SqueezeExcitation, self).__init__() squeeze_c = _make_divisible(input_c // squeeze_factor, 8) self.fc1 = nn.Conv2d(input_c, squeeze_c, 1) self.fc2 = nn.Conv2d(squeeze_c, input_c, 1) def forward(self, x: Tensor) -> Tensor: scale = F.adaptive_avg_pool2d(x, output_size=(1, 1)) scale = self.fc1(scale) scale = F.relu(scale, inplace=True) scale = self.fc2(scale) scale = F.hardsigmoid(scale, inplace=True) return scale * x class InvertedResidualConfig: def __init__(self, input_c: int, kernel: int, expanded_c: int, out_c: int, use_se: bool, activation: str, stride: int, width_multi: float): self.input_c = self.adjust_channels(input_c, width_multi) self.kernel = kernel self.expanded_c = self.adjust_channels(expanded_c, width_multi) self.out_c = self.adjust_channels(out_c, width_multi) self.use_se = use_se self.use_hs = activation == "HS" # whether using h-swish activation self.stride = stride @staticmethod def adjust_channels(channels: int, width_multi: float): return _make_divisible(channels * width_multi, 8) class InvertedResidual(nn.Module): def __init__(self, cnf: InvertedResidualConfig, norm_layer: Callable[..., nn.Module]): super(InvertedResidual, self).__init__() if cnf.stride not in 
class MobileNetV3(nn.Module):
    """MobileNetV3 classifier built from a list of inverted-residual configs.

    Args:
        inverted_residual_setting: non-empty list describing every bottleneck.
        last_channel: width of the hidden linear layer in the classifier.
        num_classes: number of output logits.
        block: bottleneck factory called as ``block(cnf, norm_layer)``;
            defaults to ``InvertedResidual``.
        norm_layer: normalisation factory; defaults to ``BatchNorm2d`` with
            ``eps=0.001`` and ``momentum=0.01``.

    Raises:
        ValueError: if ``inverted_residual_setting`` is empty.
        TypeError: if it is not a list of ``InvertedResidualConfig``.
    """

    def __init__(self,
                 inverted_residual_setting: List[InvertedResidualConfig],
                 last_channel: int,
                 num_classes: int = 1000,
                 block: Optional[Callable[..., nn.Module]] = None,
                 norm_layer: Optional[Callable[..., nn.Module]] = None):
        super().__init__()

        if not inverted_residual_setting:
            raise ValueError("The inverted_residual_setting should not be empty.")
        if not isinstance(inverted_residual_setting, List) or \
                not all(isinstance(s, InvertedResidualConfig) for s in inverted_residual_setting):
            raise TypeError("The inverted_residual_setting should be List[InvertedResidualConfig]")

        block = InvertedResidual if block is None else block
        if norm_layer is None:
            norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.01)

        # Stem: its output width is the input width of the first bottleneck.
        stem_c = inverted_residual_setting[0].input_c
        stem = ConvBNActivation(3,
                                stem_c,
                                kernel_size=3,
                                stride=2,
                                norm_layer=norm_layer,
                                activation_layer=nn.Hardswish)

        # One bottleneck per config entry, in order.
        bottlenecks = [block(cnf, norm_layer) for cnf in inverted_residual_setting]

        # Final 1x1 convolution widens the last bottleneck's output six-fold.
        head_in_c = inverted_residual_setting[-1].out_c
        head_out_c = 6 * head_in_c
        head = ConvBNActivation(head_in_c,
                                head_out_c,
                                kernel_size=1,
                                norm_layer=norm_layer,
                                activation_layer=nn.Hardswish)

        stack: List[nn.Module] = [stem, *bottlenecks, head]
        self.features = nn.Sequential(*stack)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Sequential(nn.Linear(head_out_c, last_channel),
                                        nn.Hardswish(inplace=True),
                                        nn.Dropout(p=0.2, inplace=True),
                                        nn.Linear(last_channel, num_classes))

        # initial weights
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode="fan_out")
                if module.bias is not None:
                    nn.init.zeros_(module.bias)
            elif isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.zeros_(module.bias)

    def _forward_impl(self, x: Tensor) -> Tensor:
        """Features -> global average pool -> flatten -> classifier."""
        pooled = self.avgpool(self.features(x))
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)

    def forward(self, x: Tensor) -> Tensor:
        """Return the class logits for input ``x``."""
        return self._forward_impl(x)
def mobilenet_v3_small(num_classes: int = 1000,
                       reduced_tail: bool = False) -> MobileNetV3:
    """
    Constructs a small MobileNetV3 architecture from
    "Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>.

    weights_link:
    https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth

    Args:
        num_classes (int): number of classes
        reduced_tail (bool): If True, halves the channel counts of the feature
            layers from C4 onward and of the classifier's hidden layer (C5).
            It is used to reduce the channel redundancy in the backbone for
            Detection and Segmentation.

    Returns:
        A ``MobileNetV3`` built from the small configuration table below.
    """
    width_multi = 1.0
    bneck_conf = partial(InvertedResidualConfig, width_multi=width_multi)
    adjust_channels = partial(InvertedResidualConfig.adjust_channels, width_multi=width_multi)

    reduce_divider = 2 if reduced_tail else 1
    # Tail (C4 onward) widths, computed once instead of repeating the division
    # in every table row.
    tail_c = 96 // reduce_divider
    tail_expanded_c = 576 // reduce_divider

    inverted_residual_setting = [
        # input_c, kernel, expanded_c, out_c, use_se, activation, stride
        bneck_conf(16, 3, 16, 16, True, "RE", 2),  # C1
        bneck_conf(16, 3, 72, 24, False, "RE", 2),  # C2
        bneck_conf(24, 3, 88, 24, False, "RE", 1),
        bneck_conf(24, 5, 96, 40, True, "HS", 2),  # C3
        bneck_conf(40, 5, 240, 40, True, "HS", 1),
        bneck_conf(40, 5, 240, 40, True, "HS", 1),
        bneck_conf(40, 5, 120, 48, True, "HS", 1),
        bneck_conf(48, 5, 144, 48, True, "HS", 1),
        bneck_conf(48, 5, 288, tail_c, True, "HS", 2),  # C4
        bneck_conf(tail_c, 5, tail_expanded_c, tail_c, True, "HS", 1),
        bneck_conf(tail_c, 5, tail_expanded_c, tail_c, True, "HS", 1),
    ]
    last_channel = adjust_channels(1024 // reduce_divider)  # C5

    return MobileNetV3(inverted_residual_setting=inverted_residual_setting,
                       last_channel=last_channel,
                       num_classes=num_classes)
def main():
    """Train the classifier head of a pretrained MobileNetV2 on the flower dataset.

    Paths are resolved relative to the current working directory:

    * ``../../data_set/flower_data/{train,val}`` must be ``ImageFolder`` trees;
    * ``./mobilenet_v2.pth`` must hold the pretrained weights.

    Side effects: writes ``class_indices.json`` (class index -> class name) and
    saves the weights with the best validation accuracy to ``./MobileNetV2.pth``.

    Raises:
        AssertionError: if the dataset directory or the weight file is missing.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    batch_size = 16
    epochs = 5

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # class_to_idx maps name -> index, e.g.
    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4};
    # invert it so the prediction script can map an index back to a name.
    flower_list = train_dataset.class_to_idx
    cla_dict = {val: key for key, val in flower_list.items()}
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    # os.cpu_count() returns None when the CPU count cannot be determined;
    # fall back to 1 so min() does not raise TypeError on a None entry.
    cpu_total = os.cpu_count() or 1
    nw = min([cpu_total, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    # create model
    net = MobileNetV2(num_classes=5)

    # load pretrain weights
    # download url: https://download.pytorch.org/models/mobilenet_v2-b0353104.pth
    model_weight_path = "./mobilenet_v2.pth"
    assert os.path.exists(model_weight_path), "file {} dose not exist.".format(model_weight_path)
    pre_weights = torch.load(model_weight_path, map_location='cpu')

    # Keep only checkpoint tensors whose element count matches the model's;
    # this drops the classifier weights, which were trained for a different
    # number of classes. The model state dict is built once (not once per key),
    # and keys the model does not have are skipped rather than raising
    # KeyError, consistent with the non-strict load below.
    model_state = net.state_dict()
    pre_dict = {k: v for k, v in pre_weights.items()
                if k in model_state and model_state[k].numel() == v.numel()}
    net.load_state_dict(pre_dict, strict=False)

    # freeze features weights so only the classifier is trained
    for param in net.features.parameters():
        param.requires_grad = False

    net.to(device)

    # define loss function
    loss_function = nn.CrossEntropyLoss()

    # construct an optimizer over the trainable parameters only
    params = [p for p in net.parameters() if p.requires_grad]
    optimizer = optim.Adam(params, lr=0.0001)

    best_acc = 0.0
    save_path = './MobileNetV2.pth'
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for images, labels in train_bar:
            optimizer.zero_grad()
            logits = net(images.to(device))
            loss = loss_function(logits, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            step_loss = loss.item()
            running_loss += step_loss

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     step_loss)

        # validate
        net.eval()
        acc = 0.0  # number of correct predictions accumulated over the epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_images, val_labels in val_bar:
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

                val_bar.desc = "valid epoch[{}/{}]".format(epoch + 1,
                                                           epochs)

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # keep only the checkpoint with the best validation accuracy so far
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')
def channel_shuffle(x: Tensor, groups: int) -> Tensor:
    """Interleave the channels of ``x`` across ``groups`` groups.

    The channel axis is viewed as ``(groups, channels_per_group)``, the two
    axes are swapped, and the result is flattened back to a single channel
    axis, so consecutive output channels come from different groups.

    Args:
        x: 4-D tensor unpacked as (batch, channels, height, width).
        groups: number of channel groups.

    Returns:
        Tensor of the same shape as ``x`` with its channels permuted.
    """
    n, c, h, w = x.size()
    per_group = c // groups

    # [n, c, h, w] -> [n, groups, per_group, h, w]
    grouped = x.view(n, groups, per_group, h, w)

    # swap the group axis with the within-group axis
    interleaved = grouped.transpose(1, 2).contiguous()

    # collapse back to [n, c, h, w]
    return interleaved.view(n, -1, h, w)
class ShuffleNetV2(nn.Module):
    """ShuffleNetV2 classifier: stem, three stages, a 1x1 conv and a linear head.

    Args:
        stages_repeats: number of units in stage2, stage3 and stage4
            (exactly 3 entries).
        stages_out_channels: output channels of the stem, the three stages and
            the final 1x1 convolution (exactly 5 entries).
        num_classes: number of output logits.
        inverted_residual: unit factory called as
            ``inverted_residual(input_c, output_c, stride)``.

    Raises:
        ValueError: if either list has the wrong length.
    """

    def __init__(self,
                 stages_repeats: List[int],
                 stages_out_channels: List[int],
                 num_classes: int = 1000,
                 inverted_residual: Callable[..., nn.Module] = InvertedResidual):
        super().__init__()

        if len(stages_repeats) != 3:
            raise ValueError("expected stages_repeats as list of 3 positive ints")
        if len(stages_out_channels) != 5:
            raise ValueError("expected stages_out_channels as list of 5 positive ints")
        self._stage_out_channels = stages_out_channels

        # Stem operates on a 3-channel (RGB) input.
        stem_c = self._stage_out_channels[0]
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, stem_c, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(stem_c),
            nn.ReLU(inplace=True)
        )
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Static annotations for mypy
        self.stage2: nn.Sequential
        self.stage3: nn.Sequential
        self.stage4: nn.Sequential

        # Each stage opens with one stride-2 unit, followed by stride-1 units
        # that keep the channel count.
        prev_c = stem_c
        stage_specs = zip(stages_repeats, self._stage_out_channels[1:])
        for idx, (repeats, stage_c) in enumerate(stage_specs, start=2):
            units = [inverted_residual(prev_c, stage_c, 2)]
            units.extend(inverted_residual(stage_c, stage_c, 1) for _ in range(repeats - 1))
            setattr(self, "stage{}".format(idx), nn.Sequential(*units))
            prev_c = stage_c

        final_c = self._stage_out_channels[-1]
        self.conv5 = nn.Sequential(
            nn.Conv2d(prev_c, final_c, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(final_c),
            nn.ReLU(inplace=True)
        )

        self.fc = nn.Linear(final_c, num_classes)

    def _forward_impl(self, x: Tensor) -> Tensor:
        """Stem -> stages -> conv5 -> mean over the spatial axes -> linear head."""
        # See note [TorchScript super()]
        for layer in (self.conv1, self.maxpool, self.stage2,
                      self.stage3, self.stage4, self.conv5):
            x = layer(x)
        pooled = x.mean([2, 3])  # global pool
        return self.fc(pooled)

    def forward(self, x: Tensor) -> Tensor:
        """Return the class logits for input ``x``."""
        return self._forward_impl(x)
def shufflenet_v2_x2_0(num_classes=1000):
    """
    Constructs a ShuffleNetV2 with 2.0x output channels, as described in
    `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design"
    <https://arxiv.org/abs/1807.11164>`.
    weight: https://download.pytorch.org/models/shufflenetv2_x2_0-8be3c8ee.pth

    No weights are loaded here; the returned network is freshly initialised.

    :param num_classes: number of outputs of the final fully connected layer
    :return: a ``ShuffleNetV2`` instance
    """
    model = ShuffleNetV2(stages_repeats=[4, 8, 4],
                         stages_out_channels=[24, 244, 488, 976, 2048],
                         num_classes=num_classes)

    return model
def main():
    """Classify one image with a trained ShuffleNetV2 x1.0 and plot the result.

    Reads ``../tulip.jpg``, ``./class_indices.json`` and
    ``./weights/model-29.pth``, prints the probability of every class and
    shows the image titled with the most likely class.

    :raises FileNotFoundError: if the image or the class-index file is missing
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose(
        [transforms.Resize(256),
         transforms.CenterCrop(224),
         transforms.ToTensor(),
         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    # load image
    img_path = "../tulip.jpg"
    if not os.path.exists(img_path):
        raise FileNotFoundError("file: '{}' does not exist.".format(img_path))
    # Normalize above is configured with three channel statistics, so force
    # grayscale / palette / RGBA inputs into three-channel RGB first.
    img = Image.open(img_path).convert("RGB")
    plt.imshow(img)
    # [C, H, W]
    img = data_transform(img)
    # expand batch dimension -> [N, C, H, W]
    img = torch.unsqueeze(img, dim=0)

    # read class_indict (maps the stringified class index to the class name)
    json_path = './class_indices.json'
    if not os.path.exists(json_path):
        raise FileNotFoundError("file: '{}' does not exist.".format(json_path))

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = shufflenet_v2_x1_0(num_classes=5).to(device)
    # load model weights
    model_weight_path = "./weights/model-29.pth"
    model.load_state_dict(torch.load(model_weight_path, map_location=device))
    model.eval()
    with torch.no_grad():
        # predict class
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        # plain Python scalars keep the dict lookup and formatting explicit
        predict_cla = torch.argmax(predict).item()

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                                 predict[predict_cla].item())
    plt.title(print_res)
    for index, prob in enumerate(predict):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(index)],
                                                  prob.item()))
    plt.show()
= torch.device(args.device if torch.cuda.is_available() else "cpu") print(args) print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/') tb_writer = SummaryWriter() if os.path.exists("./weights") is False: os.makedirs("./weights") train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path) data_transform = { "train": transforms.Compose([transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]), "val": transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])} # 实例化训练数据集 train_dataset = MyDataSet(images_path=train_images_path, images_class=train_images_label, transform=data_transform["train"]) # 实例化验证数据集 val_dataset = MyDataSet(images_path=val_images_path, images_class=val_images_label, transform=data_transform["val"]) batch_size = args.batch_size nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]) # number of workers print('Using {} dataloader workers every process'.format(nw)) train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=nw, collate_fn=train_dataset.collate_fn) val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True, num_workers=nw, collate_fn=val_dataset.collate_fn) # 如果存在预训练权重则载入 model = shufflenet_v2_x1_0(num_classes=args.num_classes).to(device) if args.weights != "": if os.path.exists(args.weights): weights_dict = torch.load(args.weights, map_location=device) load_weights_dict = {k: v for k, v in weights_dict.items() if model.state_dict()[k].numel() == v.numel()} print(model.load_state_dict(load_weights_dict, strict=False)) else: raise FileNotFoundError("not found weights file: {}".format(args.weights)) # 是否冻结权重 if 
def str2bool(value):
    """Convert a command-line token into a real boolean.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so any
    non-empty text (including ``"False"``) becomes ``True``. This converter
    interprets the text instead.

    :param value: a ``bool`` or a string such as ``true``/``false``/``1``/``0``
    :return: the parsed boolean
    :raises argparse.ArgumentTypeError: if the text is not a recognised boolean
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("true", "t", "yes", "y", "1"):
        return True
    if text in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value, got {!r}".format(value))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--epochs', type=int, default=30)
    parser.add_argument('--batch-size', type=int, default=16)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--lrf', type=float, default=0.1)

    # root directory of the dataset
    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
    parser.add_argument('--data-path', type=str,
                        default="/data/flower_photos")

    # download address of the official shufflenetv2_x1.0 weights
    # https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth
    parser.add_argument('--weights', type=str, default='./shufflenetv2_x1.pth',
                        help='initial weights path')
    # "--freeze-layers", "--freeze-layers True" and "--freeze-layers False"
    # are all accepted; the last one now really disables freezing.
    parser.add_argument('--freeze-layers', type=str2bool, nargs='?',
                        const=True, default=False)
    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')

    opt = parser.parse_args()

    main(opt)
def plot_data_loader_image(data_loader):
    """Show up to four de-normalised images from every batch of a loader.

    Class names are looked up in ``./class_indices.json`` using the
    stringified label. One figure is shown per batch.

    :param data_loader: loader yielding ``(images, labels)`` batches;
        presumably images are normalised [N, C, H, W] CPU tensors -- the code
        calls ``.numpy()`` and transposes each sample with ``(1, 2, 0)``
    """
    batch_size = data_loader.batch_size
    plot_num = min(batch_size, 4)

    json_path = './class_indices.json'
    assert os.path.exists(json_path), json_path + " does not exist."
    # context manager so the file handle is released right after parsing
    with open(json_path, 'r') as json_file:
        class_indices = json.load(json_file)

    for data in data_loader:
        images, labels = data
        # the final batch may hold fewer samples than plot_num
        shown = min(plot_num, len(images))
        for i in range(shown):
            # [C, H, W] -> [H, W, C]
            img = images[i].numpy().transpose(1, 2, 0)
            # undo the Normalize transform and rescale to 0..255
            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255
            label = labels[i].item()
            plt.subplot(1, shown, i + 1)
            plt.xlabel(class_indices[str(label)])
            plt.xticks([])  # hide the x-axis ticks
            plt.yticks([])  # hide the y-axis ticks
            plt.imshow(img.astype('uint8'))
        plt.show()
@torch.no_grad()
def evaluate(model, data_loader, device):
    """Return the fraction of correctly classified samples.

    The model is switched to eval mode, every batch of ``data_loader`` is run
    on ``device``, and the number of matching predictions is divided by
    ``len(data_loader.dataset)``.

    :param model: network returning per-class scores along dim 1
    :param data_loader: loader yielding ``(images, labels)`` batches
    :param device: device on which inputs and labels are compared
    :return: accuracy as a Python float
    """
    model.eval()

    # total number of validation samples
    sample_count = len(data_loader.dataset)

    # running count of correct predictions, kept on the target device
    correct = torch.zeros(1).to(device)

    progress = tqdm(data_loader, file=sys.stdout)
    for batch in progress:
        images, labels = batch
        scores = model(images.to(device))
        _, predicted = torch.max(scores, dim=1)
        correct += torch.eq(predicted, labels.to(device)).sum()

    return correct.item() / sample_count
class _DenseLayer(nn.Module):
    """One dense layer: BN-ReLU-Conv(1x1) bottleneck, then BN-ReLU-Conv(3x3).

    The bottleneck produces ``bn_size * growth_rate`` channels and the layer
    outputs ``growth_rate`` channels. ``forward`` accepts either a single
    tensor or a list of tensors, which are concatenated along dim 1.
    """

    def __init__(self,
                 input_c: int,
                 growth_rate: int,
                 bn_size: int,
                 drop_rate: float,
                 memory_efficient: bool = False):
        """
        :param input_c: channels of the (concatenated) input
        :param growth_rate: channels produced by this layer
        :param bn_size: bottleneck width multiplier
        :param drop_rate: dropout probability applied to the output (0 disables it)
        :param memory_efficient: run the bottleneck through checkpointing
        """
        super().__init__()
        bottleneck_c = bn_size * growth_rate

        # Attribute assignment registers the sub-modules under the same names
        # and in the same order as explicit add_module calls would.
        self.norm1 = nn.BatchNorm2d(input_c)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_channels=input_c,
                               out_channels=bottleneck_c,
                               kernel_size=1,
                               stride=1,
                               bias=False)
        self.norm2 = nn.BatchNorm2d(bottleneck_c)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(bottleneck_c,
                               growth_rate,
                               kernel_size=3,
                               stride=1,
                               padding=1,
                               bias=False)
        self.drop_rate = drop_rate
        self.memory_efficient = memory_efficient

    def bn_function(self, inputs: List[Tensor]) -> Tensor:
        """Concatenate ``inputs`` on dim 1 and apply norm1 -> relu1 -> conv1."""
        stacked = torch.cat(inputs, 1)
        normalised = self.norm1(stacked)
        return self.conv1(self.relu1(normalised))

    @staticmethod
    def any_requires_grad(inputs: List[Tensor]) -> bool:
        """Return True as soon as one tensor in ``inputs`` requires grad."""
        needs_grad = False
        for candidate in inputs:
            if candidate.requires_grad:
                needs_grad = True
                break
        return needs_grad

    @torch.jit.unused
    def call_checkpoint_bottleneck(self, inputs: List[Tensor]) -> Tensor:
        """Run ``bn_function`` through ``torch.utils.checkpoint``."""
        def run_bottleneck(*tensors):
            return self.bn_function(tensors)

        return cp.checkpoint(run_bottleneck, *inputs)

    def forward(self, inputs: Tensor) -> Tensor:
        """Compute this layer's new feature maps.

        :param inputs: a tensor, or a list of tensors to concatenate on dim 1
        :return: tensor with ``growth_rate`` channels
        """
        if isinstance(inputs, Tensor):
            prev_features = [inputs]
        else:
            prev_features = inputs

        if self.memory_efficient and self.any_requires_grad(prev_features):
            if torch.jit.is_scripting():
                raise Exception("memory efficient not supported in JIT")

            bottleneck_output = self.call_checkpoint_bottleneck(prev_features)
        else:
            bottleneck_output = self.bn_function(prev_features)

        out = self.norm2(bottleneck_output)
        out = self.relu2(out)
        out = self.conv2(out)
        if self.drop_rate > 0:
            out = F.dropout(out,
                            p=self.drop_rate,
                            training=self.training)

        return out
def forward(self, x: Tensor) -> Tensor:
    """Run the feature extractor, global-average-pool, and classify.

    :param x: input batch passed to ``self.features``
    :return: output of ``self.classifier`` for the pooled, flattened features
    """
    # ReLU after the final batch norm, then pool every map down to 1x1
    pooled = F.adaptive_avg_pool2d(F.relu(self.features(x), inplace=True), (1, 1))
    flattened = torch.flatten(pooled, 1)
    return self.classifier(flattened)
def load_state_dict(model: nn.Module, weights_path: str) -> None:
    """Load pretrained DenseNet weights into ``model``.

    Checkpoint keys written in the old ``norm.1`` / ``conv.2`` style are
    renamed to ``norm1`` / ``conv2``. When the model's classifier does not
    have 1000 outputs, every key containing ``"classifier"`` is dropped and
    the remaining weights are loaded non-strictly.

    :param model: network exposing a ``classifier`` layer with ``out_features``
    :param weights_path: path of the checkpoint file
    """
    # '.'s are no longer allowed in module names, but previous _DenseLayer
    # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'.
    # They are also in the checkpoints in model_urls. This pattern is used
    # to find such keys.
    pattern = re.compile(
        r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')

    # Deserialize onto the CPU so a checkpoint saved from CUDA tensors also
    # loads on a machine without a GPU; model.load_state_dict then copies the
    # values into the model's own parameters.
    # NOTE: torch.load unpickles the file -- only load checkpoints you trust.
    raw_state = torch.load(weights_path, map_location="cpu")

    num_classes = model.classifier.out_features
    load_fc = num_classes == 1000

    # Build a fresh mapping instead of editing the loaded dict in place.
    state_dict = {}
    for key, value in raw_state.items():
        if not load_fc and "classifier" in key:
            continue  # head shape differs, keep the model's own classifier

        res = pattern.match(key)
        if res:
            key = res.group(1) + res.group(2)
        state_dict[key] = value

    model.load_state_dict(state_dict, strict=load_fc)
    print("successfully load pretrain-weights.")
def main(args):
    """Fine-tune DenseNet-121 on an image-folder dataset.

    Splits the dataset under ``args.data_path``, optionally loads pretrained
    weights, trains for ``args.epochs`` epochs with SGD and a cosine learning
    rate schedule, logs loss / accuracy / learning rate to TensorBoard and
    saves the weights of every epoch to ``./weights/model-<epoch>.pth``.

    :param args: parsed command-line namespace (``device``, ``data_path``,
        ``batch_size``, ``num_classes``, ``weights``, ``freeze_layers``,
        ``lr``, ``lrf``, ``epochs``)
    :raises FileNotFoundError: if ``args.weights`` is set but does not exist
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")

    print(args)
    print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
    tb_writer = SummaryWriter()
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs("./weights", exist_ok=True)

    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # training dataset
    train_dataset = MyDataSet(images_path=train_images_path,
                              images_class=train_images_label,
                              transform=data_transform["train"])

    # validation dataset
    val_dataset = MyDataSet(images_path=val_images_path,
                            images_class=val_images_label,
                            transform=data_transform["val"])

    batch_size = args.batch_size
    # os.cpu_count() may return None when the count cannot be determined
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_dataset.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_dataset.collate_fn)

    # load pretrained weights when a path is given
    model = densenet121(num_classes=args.num_classes).to(device)
    if args.weights != "":
        if os.path.exists(args.weights):
            load_state_dict(model, args.weights)
        else:
            raise FileNotFoundError("not found weights file: {}".format(args.weights))

    # optionally freeze weights
    if args.freeze_layers:
        for name, para in model.named_parameters():
            # freeze everything except the final fully connected layer
            if "classifier" not in name:
                para.requires_grad_(False)

    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=1E-4, nesterov=True)

    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    def lf(x):
        """Cosine decay of the LR multiplier from 1 down to args.lrf."""
        return ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf

    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    tags = ["loss", "accuracy", "learning_rate"]
    try:
        for epoch in range(args.epochs):
            # train
            mean_loss = train_one_epoch(model=model,
                                        optimizer=optimizer,
                                        data_loader=train_loader,
                                        device=device,
                                        epoch=epoch)

            scheduler.step()

            # validate
            acc = evaluate(model=model,
                           data_loader=val_loader,
                           device=device)

            print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3)))
            tb_writer.add_scalar(tags[0], mean_loss, epoch)
            tb_writer.add_scalar(tags[1], acc, epoch)
            tb_writer.add_scalar(tags[2], optimizer.param_groups[0]["lr"], epoch)

            torch.save(model.state_dict(), "./weights/model-{}.pth".format(epoch))
    finally:
        # flush pending events even when training is interrupted
        tb_writer.close()
def read_split_data(root: str, val_rate: float = 0.2):
    """Split an image-folder dataset into training and validation lists.

    Every sub-directory of ``root`` is one class. Class indices follow the
    sorted directory names and the index-to-name mapping is written to
    ``class_indices.json`` in the current working directory. For each class,
    ``int(len(images) * val_rate)`` images are sampled for validation with a
    fixed random seed; the rest go to training.

    :param root: dataset root directory
    :param val_rate: fraction of each class used for validation
    :return: ``(train_paths, train_labels, val_paths, val_labels)``
    :raises AssertionError: if ``root`` is missing or either split is empty
    """
    random.seed(0)  # make the random split reproducible
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # one sub-directory per class
    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]
    # sort so that the class order is the same on every platform
    flower_class.sort()
    # class name -> numeric index
    class_indices = {name: index for index, name in enumerate(flower_class)}
    json_str = json.dumps({index: name for name, index in class_indices.items()}, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of every training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of every validation image
    every_class_num = []  # number of samples per class
    supported = (".jpg", ".JPG", ".png", ".PNG")  # accepted file suffixes

    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # collect every supported file, sorted for a platform-independent order
        images = sorted(os.path.join(root, cla, i) for i in os.listdir(cla_path)
                        if os.path.splitext(i)[-1] in supported)
        image_class = class_indices[cla]
        every_class_num.append(len(images))
        # A set gives O(1) membership tests below; sampling still happens on
        # the sorted list, so the chosen images are unchanged.
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.".format(sum(every_class_num)))
    print("{} images for training.".format(len(train_images_path)))
    print("{} images for validation.".format(len(val_images_path)))
    assert len(train_images_path) > 0, "number of training images must greater than 0."
    assert len(val_images_path) > 0, "number of validation images must greater than 0."

    plot_image = False
    if plot_image:
        # bar chart of the number of images per class
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # replace the ticks 0,1,2,... with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # value label above every bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        plt.xlabel('image class')
        plt.ylabel('number of images')
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label
@torch.no_grad()
def evaluate(model, data_loader, device):
    """Return the top-1 accuracy of ``model`` over ``data_loader``.

    The model is switched to eval mode and every batch is moved to
    ``device``. The number of correct predictions is divided by
    ``len(data_loader.dataset)``.
    """
    model.eval()

    # Denominator: total number of samples in the underlying dataset.
    sample_count = len(data_loader.dataset)
    # Running count of correct predictions, accumulated on the target device.
    correct = torch.zeros(1).to(device)

    progress = tqdm(data_loader, file=sys.stdout)
    for images, labels in progress:
        logits = model(images.to(device))
        # torch.max over the class dimension returns (values, indices).
        _, predicted = torch.max(logits, dim=1)
        correct += torch.eq(predicted, labels.to(device)).sum()

    return correct.item() / sample_count
def drop_path(x, drop_prob: float = 0., training: bool = False):
    """Drop paths (Stochastic Depth) per sample.

    Intended for the main path of residual blocks.
    "Deep Networks with Stochastic Depth", https://arxiv.org/pdf/1603.09382.pdf

    This function is taken from rwightman. It can be seen here:
    https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py#L140

    Args:
        x: Input tensor whose first dimension indexes the samples.
        drop_prob: Probability of zeroing a whole sample. ``0`` and ``None``
            both mean "never drop".
        training: Dropping is only applied when this is true.

    Returns:
        ``x`` itself when nothing is dropped; otherwise a new tensor in which
        each sample is either zeroed or scaled by ``1 / (1 - drop_prob)``.
    """
    # `not drop_prob` covers both 0 and None, so a DropPath built with its
    # default argument is a no-op instead of failing on `1 - None`.
    if not drop_prob or not training:
        return x
    keep_prob = 1 - drop_prob
    # One random value per sample, broadcast over all remaining dimensions
    # (works with tensors of any rank, not just 2D ConvNets).
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)
    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
    random_tensor.floor_()  # binarize: 1 with probability keep_prob, else 0
    output = x.div(keep_prob) * random_tensor
    return output


class DropPath(nn.Module):
    """Module wrapper around :func:`drop_path` (Stochastic Depth per sample).

    "Deep Networks with Stochastic Depth", https://arxiv.org/pdf/1603.09382.pdf

    Args:
        drop_prob: Probability of dropping a sample's path; ``None`` (the
            default) or ``0`` disables dropping.
    """

    def __init__(self, drop_prob=None):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        # self.training is toggled by nn.Module.train() / eval().
        return drop_path(x, self.drop_prob, self.training)
class InvertedResidual(nn.Module):
    """Inverted residual block assembled from an ``InvertedResidualConfig``.

    The block is an ordered sequence of: an optional 1x1 expansion conv
    (skipped when the expanded width equals the input width), a depthwise
    conv, an optional squeeze-excitation module and a 1x1 projection conv
    without activation. A shortcut is added when the stride is 1 and the
    input and output channel counts match.
    """

    def __init__(self,
                 cnf: InvertedResidualConfig,
                 norm_layer: Callable[..., nn.Module]):
        super().__init__()

        if cnf.stride not in (1, 2):
            raise ValueError("illegal stride value.")

        # The shortcut is only valid when the block keeps resolution and width.
        self.use_res_connect = (cnf.stride == 1 and cnf.input_c == cnf.out_c)

        swish = nn.SiLU  # alias Swish
        stages = OrderedDict()

        # expand
        if cnf.expanded_c != cnf.input_c:
            stages["expand_conv"] = ConvBNActivation(cnf.input_c,
                                                     cnf.expanded_c,
                                                     kernel_size=1,
                                                     norm_layer=norm_layer,
                                                     activation_layer=swish)

        # depthwise (groups == channels)
        stages["dwconv"] = ConvBNActivation(cnf.expanded_c,
                                            cnf.expanded_c,
                                            kernel_size=cnf.kernel,
                                            stride=cnf.stride,
                                            groups=cnf.expanded_c,
                                            norm_layer=norm_layer,
                                            activation_layer=swish)

        if cnf.use_se:
            stages["se"] = SqueezeExcitation(cnf.input_c, cnf.expanded_c)

        # project (linear: Identity instead of an activation)
        stages["project_conv"] = ConvBNActivation(cnf.expanded_c,
                                                  cnf.out_c,
                                                  kernel_size=1,
                                                  norm_layer=norm_layer,
                                                  activation_layer=nn.Identity)

        self.block = nn.Sequential(stages)
        self.out_channels = cnf.out_c
        self.is_strided = cnf.stride > 1

        # The stochastic-depth layer is only used together with the shortcut.
        wants_drop = self.use_res_connect and cnf.drop_rate > 0
        self.dropout = DropPath(cnf.drop_rate) if wants_drop else nn.Identity()

    def forward(self, x: Tensor) -> Tensor:
        out = self.dropout(self.block(x))
        if self.use_res_connect:
            out += x

        return out
0.2, drop_connect_rate: float = 0.2, block: Optional[Callable[..., nn.Module]] = None, norm_layer: Optional[Callable[..., nn.Module]] = None ): super(EfficientNet, self).__init__() # kernel_size, in_channel, out_channel, exp_ratio, strides, use_SE, drop_connect_rate, repeats default_cnf = [[3, 32, 16, 1, 1, True, drop_connect_rate, 1], [3, 16, 24, 6, 2, True, drop_connect_rate, 2], [5, 24, 40, 6, 2, True, drop_connect_rate, 2], [3, 40, 80, 6, 2, True, drop_connect_rate, 3], [5, 80, 112, 6, 1, True, drop_connect_rate, 3], [5, 112, 192, 6, 2, True, drop_connect_rate, 4], [3, 192, 320, 6, 1, True, drop_connect_rate, 1]] def round_repeats(repeats): """Round number of repeats based on depth multiplier.""" return int(math.ceil(depth_coefficient * repeats)) if block is None: block = InvertedResidual if norm_layer is None: norm_layer = partial(nn.BatchNorm2d, eps=1e-3, momentum=0.1) adjust_channels = partial(InvertedResidualConfig.adjust_channels, width_coefficient=width_coefficient) # build inverted_residual_setting bneck_conf = partial(InvertedResidualConfig, width_coefficient=width_coefficient) b = 0 num_blocks = float(sum(round_repeats(i[-1]) for i in default_cnf)) inverted_residual_setting = [] for stage, args in enumerate(default_cnf): cnf = copy.copy(args) for i in range(round_repeats(cnf.pop(-1))): if i > 0: # strides equal 1 except first cnf cnf[-3] = 1 # strides cnf[1] = cnf[2] # input_channel equal output_channel cnf[-1] = args[-2] * b / num_blocks # update dropout ratio index = str(stage + 1) + chr(i + 97) # 1a, 2a, 2b, ... 
def efficientnet_b0(num_classes=1000):
    """Build an EfficientNet-B0 classifier (input image size 224x224)."""
    b0_scaling = dict(width_coefficient=1.0,
                      depth_coefficient=1.0,
                      dropout_rate=0.2)
    return EfficientNet(num_classes=num_classes, **b0_scaling)
class MyDataSet(Dataset):
    """Custom dataset backed by parallel lists of image paths and class indices."""

    def __init__(self, images_path: list, images_class: list, transform=None):
        self.images_path = images_path
        self.images_class = images_class
        self.transform = transform

    def __len__(self):
        return len(self.images_path)

    def __getitem__(self, item):
        """Load sample ``item`` and return ``(image, label)``.

        Raises:
            ValueError: If the image is not in RGB mode (for example a
                grayscale, mode "L", picture).
        """
        path = self.images_path[item]
        image = Image.open(path)
        if img_mode_is_not_rgb := image.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(path))

        label = self.images_class[item]
        if self.transform is None:
            return image, label
        return self.transform(image), label

    @staticmethod
    def collate_fn(batch):
        """Stack a list of ``(image, label)`` samples into batch tensors.

        The official default_collate implementation can be found at
        https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        """
        image_seq, label_seq = zip(*batch)
        return torch.stack(image_seq, dim=0), torch.as_tensor(label_seq)
def main():
    """Classify a single image with a trained EfficientNet and plot the result.

    Reads the image at ``img_path``, the index-to-name mapping from
    ``./class_indices.json`` and the weights at ``model_weight_path``.
    Prints the probability of every class and shows the image with the
    top prediction as its title.

    Raises:
        AssertionError: If the image or the class-index file is missing.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Input resolution expected by each EfficientNet variant.
    img_size = {"B0": 224,
                "B1": 240,
                "B2": 260,
                "B3": 300,
                "B4": 380,
                "B5": 456,
                "B6": 528,
                "B7": 600}
    num_model = "B0"

    data_transform = transforms.Compose(
        [transforms.Resize(img_size[num_model]),
         transforms.CenterCrop(img_size[num_model]),
         transforms.ToTensor(),
         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path)
    with Image.open(img_path) as img_file:
        # Normalize() above is configured for 3 channels, so force RGB;
        # grayscale or RGBA inputs would otherwise fail inside the transform.
        img = img_file.convert("RGB")
    plt.imshow(img)
    # [C, H, W]
    img = data_transform(img)
    # expand batch dimension -> [N, C, H, W]
    img = torch.unsqueeze(img, dim=0)

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = create_model(num_classes=5).to(device)
    # load model weights
    model_weight_path = "./weights/model-29.pth"
    model.load_state_dict(torch.load(model_weight_path, map_location=device))
    model.eval()
    with torch.no_grad():
        # predict class
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        # Plain Python int: safe as a tensor index and as a JSON key.
        predict_cla = torch.argmax(predict).item()

    print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)],
                                               predict[predict_cla].item())
    plt.title(print_res)
    for i in range(len(predict)):
        print("class: {:10} prob: {:.3}".format(class_indict[str(i)],
                                                predict[i].item()))
    plt.show()
def main(args):
    """Train EfficientNet-B0 on an image-folder dataset.

    Reads the following fields from ``args``: ``device``, ``data_path``,
    ``batch_size``, ``num_classes``, ``weights``, ``freeze_layers``, ``lr``,
    ``lrf`` and ``epochs``. Loss, accuracy and learning rate are logged with
    a TensorBoard ``SummaryWriter`` and the model state is saved to
    ``./weights/model-{epoch}.pth`` after every epoch.

    Raises:
        FileNotFoundError: If ``args.weights`` is non-empty but the file
            does not exist.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")

    print(args)
    print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
    tb_writer = SummaryWriter()
    os.makedirs("./weights", exist_ok=True)

    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)

    # Input resolution expected by each EfficientNet variant.
    img_size = {"B0": 224,
                "B1": 240,
                "B2": 260,
                "B3": 300,
                "B4": 380,
                "B5": 456,
                "B6": 528,
                "B7": 600}
    num_model = "B0"

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(img_size[num_model]),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(img_size[num_model]),
                                   transforms.CenterCrop(img_size[num_model]),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # Training dataset
    train_dataset = MyDataSet(images_path=train_images_path,
                              images_class=train_images_label,
                              transform=data_transform["train"])

    # Validation dataset
    val_dataset = MyDataSet(images_path=val_images_path,
                            images_class=val_images_label,
                            transform=data_transform["val"])

    batch_size = args.batch_size
    # number of workers; os.cpu_count() may return None, which min() cannot compare
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_dataset.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_dataset.collate_fn)

    # Load pretrained weights when a path is given.
    model = create_model(num_classes=args.num_classes).to(device)
    if args.weights != "":
        if os.path.exists(args.weights):
            weights_dict = torch.load(args.weights, map_location=device)
            # Build the model's state dict once. Keep only checkpoint entries
            # the model actually has and whose shapes match, so foreign keys
            # or a classifier head of a different size are skipped rather
            # than raising.
            model_state = model.state_dict()
            load_weights_dict = {k: v for k, v in weights_dict.items()
                                 if k in model_state and model_state[k].shape == v.shape}
            print(model.load_state_dict(load_weights_dict, strict=False))
        else:
            raise FileNotFoundError("not found weights file: {}".format(args.weights))

    # Optionally freeze the backbone.
    if args.freeze_layers:
        for name, para in model.named_parameters():
            # Freeze everything except the last conv layer and the classifier.
            if ("features.top" not in name) and ("classifier" not in name):
                para.requires_grad_(False)
            else:
                print("training {}".format(name))

    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=1E-4)

    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    def lf(x):
        # cosine decay of the LR multiplier from 1 down to args.lrf
        return ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf

    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    try:
        for epoch in range(args.epochs):
            # train
            mean_loss = train_one_epoch(model=model,
                                        optimizer=optimizer,
                                        data_loader=train_loader,
                                        device=device,
                                        epoch=epoch)

            scheduler.step()

            # validate
            acc = evaluate(model=model,
                           data_loader=val_loader,
                           device=device)

            print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3)))
            tags = ["loss", "accuracy", "learning_rate"]
            tb_writer.add_scalar(tags[0], mean_loss, epoch)
            tb_writer.add_scalar(tags[1], acc, epoch)
            tb_writer.add_scalar(tags[2], optimizer.param_groups[0]["lr"], epoch)

            torch.save(model.state_dict(), "./weights/model-{}.pth".format(epoch))
    finally:
        # Flush pending TensorBoard events even if training stops early.
        tb_writer.close()
# Compare the (major, minor) version numerically: a plain string comparison
# ranks "2.10.0" below "2.4.0" and would reject newer TensorFlow releases.
assert tuple(int(part) for part in tf.version.VERSION.split(".")[:2]) >= (2, 4), \
    "version of tf must greater/equal than 2.4.0"


def main():
    """Convert Keras EfficientNetB0 weights into a PyTorch state dict.

    Instantiates ``tf.keras.applications.EfficientNetB0``, renames every
    weight to the matching key of the PyTorch model, transposes kernels into
    PyTorch layout and saves the result to ``save_path`` with ``torch.save``.

    Raises:
        AssertionError: If a weight inside a "block" layer has an
            unexpected suffix.
        KeyError: If a weight name matches none of the known patterns.
    """
    # save pytorch weights path
    save_path = "./efficientnetb0.pth"

    # create keras model and download weights
    # EfficientNetB0, EfficientNetB1, EfficientNetB2, ...
    m = tf.keras.applications.EfficientNetB0()

    # Keras suffix -> PyTorch suffix for weights inside an MBConv block.
    # Built once here instead of once per weight inside the loop.
    trans_dict = {"expand_conv/kernel:0": "expand_conv.0.weight",
                  "expand_bn/gamma:0": "expand_conv.1.weight",
                  "expand_bn/beta:0": "expand_conv.1.bias",
                  "expand_bn/moving_mean:0": "expand_conv.1.running_mean",
                  "expand_bn/moving_variance:0": "expand_conv.1.running_var",
                  "dwconv/depthwise_kernel:0": "dwconv.0.weight",
                  "bn/gamma:0": "dwconv.1.weight",
                  "bn/beta:0": "dwconv.1.bias",
                  "bn/moving_mean:0": "dwconv.1.running_mean",
                  "bn/moving_variance:0": "dwconv.1.running_var",
                  "se_reduce/kernel:0": "se.fc1.weight",
                  "se_reduce/bias:0": "se.fc1.bias",
                  "se_expand/kernel:0": "se.fc2.weight",
                  "se_expand/bias:0": "se.fc2.bias",
                  "project_conv/kernel:0": "project_conv.0.weight",
                  "project_bn/gamma:0": "project_conv.1.weight",
                  "project_bn/beta:0": "project_conv.1.bias",
                  "project_bn/moving_mean:0": "project_conv.1.running_mean",
                  "project_bn/moving_variance:0": "project_conv.1.running_var"}
    # Regular conv kernels that need the (3, 2, 0, 1) axis permutation.
    conv_kernel_postfixes = ("expand_conv.0.weight",
                             "se.fc1.weight",
                             "se.fc2.weight",
                             "project_conv.0.weight")

    weights_dict = dict()
    weights = m.weights[3:]  # delete norm weights
    for weight in weights:
        name = weight.name
        data = weight.numpy()

        if "stem_conv/kernel:0" == name:
            torch_name = "features.stem_conv.0.weight"
            weights_dict[torch_name] = np.transpose(data, (3, 2, 0, 1)).astype(np.float32)
        elif "stem_bn/gamma:0" == name:
            torch_name = "features.stem_conv.1.weight"
            weights_dict[torch_name] = data
        elif "stem_bn/beta:0" == name:
            torch_name = "features.stem_conv.1.bias"
            weights_dict[torch_name] = data
        elif "stem_bn/moving_mean:0" == name:
            torch_name = "features.stem_conv.1.running_mean"
            weights_dict[torch_name] = data
        elif "stem_bn/moving_variance:0" == name:
            torch_name = "features.stem_conv.1.running_var"
            weights_dict[torch_name] = data
        elif "block" in name:
            name = name[5:]  # delete "block" word
            block_index = name[:2]  # 1a, 2a, ...
            name = name[3:]  # delete block_index and "_"
            torch_prefix = "features.{}.block.".format(block_index)

            assert name in trans_dict, "key '{}' not in trans_dict".format(name)

            torch_postfix = trans_dict[name]
            torch_name = torch_prefix + torch_postfix
            if torch_postfix in conv_kernel_postfixes:
                data = np.transpose(data, (3, 2, 0, 1)).astype(np.float32)
            elif torch_postfix == "dwconv.0.weight":
                # depthwise kernels use a different axis permutation
                data = np.transpose(data, (2, 3, 0, 1)).astype(np.float32)
            weights_dict[torch_name] = data
        elif "top_conv/kernel:0" == name:
            torch_name = "features.top.0.weight"
            weights_dict[torch_name] = np.transpose(data, (3, 2, 0, 1)).astype(np.float32)
        elif "top_bn/gamma:0" == name:
            torch_name = "features.top.1.weight"
            weights_dict[torch_name] = data
        elif "top_bn/beta:0" == name:
            torch_name = "features.top.1.bias"
            weights_dict[torch_name] = data
        elif "top_bn/moving_mean:0" == name:
            torch_name = "features.top.1.running_mean"
            weights_dict[torch_name] = data
        elif "top_bn/moving_variance:0" == name:
            torch_name = "features.top.1.running_var"
            weights_dict[torch_name] = data
        elif "predictions/kernel:0" == name:
            torch_name = "classifier.1.weight"
            weights_dict[torch_name] = np.transpose(data, (1, 0)).astype(np.float32)
        elif "predictions/bias:0" == name:
            torch_name = "classifier.1.bias"
            weights_dict[torch_name] = data
        else:
            raise KeyError("no match key '{}'".format(name))

    # Convert every numpy array to a tensor before saving.
    weights_dict = {k: torch.as_tensor(v) for k, v in weights_dict.items()}
    torch.save(weights_dict, save_path)
    print("Conversion complete.")


if __name__ == '__main__':
    main()
in enumerate(flower_class)) json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4) with open('class_indices.json', 'w') as json_file: json_file.write(json_str) train_images_path = [] # 存储训练集的所有图片路径 train_images_label = [] # 存储训练集图片对应索引信息 val_images_path = [] # 存储验证集的所有图片路径 val_images_label = [] # 存储验证集图片对应索引信息 every_class_num = [] # 存储每个类别的样本总数 supported = [".jpg", ".JPG", ".png", ".PNG"] # 支持的文件后缀类型 # 遍历每个文件夹下的文件 for cla in flower_class: cla_path = os.path.join(root, cla) # 遍历获取supported支持的所有文件路径 images = [os.path.join(root, cla, i) for i in os.listdir(cla_path) if os.path.splitext(i)[-1] in supported] # 排序,保证各平台顺序一致 images.sort() # 获取该类别对应的索引 image_class = class_indices[cla] # 记录该类别的样本数量 every_class_num.append(len(images)) # 按比例随机采样验证样本 val_path = random.sample(images, k=int(len(images) * val_rate)) for img_path in images: if img_path in val_path: # 如果该路径在采样的验证集样本中则存入验证集 val_images_path.append(img_path) val_images_label.append(image_class) else: # 否则存入训练集 train_images_path.append(img_path) train_images_label.append(image_class) print("{} images were found in the dataset.".format(sum(every_class_num))) print("{} images for training.".format(len(train_images_path))) print("{} images for validation.".format(len(val_images_path))) assert len(train_images_path) > 0, "number of training images must greater than 0." assert len(val_images_path) > 0, "number of validation images must greater than 0." 
def plot_data_loader_image(data_loader):
    """Show up to four de-normalized images of every batch with their class names.

    Class names are read from ``./class_indices.json``. One figure is shown
    per batch.

    Args:
        data_loader: Iterable yielding ``(images, labels)`` batches and
            exposing a ``batch_size`` attribute. The de-normalization below
            assumes the images were normalized with the mean/std constants
            used here.

    Raises:
        AssertionError: If ``./class_indices.json`` does not exist.
    """
    batch_size = data_loader.batch_size
    plot_num = min(batch_size, 4)

    json_path = './class_indices.json'
    assert os.path.exists(json_path), json_path + " does not exist."
    # Context manager so the file handle is closed after parsing.
    with open(json_path, 'r') as json_file:
        class_indices = json.load(json_file)

    for data in data_loader:
        images, labels = data
        # The last batch may hold fewer samples than plot_num.
        shown = min(plot_num, len(images))
        for i in range(shown):
            # [C, H, W] -> [H, W, C]
            img = images[i].numpy().transpose(1, 2, 0)
            # Undo Normalize(): x * std + mean, then scale to 0..255.
            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255
            # Clip rounding overshoot so the uint8 cast cannot wrap around.
            img = img.clip(0, 255)
            label = labels[i].item()
            plt.subplot(1, shown, i+1)
            plt.xlabel(class_indices[str(label)])
            plt.xticks([])  # hide x-axis ticks
            plt.yticks([])  # hide y-axis ticks
            plt.imshow(img.astype('uint8'))
        plt.show()
class AlexNet(nn.Module):
    """AlexNet variant whose ``forward`` returns intermediate feature maps.

    ``forward`` only runs the convolutional ``features`` stack and returns the
    outputs of the layers named "0", "3" and "6" (the first three conv
    layers); the ``classifier`` head is built but not applied.
    """

    def __init__(self, num_classes=1000, init_weights=False):
        super().__init__()
        conv_stack = [
            nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2),  # input[3, 224, 224]  output[48, 55, 55]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[48, 27, 27]
            nn.Conv2d(48, 128, kernel_size=5, padding=2),           # output[128, 27, 27]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[128, 13, 13]
            nn.Conv2d(128, 192, kernel_size=3, padding=1),          # output[192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, padding=1),          # output[192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 128, kernel_size=3, padding=1),          # output[128, 13, 13]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[128, 6, 6]
        ]
        self.features = nn.Sequential(*conv_stack)

        head = [
            nn.Dropout(p=0.5),
            nn.Linear(128 * 6 * 6, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Return the list of feature maps produced by layers "0", "3" and "6"."""
        tapped = ("0", "3", "6")
        feature_maps = []
        for layer_name, layer in self.features.named_children():
            x = layer(x)
            if layer_name in tapped:
                # NOTE: the ReLU that follows each tapped conv is in-place,
                # so the tensor stored here is modified by it afterwards.
                feature_maps.append(x)

        return feature_maps

    def _initialize_weights(self):
        """Kaiming-normal init for conv layers, N(0, 0.01) for linear layers, zero biases."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.constant_(module.bias, 0)
# Build the network whose parameters will be inspected.
model = AlexNet(num_classes=5)
# model = resnet34(num_classes=5)

# Load the trained weights. map_location="cpu" lets a checkpoint that was
# saved from a GPU be read on a CPU-only machine; the tensors are converted
# with .numpy() below, which requires CPU tensors.
model_weight_path = "./AlexNet.pth"  # "resNet34.pth"
model.load_state_dict(torch.load(model_weight_path, map_location="cpu"))
print(model)

# Take the state dict once instead of rebuilding it for every key.
state_dict = model.state_dict()
weights_keys = state_dict.keys()
for key in weights_keys:
    # Skip the num_batches_tracked counters that BatchNorm layers register.
    if "num_batches_tracked" in key:
        continue
    # For conv weights the layout is
    # [kernel_number, kernel_channel, kernel_height, kernel_width].
    weight_t = state_dict[key].numpy()

    # To look at a single kernel instead:
    # k = weight_t[0, :, :, :]

    # Summary statistics over every element of the parameter.
    weight_mean = weight_t.mean()
    weight_std = weight_t.std(ddof=1)
    weight_min = weight_t.min()
    weight_max = weight_t.max()
    # Argument order must follow the placeholders (mean, std, min, max);
    # min and max were previously passed the wrong way round.
    print("mean is {}, std is {}, min is {}, max is {}".format(weight_mean,
                                                               weight_std,
                                                               weight_min,
                                                               weight_max))

    # Histogram of the flattened parameter values, one figure per key.
    plt.close()
    weight_vec = np.reshape(weight_t, [-1])
    plt.hist(weight_vec, bins=50)
    plt.title(key)
    plt.show()
class ResNet(nn.Module):
    """ResNet backbone whose ``forward`` returns two early feature maps.

    Args:
        block: residual block class. It must expose an ``expansion`` class
            attribute and accept ``(in_channel, out_channel, stride=...,
            downsample=...)``.
        blocks_num: number of blocks in each of the four stages.
        num_classes: output size of ``fc`` (only built when ``include_top``).
        include_top: whether to construct ``avgpool`` and ``fc``.

    All four stages are constructed, but ``forward`` only executes the stem
    and ``layer1`` and returns ``[conv1 output, layer1 output]``.
    """

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True):
        super().__init__()
        self.include_top = include_top
        # Running channel count; _make_layer updates it after every stage.
        self.in_channel = 64

        # Stem: 7x7 stride-2 conv, batch norm, ReLU, 3x3 stride-2 max pool.
        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # (base width, stride of the first block) for layer1 .. layer4.
        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
        for idx, (width, first_stride) in enumerate(stage_specs):
            stage = self._make_layer(block, width, blocks_num[idx], stride=first_stride)
            setattr(self, "layer{}".format(idx + 1), stage)

        if include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        conv_layers = (m for m in self.modules() if isinstance(m, nn.Conv2d))
        for conv in conv_layers:
            nn.init.kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` blocks and advance ``self.in_channel``.

        Only the first block receives ``stride`` and, when the stride or the
        channel count changes, a 1x1 conv + batch-norm ``downsample`` branch.
        """
        out_channel = channel * block.expansion
        needs_projection = stride != 1 or self.in_channel != out_channel
        shortcut = None
        if needs_projection:
            shortcut = nn.Sequential(
                nn.Conv2d(self.in_channel, out_channel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channel))

        stage = [block(self.in_channel, channel, downsample=shortcut, stride=stride)]
        self.in_channel = out_channel
        stage.extend(block(self.in_channel, channel) for _ in range(1, block_num))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return ``[conv1(x), layer1(...)]``.

        layer2-layer4, ``avgpool`` and ``fc`` are not executed by this
        forward pass.
        """
        stem_out = self.conv1(x)
        stage1_out = self.layer1(self.maxpool(self.relu(self.bn1(stem_out))))
        return [stem_out, stage1_out]
class MyDataSet(Dataset):
    """Custom dataset that pairs image file paths with class indices.

    Args:
        images_path: list of image file paths.
        images_class: list of labels, aligned index-for-index with
            ``images_path``.
        transform: optional callable applied to the opened PIL image.
    """

    def __init__(self, images_path: list, images_class: list, transform=None):
        self.images_path = images_path
        self.images_class = images_class
        self.transform = transform

    def __len__(self):
        """Number of samples, i.e. the number of image paths."""
        return len(self.images_path)

    def __getitem__(self, item):
        """Open sample ``item`` and return ``(image, label)``.

        Raises:
            ValueError: if the image is not in RGB mode (e.g. "L" grayscale).
        """
        path = self.images_path[item]
        picture = Image.open(path)
        # Only RGB colour images are accepted; other modes such as "L"
        # (grayscale) are rejected.
        if picture.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(path))
        target = self.images_class[item]

        if self.transform is None:
            return picture, target
        return self.transform(picture), target

    @staticmethod
    def collate_fn(batch):
        """Stack a list of ``(image, label)`` samples into two batch tensors.

        For the official default_collate implementation see
        https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        """
        image_seq, label_seq = zip(*batch)
        return torch.stack(image_seq, dim=0), torch.as_tensor(label_seq)
def plot_data_loader_image(data_loader):
    """Show up to four de-normalized images from every batch of ``data_loader``.

    Class names are read from ``./class_indices.json`` in the current working
    directory, keyed by the label index as a string. Each batch is drawn as
    one row of subplots and ``plt.show()`` is called once per batch.

    Args:
        data_loader: iterable of ``(images, labels)`` batches exposing a
            ``batch_size`` attribute. ``images[i]`` must support
            ``.numpy()`` and be laid out as [C, H, W]; ``labels[i]`` must
            support ``.item()``.

    Raises:
        AssertionError: if ``./class_indices.json`` does not exist.
    """
    batch_size = data_loader.batch_size
    plot_num = min(batch_size, 4)

    json_path = './class_indices.json'
    assert os.path.exists(json_path), json_path + " does not exist."
    # Use a context manager so the file handle is closed after loading.
    with open(json_path, 'r') as json_file:
        class_indices = json.load(json_file)

    for images, labels in data_loader:
        # The final batch may hold fewer samples than plot_num; never index
        # past the end of the batch.
        shown = min(plot_num, len(images))
        for i in range(shown):
            # [C, H, W] -> [H, W, C]
            img = images[i].numpy().transpose(1, 2, 0)
            # Undo Normalize (multiply by std, add mean) and rescale to 0-255.
            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255
            label = labels[i].item()
            plt.subplot(1, shown, i + 1)
            plt.xlabel(class_indices[str(label)])
            plt.xticks([])  # hide x-axis ticks
            plt.yticks([])  # hide y-axis ticks
            plt.imshow(img.astype('uint8'))
        plt.show()
将感兴趣的类别id赋值给`target_category` ================================================ FILE: pytorch_classification/grad_cam/imagenet1k_classes.txt ================================================ tench, Tinca tinca goldfish, Carassius auratus great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias tiger shark, Galeocerdo cuvieri hammerhead, hammerhead shark electric ray, crampfish, numbfish, torpedo stingray cock hen ostrich, Struthio camelus brambling, Fringilla montifringilla goldfinch, Carduelis carduelis house finch, linnet, Carpodacus mexicanus junco, snowbird indigo bunting, indigo finch, indigo bird, Passerina cyanea robin, American robin, Turdus migratorius bulbul jay magpie chickadee water ouzel, dipper kite bald eagle, American eagle, Haliaeetus leucocephalus vulture great grey owl, great gray owl, Strix nebulosa European fire salamander, Salamandra salamandra common newt, Triturus vulgaris eft spotted salamander, Ambystoma maculatum axolotl, mud puppy, Ambystoma mexicanum bullfrog, Rana catesbeiana tree frog, tree-frog tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui loggerhead, loggerhead turtle, Caretta caretta leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea mud turtle terrapin box turtle, box tortoise banded gecko common iguana, iguana, Iguana iguana American chameleon, anole, Anolis carolinensis whiptail, whiptail lizard agama frilled lizard, Chlamydosaurus kingi alligator lizard Gila monster, Heloderma suspectum green lizard, Lacerta viridis African chameleon, Chamaeleo chamaeleon Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis African crocodile, Nile crocodile, Crocodylus niloticus American alligator, Alligator mississipiensis triceratops thunder snake, worm snake, Carphophis amoenus ringneck snake, ring-necked snake, ring snake hognose snake, puff adder, sand viper green snake, grass snake king snake, kingsnake garter snake, grass snake water snake vine snake 
night snake, Hypsiglena torquata boa constrictor, Constrictor constrictor rock python, rock snake, Python sebae Indian cobra, Naja naja green mamba sea snake horned viper, cerastes, sand viper, horned asp, Cerastes cornutus diamondback, diamondback rattlesnake, Crotalus adamanteus sidewinder, horned rattlesnake, Crotalus cerastes trilobite harvestman, daddy longlegs, Phalangium opilio scorpion black and gold garden spider, Argiope aurantia barn spider, Araneus cavaticus garden spider, Aranea diademata black widow, Latrodectus mactans tarantula wolf spider, hunting spider tick centipede black grouse ptarmigan ruffed grouse, partridge, Bonasa umbellus prairie chicken, prairie grouse, prairie fowl peacock quail partridge African grey, African gray, Psittacus erithacus macaw sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita lorikeet coucal bee eater hornbill hummingbird jacamar toucan drake red-breasted merganser, Mergus serrator goose black swan, Cygnus atratus tusker echidna, spiny anteater, anteater platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus wallaby, brush kangaroo koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus wombat jellyfish sea anemone, anemone brain coral flatworm, platyhelminth nematode, nematode worm, roundworm conch snail slug sea slug, nudibranch chiton, coat-of-mail shell, sea cradle, polyplacophore chambered nautilus, pearly nautilus, nautilus Dungeness crab, Cancer magister rock crab, Cancer irroratus fiddler crab king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica American lobster, Northern lobster, Maine lobster, Homarus americanus spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish crayfish, crawfish, crawdad, crawdaddy hermit crab isopod white stork, Ciconia ciconia black stork, Ciconia nigra spoonbill flamingo little blue heron, Egretta caerulea American egret, great white heron, Egretta albus bittern crane limpkin, 
Aramus pictus European gallinule, Porphyrio porphyrio American coot, marsh hen, mud hen, water hen, Fulica americana bustard ruddy turnstone, Arenaria interpres red-backed sandpiper, dunlin, Erolia alpina redshank, Tringa totanus dowitcher oystercatcher, oyster catcher pelican king penguin, Aptenodytes patagonica albatross, mollymawk grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus killer whale, killer, orca, grampus, sea wolf, Orcinus orca dugong, Dugong dugon sea lion Chihuahua Japanese spaniel Maltese dog, Maltese terrier, Maltese Pekinese, Pekingese, Peke Shih-Tzu Blenheim spaniel papillon toy terrier Rhodesian ridgeback Afghan hound, Afghan basset, basset hound beagle bloodhound, sleuthhound bluetick black-and-tan coonhound Walker hound, Walker foxhound English foxhound redbone borzoi, Russian wolfhound Irish wolfhound Italian greyhound whippet Ibizan hound, Ibizan Podenco Norwegian elkhound, elkhound otterhound, otter hound Saluki, gazelle hound Scottish deerhound, deerhound Weimaraner Staffordshire bullterrier, Staffordshire bull terrier American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier Bedlington terrier Border terrier Kerry blue terrier Irish terrier Norfolk terrier Norwich terrier Yorkshire terrier wire-haired fox terrier Lakeland terrier Sealyham terrier, Sealyham Airedale, Airedale terrier cairn, cairn terrier Australian terrier Dandie Dinmont, Dandie Dinmont terrier Boston bull, Boston terrier miniature schnauzer giant schnauzer standard schnauzer Scotch terrier, Scottish terrier, Scottie Tibetan terrier, chrysanthemum dog silky terrier, Sydney silky soft-coated wheaten terrier West Highland white terrier Lhasa, Lhasa apso flat-coated retriever curly-coated retriever golden retriever Labrador retriever Chesapeake Bay retriever German short-haired pointer vizsla, Hungarian pointer English setter Irish setter, red setter Gordon setter Brittany spaniel clumber, clumber spaniel 
English springer, English springer spaniel Welsh springer spaniel cocker spaniel, English cocker spaniel, cocker Sussex spaniel Irish water spaniel kuvasz schipperke groenendael malinois briard kelpie komondor Old English sheepdog, bobtail Shetland sheepdog, Shetland sheep dog, Shetland collie Border collie Bouvier des Flandres, Bouviers des Flandres Rottweiler German shepherd, German shepherd dog, German police dog, alsatian Doberman, Doberman pinscher miniature pinscher Greater Swiss Mountain dog Bernese mountain dog Appenzeller EntleBucher boxer bull mastiff Tibetan mastiff French bulldog Great Dane Saint Bernard, St Bernard Eskimo dog, husky malamute, malemute, Alaskan malamute Siberian husky dalmatian, coach dog, carriage dog affenpinscher, monkey pinscher, monkey dog basenji pug, pug-dog Leonberg Newfoundland, Newfoundland dog Great Pyrenees Samoyed, Samoyede Pomeranian chow, chow chow keeshond Brabancon griffon Pembroke, Pembroke Welsh corgi Cardigan, Cardigan Welsh corgi toy poodle miniature poodle standard poodle Mexican hairless timber wolf, grey wolf, gray wolf, Canis lupus white wolf, Arctic wolf, Canis lupus tundrarum red wolf, maned wolf, Canis rufus, Canis niger coyote, prairie wolf, brush wolf, Canis latrans dingo, warrigal, warragal, Canis dingo dhole, Cuon alpinus African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus hyena, hyaena red fox, Vulpes vulpes kit fox, Vulpes macrotis Arctic fox, white fox, Alopex lagopus grey fox, gray fox, Urocyon cinereoargenteus tabby, tabby cat tiger cat Persian cat Siamese cat, Siamese Egyptian cat cougar, puma, catamount, mountain lion, painter, panther, Felis concolor lynx, catamount leopard, Panthera pardus snow leopard, ounce, Panthera uncia jaguar, panther, Panthera onca, Felis onca lion, king of beasts, Panthera leo tiger, Panthera tigris cheetah, chetah, Acinonyx jubatus brown bear, bruin, Ursus arctos American black bear, black bear, Ursus americanus, Euarctos americanus ice bear, polar bear, 
Ursus Maritimus, Thalarctos maritimus sloth bear, Melursus ursinus, Ursus ursinus mongoose meerkat, mierkat tiger beetle ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle ground beetle, carabid beetle long-horned beetle, longicorn, longicorn beetle leaf beetle, chrysomelid dung beetle rhinoceros beetle weevil fly bee ant, emmet, pismire grasshopper, hopper cricket walking stick, walkingstick, stick insect cockroach, roach mantis, mantid cicada, cicala leafhopper lacewing, lacewing fly dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk damselfly admiral ringlet, ringlet butterfly monarch, monarch butterfly, milkweed butterfly, Danaus plexippus cabbage butterfly sulphur butterfly, sulfur butterfly lycaenid, lycaenid butterfly starfish, sea star sea urchin sea cucumber, holothurian wood rabbit, cottontail, cottontail rabbit hare Angora, Angora rabbit hamster porcupine, hedgehog fox squirrel, eastern fox squirrel, Sciurus niger marmot beaver guinea pig, Cavia cobaya sorrel zebra hog, pig, grunter, squealer, Sus scrofa wild boar, boar, Sus scrofa warthog hippopotamus, hippo, river horse, Hippopotamus amphibius ox water buffalo, water ox, Asiatic buffalo, Bubalus bubalis bison ram, tup bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis ibex, Capra ibex hartebeest impala, Aepyceros melampus gazelle Arabian camel, dromedary, Camelus dromedarius llama weasel mink polecat, fitch, foulmart, foumart, Mustela putorius black-footed ferret, ferret, Mustela nigripes otter skunk, polecat, wood pussy badger armadillo three-toed sloth, ai, Bradypus tridactylus orangutan, orang, orangutang, Pongo pygmaeus gorilla, Gorilla gorilla chimpanzee, chimp, Pan troglodytes gibbon, Hylobates lar siamang, Hylobates syndactylus, Symphalangus syndactylus guenon, guenon monkey patas, hussar monkey, Erythrocebus patas baboon macaque langur colobus, colobus monkey proboscis 
monkey, Nasalis larvatus marmoset capuchin, ringtail, Cebus capucinus howler monkey, howler titi, titi monkey spider monkey, Ateles geoffroyi squirrel monkey, Saimiri sciureus Madagascar cat, ring-tailed lemur, Lemur catta indri, indris, Indri indri, Indri brevicaudatus Indian elephant, Elephas maximus African elephant, Loxodonta africana lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca barracouta, snoek eel coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch rock beauty, Holocanthus tricolor anemone fish sturgeon gar, garfish, garpike, billfish, Lepisosteus osseus lionfish puffer, pufferfish, blowfish, globefish abacus abaya academic gown, academic robe, judge's robe accordion, piano accordion, squeeze box acoustic guitar aircraft carrier, carrier, flattop, attack aircraft carrier airliner airship, dirigible altar ambulance amphibian, amphibious vehicle analog clock apiary, bee house apron ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin assault rifle, assault gun backpack, back pack, knapsack, packsack, rucksack, haversack bakery, bakeshop, bakehouse balance beam, beam balloon ballpoint, ballpoint pen, ballpen, Biro Band Aid banjo bannister, banister, balustrade, balusters, handrail barbell barber chair barbershop barn barometer barrel, cask barrow, garden cart, lawn cart, wheelbarrow baseball basketball bassinet bassoon bathing cap, swimming cap bath towel bathtub, bathing tub, bath, tub beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon beacon, lighthouse, beacon light, pharos beaker bearskin, busby, shako beer bottle beer glass bell cote, bell cot bib bicycle-built-for-two, tandem bicycle, tandem bikini, two-piece binder, ring-binder binoculars, field glasses, opera glasses birdhouse boathouse bobsled, bobsleigh, bob bolo tie, bolo, bola tie, bola bonnet, poke bonnet 
bookcase bookshop, bookstore, bookstall bottlecap bow bow tie, bow-tie, bowtie brass, memorial tablet, plaque brassiere, bra, bandeau breakwater, groin, groyne, mole, bulwark, seawall, jetty breastplate, aegis, egis broom bucket, pail buckle bulletproof vest bullet train, bullet butcher shop, meat market cab, hack, taxi, taxicab caldron, cauldron candle, taper, wax light cannon canoe can opener, tin opener cardigan car mirror carousel, carrousel, merry-go-round, roundabout, whirligig carpenter's kit, tool kit carton car wheel cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM cassette cassette player castle catamaran CD player cello, violoncello cellular telephone, cellular phone, cellphone, cell, mobile phone chain chainlink fence chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour chain saw, chainsaw chest chiffonier, commode chime, bell, gong china cabinet, china closet Christmas stocking church, church building cinema, movie theater, movie theatre, movie house, picture palace cleaver, meat cleaver, chopper cliff dwelling cloak clog, geta, patten, sabot cocktail shaker coffee mug coffeepot coil, spiral, volute, whorl, helix combination lock computer keyboard, keypad confectionery, confectionary, candy store container ship, containership, container vessel convertible corkscrew, bottle screw cornet, horn, trumpet, trump cowboy boot cowboy hat, ten-gallon hat cradle crane crash helmet crate crib, cot Crock Pot croquet ball crutch cuirass dam, dike, dyke desk desktop computer dial telephone, dial phone diaper, nappy, napkin digital clock digital watch dining table, board dishrag, dishcloth dishwasher, dish washer, dishwashing machine disk brake, disc brake dock, dockage, docking facility dogsled, dog sled, dog sleigh dome doormat, welcome mat drilling platform, offshore rig drum, membranophone, tympan drumstick dumbbell Dutch oven electric fan, blower electric guitar 
electric locomotive entertainment center envelope espresso maker face powder feather boa, boa file, file cabinet, filing cabinet fireboat fire engine, fire truck fire screen, fireguard flagpole, flagstaff flute, transverse flute folding chair football helmet forklift fountain fountain pen four-poster freight car French horn, horn frying pan, frypan, skillet fur coat garbage truck, dustcart gasmask, respirator, gas helmet gas pump, gasoline pump, petrol pump, island dispenser goblet go-kart golf ball golfcart, golf cart gondola gong, tam-tam gown grand piano, grand greenhouse, nursery, glasshouse grille, radiator grille grocery store, grocery, food market, market guillotine hair slide hair spray half track hammer hamper hand blower, blow dryer, blow drier, hair dryer, hair drier hand-held computer, hand-held microcomputer handkerchief, hankie, hanky, hankey hard disc, hard disk, fixed disk harmonica, mouth organ, harp, mouth harp harp harvester, reaper hatchet holster home theater, home theatre honeycomb hook, claw hoopskirt, crinoline horizontal bar, high bar horse cart, horse-cart hourglass iPod iron, smoothing iron jack-o'-lantern jean, blue jean, denim jeep, landrover jersey, T-shirt, tee shirt jigsaw puzzle jinrikisha, ricksha, rickshaw joystick kimono knee pad knot lab coat, laboratory coat ladle lampshade, lamp shade laptop, laptop computer lawn mower, mower lens cap, lens cover letter opener, paper knife, paperknife library lifeboat lighter, light, igniter, ignitor limousine, limo liner, ocean liner lipstick, lip rouge Loafer lotion loudspeaker, speaker, speaker unit, loudspeaker system, speaker system loupe, jeweler's loupe lumbermill, sawmill magnetic compass mailbag, postbag mailbox, letter box maillot maillot, tank suit manhole cover maraca marimba, xylophone mask matchstick maypole maze, labyrinth measuring cup medicine chest, medicine cabinet megalith, megalithic structure microphone, mike microwave, microwave oven military uniform milk can minibus 
miniskirt, mini minivan missile mitten mixing bowl mobile home, manufactured home Model T modem monastery monitor moped mortar mortarboard mosque mosquito net motor scooter, scooter mountain bike, all-terrain bike, off-roader mountain tent mouse, computer mouse mousetrap moving van muzzle nail neck brace necklace nipple notebook, notebook computer obelisk oboe, hautboy, hautbois ocarina, sweet potato odometer, hodometer, mileometer, milometer oil filter organ, pipe organ oscilloscope, scope, cathode-ray oscilloscope, CRO overskirt oxcart oxygen mask packet paddle, boat paddle paddlewheel, paddle wheel padlock paintbrush pajama, pyjama, pj's, jammies palace panpipe, pandean pipe, syrinx paper towel parachute, chute parallel bars, bars park bench parking meter passenger car, coach, carriage patio, terrace pay-phone, pay-station pedestal, plinth, footstall pencil box, pencil case pencil sharpener perfume, essence Petri dish photocopier pick, plectrum, plectron pickelhaube picket fence, paling pickup, pickup truck pier piggy bank, penny bank pill bottle pillow ping-pong ball pinwheel pirate, pirate ship pitcher, ewer plane, carpenter's plane, woodworking plane planetarium plastic bag plate rack plow, plough plunger, plumber's helper Polaroid camera, Polaroid Land camera pole police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria poncho pool table, billiard table, snooker table pop bottle, soda bottle pot, flowerpot potter's wheel power drill prayer rug, prayer mat printer prison, prison house projectile, missile projector puck, hockey puck punching bag, punch bag, punching ball, punchball purse quill, quill pen quilt, comforter, comfort, puff racer, race car, racing car racket, racquet radiator radio, wireless radio telescope, radio reflector rain barrel recreational vehicle, RV, R.V. 
reel reflex camera refrigerator, icebox remote control, remote restaurant, eating house, eating place, eatery revolver, six-gun, six-shooter rifle rocking chair, rocker rotisserie rubber eraser, rubber, pencil eraser rugby ball rule, ruler running shoe safe safety pin saltshaker, salt shaker sandal sarong sax, saxophone scabbard scale, weighing machine school bus schooner scoreboard screen, CRT screen screw screwdriver seat belt, seatbelt sewing machine shield, buckler shoe shop, shoe-shop, shoe store shoji shopping basket shopping cart shovel shower cap shower curtain ski ski mask sleeping bag slide rule, slipstick sliding door slot, one-armed bandit snorkel snowmobile snowplow, snowplough soap dispenser soccer ball sock solar dish, solar collector, solar furnace sombrero soup bowl space bar space heater space shuttle spatula speedboat spider web, spider's web spindle sports car, sport car spotlight, spot stage steam locomotive steel arch bridge steel drum stethoscope stole stone wall stopwatch, stop watch stove strainer streetcar, tram, tramcar, trolley, trolley car stretcher studio couch, day bed stupa, tope submarine, pigboat, sub, U-boat suit, suit of clothes sundial sunglass sunglasses, dark glasses, shades sunscreen, sunblock, sun blocker suspension bridge swab, swob, mop sweatshirt swimming trunks, bathing trunks swing switch, electric switch, electrical switch syringe table lamp tank, army tank, armored combat vehicle, armoured combat vehicle tape player teapot teddy, teddy bear television, television system tennis ball thatch, thatched roof theater curtain, theatre curtain thimble thresher, thrasher, threshing machine throne tile roof toaster tobacco shop, tobacconist shop, tobacconist toilet seat torch totem pole tow truck, tow car, wrecker toyshop tractor trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi tray trench coat tricycle, trike, velocipede trimaran tripod triumphal arch trolleybus, trolley coach, trackless trolley 
trombone tub, vat turnstile typewriter keyboard umbrella unicycle, monocycle upright, upright piano vacuum, vacuum cleaner vase vault velvet vending machine vestment viaduct violin, fiddle volleyball waffle iron wall clock wallet, billfold, notecase, pocketbook wardrobe, closet, press warplane, military plane washbasin, handbasin, washbowl, lavabo, wash-hand basin washer, automatic washer, washing machine water bottle water jug water tower whiskey jug whistle wig window screen window shade Windsor tie wine bottle wing wok wooden spoon wool, woolen, woollen worm fence, snake fence, snake-rail fence, Virginia fence wreck yawl yurt web site, website, internet site, site comic book crossword puzzle, crossword street sign traffic light, traffic signal, stoplight book jacket, dust cover, dust jacket, dust wrapper menu plate guacamole consomme hot pot, hotpot trifle ice cream, icecream ice lolly, lolly, lollipop, popsicle French loaf bagel, beigel pretzel cheeseburger hotdog, hot dog, red hot mashed potato head cabbage broccoli cauliflower zucchini, courgette spaghetti squash acorn squash butternut squash cucumber, cuke artichoke, globe artichoke bell pepper cardoon mushroom Granny Smith strawberry orange lemon fig pineapple, ananas banana jackfruit, jak, jack custard apple pomegranate hay carbonara chocolate sauce, chocolate syrup dough meat loaf, meatloaf pizza, pizza pie potpie burrito red wine espresso cup eggnog alp bubble cliff, drop, drop-off coral reef geyser lakeside, lakeshore promontory, headland, head, foreland sandbar, sand bar seashore, coast, seacoast, sea-coast valley, vale volcano ballplayer, baseball player groom, bridegroom scuba diver rapeseed daisy yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum corn acorn hip, rose hip, rosehip buckeye, horse chestnut, conker coral fungus agaric gyromitra stinkhorn, carrion fungus earthstar hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa bolete ear, 
spike, capitulum toilet tissue, toilet paper, bathroom tissue ================================================ FILE: pytorch_classification/grad_cam/imagenet21k_classes.txt ================================================ organism, being benthos heterotroph cell person, individual, someone, somebody, mortal, soul animal, animate_being, beast, brute, creature, fauna plant, flora, plant_life food, nutrient artifact, artefact hop check-in dressage curvet, vaulting piaffe funambulism, tightrope_walking rock_climbing contact_sport outdoor_sport, field_sport gymnastics, gymnastic_exercise acrobatics, tumbling track_and_field track, running jumping broad_jump, long_jump high_jump Fosbury_flop skiing cross-country_skiing ski_jumping water_sport, aquatics swimming, swim bathe dip, plunge dive, diving floating, natation dead-man's_float, prone_float belly_flop, belly_flopper, belly_whop, belly_whopper cliff_diving flip gainer, full_gainer half_gainer jackknife swan_dive, swallow_dive skin_diving, skin-dive scuba_diving snorkeling, snorkel_diving surfing, surfboarding, surfriding water-skiing rowing, row sculling boxing, pugilism, fisticuffs professional_boxing in-fighting fight rope-a-dope spar, sparring archery sledding tobogganing luging bobsledding wrestling, rassling, grappling Greco-Roman_wrestling professional_wrestling sumo skating ice_skating figure_skating rollerblading roller_skating skateboarding speed_skating racing auto_racing, car_racing boat_racing hydroplane_racing camel_racing greyhound_racing horse_racing riding, horseback_riding, equitation equestrian_sport pony-trekking showjumping, stadium_jumping cross-country_riding, cross-country_jumping cycling bicycling motorcycling dune_cycling blood_sport bullfighting, tauromachy cockfighting hunt, hunting battue beagling coursing deer_hunting, deer_hunt ducking, duck_hunting fox_hunting, foxhunt pigsticking fishing, sportfishing angling fly-fishing troll, trolling casting, cast bait_casting fly_casting overcast 
surf_casting, surf_fishing day_game athletic_game ice_hockey, hockey, hockey_game tetherball water_polo outdoor_game golf, golf_game professional_golf round_of_golf, round medal_play, stroke_play match_play miniature_golf croquet quoits, horseshoes shuffleboard, shovelboard field_game field_hockey, hockey shinny, shinney football, football_game American_football, American_football_game professional_football touch_football hurling rugby, rugby_football, rugger ball_game, ballgame baseball, baseball_game ball professional_baseball hardball perfect_game no-hit_game, no-hitter one-hitter, 1-hitter two-hitter, 2-hitter three-hitter, 3-hitter four-hitter, 4-hitter five-hitter, 5-hitter softball, softball_game rounders stickball, stickball_game cricket lacrosse polo pushball soccer, association_football court_game handball racquetball fives squash, squash_racquets, squash_rackets volleyball, volleyball_game jai_alai, pelota badminton battledore, battledore_and_shuttlecock basketball, basketball_game, hoops professional_basketball deck_tennis netball tennis, lawn_tennis professional_tennis singles singles doubles doubles royal_tennis, real_tennis, court_tennis pallone sport, athletics clasp, clench, clutch, clutches, grasp, grip, hold judo team_sport Last_Supper, Lord's_Supper Seder, Passover_supper camping, encampment, bivouacking, tenting pest critter creepy-crawly darter peeper homeotherm, homoiotherm, homotherm poikilotherm, ectotherm range_animal scavenger bottom-feeder, bottom-dweller bottom-feeder work_animal beast_of_burden, jument draft_animal pack_animal, sumpter domestic_animal, domesticated_animal feeder feeder stocker hatchling head migrator molter, moulter pet stayer stunt marine_animal, marine_creature, sea_animal, sea_creature by-catch, bycatch female hen male adult young, offspring orphan young_mammal baby pup, whelp wolf_pup, wolf_cub puppy cub, young_carnivore lion_cub bear_cub tiger_cub kit suckling sire dam thoroughbred, purebred, pureblood giant 
mutant carnivore herbivore insectivore acrodont pleurodont microorganism, micro-organism monohybrid arbovirus, arborvirus adenovirus arenavirus Marburg_virus Arenaviridae vesiculovirus Reoviridae variola_major, variola_major_virus viroid, virusoid coliphage paramyxovirus poliovirus herpes, herpes_virus herpes_simplex_1, HS1, HSV-1, HSV-I herpes_zoster, herpes_zoster_virus herpes_varicella_zoster, herpes_varicella_zoster_virus cytomegalovirus, CMV varicella_zoster_virus polyoma, polyoma_virus lyssavirus reovirus rotavirus moneran, moneron archaebacteria, archaebacterium, archaeobacteria, archeobacteria bacteroid Bacillus_anthracis, anthrax_bacillus Yersinia_pestis Brucella spirillum, spirilla botulinus, botulinum, Clostridium_botulinum clostridium_perfringens cyanobacteria, blue-green_algae trichodesmium nitric_bacteria, nitrobacteria spirillum Francisella, genus_Francisella gonococcus, Neisseria_gonorrhoeae Corynebacterium_diphtheriae, C._diphtheriae, Klebs-Loeffler_bacillus enteric_bacteria, enterobacteria, enterics, entric klebsiella Salmonella_typhimurium typhoid_bacillus, Salmonella_typhosa, Salmonella_typhi nitrate_bacterium, nitric_bacterium nitrite_bacterium, nitrous_bacterium actinomycete streptomyces Streptomyces_erythreus Streptomyces_griseus tubercle_bacillus, Mycobacterium_tuberculosis pus-forming_bacteria streptobacillus myxobacteria, myxobacterium, myxobacter, gliding_bacteria, slime_bacteria staphylococcus, staphylococci, staph diplococcus pneumococcus, Diplococcus_pneumoniae streptococcus, streptococci, strep spirochete, spirochaete planktonic_algae zooplankton parasite endoparasite, entoparasite, entozoan, entozoon, endozoan ectoparasite, ectozoan, ectozoon, epizoan, epizoon pathogen commensal myrmecophile protoctist protozoan, protozoon sarcodinian, sarcodine heliozoan endameba ameba, amoeba globigerina testacean arcella difflugia ciliate, ciliated_protozoan, ciliophoran paramecium, paramecia stentor alga, algae arame seagrass golden_algae 
yellow-green_algae brown_algae kelp fucoid, fucoid_algae fucoid fucus bladderwrack, Ascophyllum_nodosum green_algae, chlorophyte pond_scum chlorella stonewort desmid sea_moss eukaryote, eucaryote prokaryote, procaryote zooid Leishmania, genus_Leishmania zoomastigote, zooflagellate polymastigote costia, Costia_necatrix giardia cryptomonad, cryptophyte sporozoan sporozoite trophozoite merozoite coccidium, eimeria gregarine plasmodium, Plasmodium_vivax, malaria_parasite leucocytozoan, leucocytozoon microsporidian Ostariophysi, order_Ostariophysi cypriniform_fish loach cyprinid, cyprinid_fish carp domestic_carp, Cyprinus_carpio leather_carp mirror_carp European_bream, Abramis_brama tench, Tinca_tinca dace, Leuciscus_leuciscus chub, Leuciscus_cephalus shiner common_shiner, silversides, Notropis_cornutus roach, Rutilus_rutilus rudd, Scardinius_erythrophthalmus minnow, Phoxinus_phoxinus gudgeon, Gobio_gobio goldfish, Carassius_auratus crucian_carp, Carassius_carassius, Carassius_vulgaris electric_eel, Electrophorus_electric catostomid buffalo_fish, buffalofish black_buffalo, Ictiobus_niger hog_sucker, hog_molly, Hypentelium_nigricans redhorse, redhorse_sucker cyprinodont killifish mummichog, Fundulus_heteroclitus striped_killifish, mayfish, may_fish, Fundulus_majalis rivulus flagfish, American_flagfish, Jordanella_floridae swordtail, helleri, topminnow, Xyphophorus_helleri guppy, rainbow_fish, Lebistes_reticulatus topminnow, poeciliid_fish, poeciliid, live-bearer mosquitofish, Gambusia_affinis platy, Platypoecilus_maculatus mollie, molly squirrelfish reef_squirrelfish, Holocentrus_coruscus deepwater_squirrelfish, Holocentrus_bullisi Holocentrus_ascensionis soldierfish, soldier-fish anomalops, flashlight_fish flashlight_fish, Photoblepharon_palpebratus John_Dory, Zeus_faber boarfish, Capros_aper boarfish cornetfish stickleback, prickleback three-spined_stickleback, Gasterosteus_aculeatus ten-spined_stickleback, Gasterosteus_pungitius pipefish, needlefish dwarf_pipefish, 
Syngnathus_hildebrandi deepwater_pipefish, Cosmocampus_profundus seahorse, sea_horse snipefish, bellows_fish shrimpfish, shrimp-fish trumpetfish, Aulostomus_maculatus pellicle embryo, conceptus, fertilized_egg fetus, foetus abortus spawn blastula, blastosphere blastocyst, blastodermic_vessicle gastrula morula yolk, vitellus chordate cephalochordate lancelet, amphioxus tunicate, urochordate, urochord ascidian sea_squirt salp, salpa doliolum larvacean appendicularia ascidian_tadpole vertebrate, craniate Amniota amniote aquatic_vertebrate jawless_vertebrate, jawless_fish, agnathan ostracoderm heterostracan anaspid conodont cyclostome lamprey, lamprey_eel, lamper_eel sea_lamprey, Petromyzon_marinus hagfish, hag, slime_eels Myxine_glutinosa eptatretus gnathostome placoderm cartilaginous_fish, chondrichthian holocephalan, holocephalian chimaera rabbitfish, Chimaera_monstrosa elasmobranch, selachian shark cow_shark, six-gilled_shark, Hexanchus_griseus mackerel_shark porbeagle, Lamna_nasus mako, mako_shark shortfin_mako, Isurus_oxyrhincus longfin_mako, Isurus_paucus bonito_shark, blue_pointed, Isurus_glaucus great_white_shark, white_shark, man-eater, man-eating_shark, Carcharodon_carcharias basking_shark, Cetorhinus_maximus thresher, thrasher, thresher_shark, fox_shark, Alopius_vulpinus carpet_shark, Orectolobus_barbatus nurse_shark, Ginglymostoma_cirratum sand_tiger, sand_shark, Carcharias_taurus, Odontaspis_taurus whale_shark, Rhincodon_typus requiem_shark bull_shark, cub_shark, Carcharhinus_leucas sandbar_shark, Carcharhinus_plumbeus blacktip_shark, sandbar_shark, Carcharhinus_limbatus whitetip_shark, oceanic_whitetip_shark, white-tipped_shark, Carcharinus_longimanus dusky_shark, Carcharhinus_obscurus lemon_shark, Negaprion_brevirostris blue_shark, great_blue_shark, Prionace_glauca tiger_shark, Galeocerdo_cuvieri soupfin_shark, soupfin, soup-fin, Galeorhinus_zyopterus dogfish smooth_dogfish smoothhound, smoothhound_shark, Mustelus_mustelus American_smooth_dogfish, 
Mustelus_canis Florida_smoothhound, Mustelus_norrisi whitetip_shark, reef_whitetip_shark, Triaenodon_obseus spiny_dogfish Atlantic_spiny_dogfish, Squalus_acanthias Pacific_spiny_dogfish, Squalus_suckleyi hammerhead, hammerhead_shark smooth_hammerhead, Sphyrna_zygaena smalleye_hammerhead, Sphyrna_tudes shovelhead, bonnethead, bonnet_shark, Sphyrna_tiburo angel_shark, angelfish, Squatina_squatina, monkfish ray electric_ray, crampfish, numbfish, torpedo sawfish smalltooth_sawfish, Pristis_pectinatus guitarfish stingray roughtail_stingray, Dasyatis_centroura butterfly_ray eagle_ray spotted_eagle_ray, spotted_ray, Aetobatus_narinari cownose_ray, cow-nosed_ray, Rhinoptera_bonasus manta, manta_ray, devilfish Atlantic_manta, Manta_birostris devil_ray, Mobula_hypostoma skate grey_skate, gray_skate, Raja_batis little_skate, Raja_erinacea thorny_skate, Raja_radiata barndoor_skate, Raja_laevis bird dickeybird, dickey-bird, dickybird, dicky-bird fledgling, fledgeling nestling, baby_bird cock gamecock, fighting_cock hen nester night_bird night_raven bird_of_passage archaeopteryx, archeopteryx, Archaeopteryx_lithographica archaeornis ratite, ratite_bird, flightless_bird carinate, carinate_bird, flying_bird ostrich, Struthio_camelus cassowary emu, Dromaius_novaehollandiae, Emu_novaehollandiae kiwi, apteryx rhea, Rhea_americana rhea, nandu, Pterocnemia_pennata elephant_bird, aepyornis moa passerine, passeriform_bird nonpasserine_bird oscine, oscine_bird songbird, songster honey_eater, honeysucker accentor hedge_sparrow, sparrow, dunnock, Prunella_modularis lark skylark, Alauda_arvensis wagtail pipit, titlark, lark meadow_pipit, Anthus_pratensis finch chaffinch, Fringilla_coelebs brambling, Fringilla_montifringilla goldfinch, Carduelis_carduelis linnet, lintwhite, Carduelis_cannabina siskin, Carduelis_spinus red_siskin, Carduelis_cucullata redpoll, Carduelis_flammea redpoll, Carduelis_hornemanni New_World_goldfinch, goldfinch, yellowbird, Spinus_tristis pine_siskin, pine_finch, 
Spinus_pinus house_finch, linnet, Carpodacus_mexicanus purple_finch, Carpodacus_purpureus canary, canary_bird common_canary, Serinus_canaria serin crossbill, Loxia_curvirostra bullfinch, Pyrrhula_pyrrhula junco, snowbird dark-eyed_junco, slate-colored_junco, Junco_hyemalis New_World_sparrow vesper_sparrow, grass_finch, Pooecetes_gramineus white-throated_sparrow, whitethroat, Zonotrichia_albicollis white-crowned_sparrow, Zonotrichia_leucophrys chipping_sparrow, Spizella_passerina field_sparrow, Spizella_pusilla tree_sparrow, Spizella_arborea song_sparrow, Melospiza_melodia swamp_sparrow, Melospiza_georgiana bunting indigo_bunting, indigo_finch, indigo_bird, Passerina_cyanea ortolan, ortolan_bunting, Emberiza_hortulana reed_bunting, Emberiza_schoeniclus yellowhammer, yellow_bunting, Emberiza_citrinella yellow-breasted_bunting, Emberiza_aureola snow_bunting, snowbird, snowflake, Plectrophenax_nivalis honeycreeper banana_quit sparrow, true_sparrow English_sparrow, house_sparrow, Passer_domesticus tree_sparrow, Passer_montanus grosbeak, grossbeak evening_grosbeak, Hesperiphona_vespertina hawfinch, Coccothraustes_coccothraustes pine_grosbeak, Pinicola_enucleator cardinal, cardinal_grosbeak, Richmondena_Cardinalis, Cardinalis_cardinalis, redbird pyrrhuloxia, Pyrrhuloxia_sinuata towhee chewink, cheewink, Pipilo_erythrophthalmus green-tailed_towhee, Chlorura_chlorura weaver, weaverbird, weaver_finch baya, Ploceus_philippinus whydah, whidah, widow_bird Java_sparrow, Java_finch, ricebird, Padda_oryzivora avadavat, amadavat grassfinch, grass_finch zebra_finch, Poephila_castanotis honeycreeper, Hawaiian_honeycreeper lyrebird scrubbird, scrub-bird, scrub_bird broadbill tyrannid New_World_flycatcher, flycatcher, tyrant_flycatcher, tyrant_bird kingbird, Tyrannus_tyrannus Arkansas_kingbird, western_kingbird Cassin's_kingbird, Tyrannus_vociferans eastern_kingbird grey_kingbird, gray_kingbird, petchary, Tyrannus_domenicensis_domenicensis pewee, peewee, peewit, pewit, wood_pewee, 
Contopus_virens western_wood_pewee, Contopus_sordidulus phoebe, phoebe_bird, Sayornis_phoebe vermillion_flycatcher, firebird, Pyrocephalus_rubinus_mexicanus cotinga, chatterer cock_of_the_rock, Rupicola_rupicola cock_of_the_rock, Rupicola_peruviana manakin bellbird umbrella_bird, Cephalopterus_ornatus ovenbird antbird, ant_bird ant_thrush ant_shrike spotted_antbird, Hylophylax_naevioides woodhewer, woodcreeper, wood-creeper, tree_creeper pitta scissortail, scissortailed_flycatcher, Muscivora-forficata Old_World_flycatcher, true_flycatcher, flycatcher spotted_flycatcher, Muscicapa_striata, Muscicapa_grisola thickhead, whistler thrush missel_thrush, mistle_thrush, mistletoe_thrush, Turdus_viscivorus song_thrush, mavis, throstle, Turdus_philomelos fieldfare, snowbird, Turdus_pilaris redwing, Turdus_iliacus blackbird, merl, merle, ouzel, ousel, European_blackbird, Turdus_merula ring_ouzel, ring_blackbird, ring_thrush, Turdus_torquatus robin, American_robin, Turdus_migratorius clay-colored_robin, Turdus_greyi hermit_thrush, Hylocichla_guttata veery, Wilson's_thrush, Hylocichla_fuscescens wood_thrush, Hylocichla_mustelina nightingale, Luscinia_megarhynchos thrush_nightingale, Luscinia_luscinia bulbul Old_World_chat, chat stonechat, Saxicola_torquata whinchat, Saxicola_rubetra solitaire redstart, redtail wheatear bluebird robin, redbreast, robin_redbreast, Old_World_robin, Erithacus_rubecola bluethroat, Erithacus_svecicus warbler gnatcatcher kinglet goldcrest, golden-crested_kinglet, Regulus_regulus gold-crowned_kinglet, Regulus_satrata ruby-crowned_kinglet, ruby-crowned_wren, Regulus_calendula Old_World_warbler, true_warbler blackcap, Silvia_atricapilla greater_whitethroat, whitethroat, Sylvia_communis lesser_whitethroat, whitethroat, Sylvia_curruca wood_warbler, Phylloscopus_sibilatrix sedge_warbler, sedge_bird, sedge_wren, reedbird, Acrocephalus_schoenobaenus wren_warbler tailorbird, Orthotomus_sutorius babbler, cackler New_World_warbler, wood_warbler parula_warbler, 
northern_parula, Parula_americana Wilson's_warbler, Wilson's_blackcap, Wilsonia_pusilla flycatching_warbler American_redstart, redstart, Setophaga_ruticilla Cape_May_warbler, Dendroica_tigrina yellow_warbler, golden_warbler, yellowbird, Dendroica_petechia Blackburn, Blackburnian_warbler, Dendroica_fusca Audubon's_warbler, Audubon_warbler, Dendroica_auduboni myrtle_warbler, myrtle_bird, Dendroica_coronata blackpoll, Dendroica_striate New_World_chat, chat yellow-breasted_chat, Icteria_virens ovenbird, Seiurus_aurocapillus water_thrush yellowthroat common_yellowthroat, Maryland_yellowthroat, Geothlypis_trichas riflebird, Ptloris_paradisea New_World_oriole, American_oriole, oriole northern_oriole, Icterus_galbula Baltimore_oriole, Baltimore_bird, hangbird, firebird, Icterus_galbula_galbula Bullock's_oriole, Icterus_galbula_bullockii orchard_oriole, Icterus_spurius meadowlark, lark eastern_meadowlark, Sturnella_magna western_meadowlark, Sturnella_neglecta cacique, cazique bobolink, ricebird, reedbird, Dolichonyx_oryzivorus New_World_blackbird, blackbird grackle, crow_blackbird purple_grackle, Quiscalus_quiscula rusty_blackbird, rusty_grackle, Euphagus_carilonus cowbird red-winged_blackbird, redwing, Agelaius_phoeniceus Old_World_oriole, oriole golden_oriole, Oriolus_oriolus fig-bird starling common_starling, Sturnus_vulgaris rose-colored_starling, rose-colored_pastor, Pastor_sturnus, Pastor_roseus myna, mynah, mina, minah, myna_bird, mynah_bird crested_myna, Acridotheres_tristis hill_myna, Indian_grackle, grackle, Gracula_religiosa corvine_bird crow American_crow, Corvus_brachyrhyncos raven, Corvus_corax rook, Corvus_frugilegus jackdaw, daw, Corvus_monedula chough jay Old_World_jay common_European_jay, Garullus_garullus New_World_jay blue_jay, jaybird, Cyanocitta_cristata Canada_jay, grey_jay, gray_jay, camp_robber, whisker_jack, Perisoreus_canadensis Rocky_Mountain_jay, Perisoreus_canadensis_capitalis nutcracker common_nutcracker, Nucifraga_caryocatactes 
Clark's_nutcracker, Nucifraga_columbiana magpie European_magpie, Pica_pica American_magpie, Pica_pica_hudsonia Australian_magpie butcherbird currawong, bell_magpie piping_crow, piping_crow-shrike, Gymnorhina_tibicen wren, jenny_wren winter_wren, Troglodytes_troglodytes house_wren, Troglodytes_aedon marsh_wren long-billed_marsh_wren, Cistothorus_palustris sedge_wren, short-billed_marsh_wren, Cistothorus_platensis rock_wren, Salpinctes_obsoletus Carolina_wren, Thryothorus_ludovicianus cactus_wren mockingbird, mocker, Mimus_polyglotktos blue_mockingbird, Melanotis_caerulescens catbird, grey_catbird, gray_catbird, Dumetella_carolinensis thrasher, mocking_thrush brown_thrasher, brown_thrush, Toxostoma_rufums New_Zealand_wren rock_wren, Xenicus_gilviventris rifleman_bird, Acanthisitta_chloris creeper, tree_creeper brown_creeper, American_creeper, Certhia_americana European_creeper, Certhia_familiaris wall_creeper, tichodrome, Tichodroma_muriaria European_nuthatch, Sitta_europaea red-breasted_nuthatch, Sitta_canadensis white-breasted_nuthatch, Sitta_carolinensis titmouse, tit chickadee black-capped_chickadee, blackcap, Parus_atricapillus tufted_titmouse, Parus_bicolor Carolina_chickadee, Parus_carolinensis blue_tit, tomtit, Parus_caeruleus bushtit, bush_tit wren-tit, Chamaea_fasciata verdin, Auriparus_flaviceps fairy_bluebird, bluebird swallow barn_swallow, chimney_swallow, Hirundo_rustica cliff_swallow, Hirundo_pyrrhonota tree_swallow, tree_martin, Hirundo_nigricans white-bellied_swallow, tree_swallow, Iridoprocne_bicolor martin house_martin, Delichon_urbica bank_martin, bank_swallow, sand_martin, Riparia_riparia purple_martin, Progne_subis wood_swallow, swallow_shrike tanager scarlet_tanager, Piranga_olivacea, redbird, firebird western_tanager, Piranga_ludoviciana summer_tanager, summer_redbird, Piranga_rubra hepatic_tanager, Piranga_flava_hepatica shrike butcherbird European_shrike, Lanius_excubitor northern_shrike, Lanius_borealis white-rumped_shrike, 
Lanius_ludovicianus_excubitorides loggerhead_shrike, Lanius_lucovicianus migrant_shrike, Lanius_ludovicianus_migrans bush_shrike black-fronted_bush_shrike, Chlorophoneus_nigrifrons bowerbird, catbird satin_bowerbird, satin_bird, Ptilonorhynchus_violaceus great_bowerbird, Chlamydera_nuchalis water_ouzel, dipper European_water_ouzel, Cinclus_aquaticus American_water_ouzel, Cinclus_mexicanus vireo red-eyed_vireo, Vireo_olivaceous solitary_vireo, Vireo_solitarius blue-headed_vireo, Vireo_solitarius_solitarius waxwing cedar_waxwing, cedarbird, Bombycilla_cedrorun Bohemian_waxwing, Bombycilla_garrulus bird_of_prey, raptor, raptorial_bird Accipitriformes, order_Accipitriformes hawk eyas tiercel, tercel, tercelet goshawk, Accipiter_gentilis sparrow_hawk, Accipiter_nisus Cooper's_hawk, blue_darter, Accipiter_cooperii chicken_hawk, hen_hawk buteonine redtail, red-tailed_hawk, Buteo_jamaicensis rough-legged_hawk, roughleg, Buteo_lagopus red-shouldered_hawk, Buteo_lineatus buzzard, Buteo_buteo honey_buzzard, Pernis_apivorus kite black_kite, Milvus_migrans swallow-tailed_kite, swallow-tailed_hawk, Elanoides_forficatus white-tailed_kite, Elanus_leucurus harrier marsh_harrier, Circus_Aeruginosus Montagu's_harrier, Circus_pygargus marsh_hawk, northern_harrier, hen_harrier, Circus_cyaneus harrier_eagle, short-toed_eagle falcon peregrine, peregrine_falcon, Falco_peregrinus falcon-gentle, falcon-gentil gyrfalcon, gerfalcon, Falco_rusticolus kestrel, Falco_tinnunculus sparrow_hawk, American_kestrel, kestrel, Falco_sparverius pigeon_hawk, merlin, Falco_columbarius hobby, Falco_subbuteo caracara Audubon's_caracara, Polyborus_cheriway_audubonii carancha, Polyborus_plancus eagle, bird_of_Jove young_bird eaglet harpy, harpy_eagle, Harpia_harpyja golden_eagle, Aquila_chrysaetos tawny_eagle, Aquila_rapax bald_eagle, American_eagle, Haliaeetus_leucocephalus sea_eagle Kamchatkan_sea_eagle, Stellar's_sea_eagle, Haliaeetus_pelagicus ern, erne, grey_sea_eagle, gray_sea_eagle, European_sea_eagle, 
white-tailed_sea_eagle, Haliatus_albicilla fishing_eagle, Haliaeetus_leucorhyphus osprey, fish_hawk, fish_eagle, sea_eagle, Pandion_haliaetus vulture Aegypiidae, family_Aegypiidae Old_World_vulture griffon_vulture, griffon, Gyps_fulvus bearded_vulture, lammergeier, lammergeyer, Gypaetus_barbatus Egyptian_vulture, Pharaoh's_chicken, Neophron_percnopterus black_vulture, Aegypius_monachus secretary_bird, Sagittarius_serpentarius New_World_vulture, cathartid buzzard, turkey_buzzard, turkey_vulture, Cathartes_aura condor Andean_condor, Vultur_gryphus California_condor, Gymnogyps_californianus black_vulture, carrion_crow, Coragyps_atratus king_vulture, Sarcorhamphus_papa owl, bird_of_Minerva, bird_of_night, hooter owlet little_owl, Athene_noctua horned_owl great_horned_owl, Bubo_virginianus great_grey_owl, great_gray_owl, Strix_nebulosa tawny_owl, Strix_aluco barred_owl, Strix_varia screech_owl, Otus_asio screech_owl scops_owl spotted_owl, Strix_occidentalis Old_World_scops_owl, Otus_scops Oriental_scops_owl, Otus_sunia hoot_owl hawk_owl, Surnia_ulula long-eared_owl, Asio_otus laughing_owl, laughing_jackass, Sceloglaux_albifacies barn_owl, Tyto_alba amphibian Ichyostega urodele, caudate salamander European_fire_salamander, Salamandra_salamandra spotted_salamander, fire_salamander, Salamandra_maculosa alpine_salamander, Salamandra_atra newt, triton common_newt, Triturus_vulgaris red_eft, Notophthalmus_viridescens Pacific_newt rough-skinned_newt, Taricha_granulosa California_newt, Taricha_torosa eft ambystomid, ambystomid_salamander mole_salamander, Ambystoma_talpoideum spotted_salamander, Ambystoma_maculatum tiger_salamander, Ambystoma_tigrinum axolotl, mud_puppy, Ambystoma_mexicanum waterdog hellbender, mud_puppy, Cryptobranchus_alleganiensis giant_salamander, Megalobatrachus_maximus olm, Proteus_anguinus mud_puppy, Necturus_maculosus dicamptodon, dicamptodontid Pacific_giant_salamander, Dicamptodon_ensatus olympic_salamander, Rhyacotriton_olympicus lungless_salamander, 
plethodont eastern_red-backed_salamander, Plethodon_cinereus western_red-backed_salamander, Plethodon_vehiculum dusky_salamander climbing_salamander arboreal_salamander, Aneides_lugubris slender_salamander, worm_salamander web-toed_salamander Shasta_salamander, Hydromantes_shastae limestone_salamander, Hydromantes_brunus amphiuma, congo_snake, congo_eel, blind_eel siren frog, toad, toad_frog, anuran, batrachian, salientian true_frog, ranid wood-frog, wood_frog, Rana_sylvatica leopard_frog, spring_frog, Rana_pipiens bullfrog, Rana_catesbeiana green_frog, spring_frog, Rana_clamitans cascades_frog, Rana_cascadae goliath_frog, Rana_goliath pickerel_frog, Rana_palustris tarahumara_frog, Rana_tarahumarae grass_frog, Rana_temporaria leptodactylid_frog, leptodactylid robber_frog barking_frog, robber_frog, Hylactophryne_augusti crapaud, South_American_bullfrog, Leptodactylus_pentadactylus tree_frog, tree-frog tailed_frog, bell_toad, ribbed_toad, tailed_toad, Ascaphus_trui Liopelma_hamiltoni true_toad bufo agua, agua_toad, Bufo_marinus European_toad, Bufo_bufo natterjack, Bufo_calamita American_toad, Bufo_americanus Eurasian_green_toad, Bufo_viridis American_green_toad, Bufo_debilis Yosemite_toad, Bufo_canorus Texas_toad, Bufo_speciosus southwestern_toad, Bufo_microscaphus western_toad, Bufo_boreas obstetrical_toad, midwife_toad, Alytes_obstetricans midwife_toad, Alytes_cisternasi fire-bellied_toad, Bombina_bombina spadefoot, spadefoot_toad western_spadefoot, Scaphiopus_hammondii southern_spadefoot, Scaphiopus_multiplicatus plains_spadefoot, Scaphiopus_bombifrons tree_toad, tree_frog, tree-frog spring_peeper, Hyla_crucifer Pacific_tree_toad, Hyla_regilla canyon_treefrog, Hyla_arenicolor chameleon_tree_frog cricket_frog northern_cricket_frog, Acris_crepitans eastern_cricket_frog, Acris_gryllus chorus_frog lowland_burrowing_treefrog, northern_casque-headed_frog, Pternohyla_fodiens western_narrow-mouthed_toad, Gastrophryne_olivacea eastern_narrow-mouthed_toad, 
Gastrophryne_carolinensis sheep_frog tongueless_frog Surinam_toad, Pipa_pipa, Pipa_americana African_clawed_frog, Xenopus_laevis South_American_poison_toad caecilian, blindworm reptile, reptilian anapsid, anapsid_reptile diapsid, diapsid_reptile Diapsida, subclass_Diapsida chelonian, chelonian_reptile turtle sea_turtle, marine_turtle green_turtle, Chelonia_mydas loggerhead, loggerhead_turtle, Caretta_caretta ridley Atlantic_ridley, bastard_ridley, bastard_turtle, Lepidochelys_kempii Pacific_ridley, olive_ridley, Lepidochelys_olivacea hawksbill_turtle, hawksbill, hawkbill, tortoiseshell_turtle, Eretmochelys_imbricata leatherback_turtle, leatherback, leathery_turtle, Dermochelys_coriacea snapping_turtle common_snapping_turtle, snapper, Chelydra_serpentina alligator_snapping_turtle, alligator_snapper, Macroclemys_temmincki mud_turtle musk_turtle, stinkpot terrapin diamondback_terrapin, Malaclemys_centrata red-bellied_terrapin, red-bellied_turtle, redbelly, Pseudemys_rubriventris slider, yellow-bellied_terrapin, Pseudemys_scripta cooter, river_cooter, Pseudemys_concinna box_turtle, box_tortoise Western_box_turtle, Terrapene_ornata painted_turtle, painted_terrapin, painted_tortoise, Chrysemys_picta tortoise European_tortoise, Testudo_graeca giant_tortoise gopher_tortoise, gopher_turtle, gopher, Gopherus_polypemus desert_tortoise, Gopherus_agassizii Texas_tortoise soft-shelled_turtle, pancake_turtle spiny_softshell, Trionyx_spiniferus smooth_softshell, Trionyx_muticus tuatara, Sphenodon_punctatum saurian lizard gecko flying_gecko, fringed_gecko, Ptychozoon_homalocephalum banded_gecko iguanid, iguanid_lizard common_iguana, iguana, Iguana_iguana marine_iguana, Amblyrhynchus_cristatus desert_iguana, Dipsosaurus_dorsalis chuckwalla, Sauromalus_obesus zebra-tailed_lizard, gridiron-tailed_lizard, Callisaurus_draconoides fringe-toed_lizard, Uma_notata earless_lizard collared_lizard leopard_lizard spiny_lizard fence_lizard western_fence_lizard, swift, blue-belly, 
Sceloporus_occidentalis eastern_fence_lizard, pine_lizard, Sceloporus_undulatus sagebrush_lizard, Sceloporus_graciosus side-blotched_lizard, sand_lizard, Uta_stansburiana tree_lizard, Urosaurus_ornatus horned_lizard, horned_toad, horny_frog Texas_horned_lizard, Phrynosoma_cornutum basilisk American_chameleon, anole, Anolis_carolinensis worm_lizard night_lizard skink, scincid, scincid_lizard western_skink, Eumeces_skiltonianus mountain_skink, Eumeces_callicephalus teiid_lizard, teiid whiptail, whiptail_lizard racerunner, race_runner, six-lined_racerunner, Cnemidophorus_sexlineatus plateau_striped_whiptail, Cnemidophorus_velox Chihuahuan_spotted_whiptail, Cnemidophorus_exsanguis western_whiptail, Cnemidophorus_tigris checkered_whiptail, Cnemidophorus_tesselatus teju caiman_lizard agamid, agamid_lizard agama frilled_lizard, Chlamydosaurus_kingi moloch mountain_devil, spiny_lizard, Moloch_horridus anguid_lizard alligator_lizard blindworm, slowworm, Anguis_fragilis glass_lizard, glass_snake, joint_snake legless_lizard Lanthanotus_borneensis venomous_lizard Gila_monster, Heloderma_suspectum beaded_lizard, Mexican_beaded_lizard, Heloderma_horridum lacertid_lizard, lacertid sand_lizard, Lacerta_agilis green_lizard, Lacerta_viridis chameleon, chamaeleon African_chameleon, Chamaeleo_chamaeleon horned_chameleon, Chamaeleo_oweni monitor, monitor_lizard, varan African_monitor, Varanus_niloticus Komodo_dragon, Komodo_lizard, dragon_lizard, giant_lizard, Varanus_komodoensis crocodilian_reptile, crocodilian crocodile African_crocodile, Nile_crocodile, Crocodylus_niloticus Asian_crocodile, Crocodylus_porosus Morlett's_crocodile false_gavial, Tomistoma_schlegeli alligator, gator American_alligator, Alligator_mississipiensis Chinese_alligator, Alligator_sinensis caiman, cayman spectacled_caiman, Caiman_sclerops gavial, Gavialis_gangeticus armored_dinosaur stegosaur, stegosaurus, Stegosaur_stenops ankylosaur, ankylosaurus Edmontonia bone-headed_dinosaur pachycephalosaur, 
pachycephalosaurus ceratopsian, horned_dinosaur protoceratops triceratops styracosaur, styracosaurus psittacosaur, psittacosaurus ornithopod, ornithopod_dinosaur hadrosaur, hadrosaurus, duck-billed_dinosaur trachodon, trachodont saurischian, saurischian_dinosaur sauropod, sauropod_dinosaur apatosaur, apatosaurus, brontosaur, brontosaurus, thunder_lizard, Apatosaurus_excelsus barosaur, barosaurus diplodocus argentinosaur theropod, theropod_dinosaur, bird-footed_dinosaur ceratosaur, ceratosaurus coelophysis tyrannosaur, tyrannosaurus, Tyrannosaurus_rex allosaur, allosaurus ornithomimid maniraptor oviraptorid velociraptor deinonychus utahraptor, superslasher synapsid, synapsid_reptile dicynodont pelycosaur dimetrodon pterosaur, flying_reptile pterodactyl ichthyosaur ichthyosaurus stenopterygius, Stenopterygius_quadrisicissus plesiosaur, plesiosaurus nothosaur snake, serpent, ophidian colubrid_snake, colubrid hoop_snake thunder_snake, worm_snake, Carphophis_amoenus ringneck_snake, ring-necked_snake, ring_snake hognose_snake, puff_adder, sand_viper leaf-nosed_snake green_snake, grass_snake smooth_green_snake, Opheodrys_vernalis rough_green_snake, Opheodrys_aestivus green_snake racer blacksnake, black_racer, Coluber_constrictor blue_racer, Coluber_constrictor_flaviventris horseshoe_whipsnake, Coluber_hippocrepis whip-snake, whip_snake, whipsnake coachwhip, coachwhip_snake, Masticophis_flagellum California_whipsnake, striped_racer, Masticophis_lateralis Sonoran_whipsnake, Masticophis_bilineatus rat_snake corn_snake, red_rat_snake, Elaphe_guttata black_rat_snake, blacksnake, pilot_blacksnake, mountain_blacksnake, Elaphe_obsoleta chicken_snake Indian_rat_snake, Ptyas_mucosus glossy_snake, Arizona_elegans bull_snake, bull-snake gopher_snake, Pituophis_melanoleucus pine_snake king_snake, kingsnake common_kingsnake, Lampropeltis_getulus milk_snake, house_snake, milk_adder, checkered_adder, Lampropeltis_triangulum garter_snake, grass_snake common_garter_snake, 
Thamnophis_sirtalis ribbon_snake, Thamnophis_sauritus Western_ribbon_snake, Thamnophis_proximus lined_snake, Tropidoclonion_lineatum ground_snake, Sonora_semiannulata eastern_ground_snake, Potamophis_striatula, Haldea_striatula water_snake common_water_snake, banded_water_snake, Natrix_sipedon, Nerodia_sipedon water_moccasin grass_snake, ring_snake, ringed_snake, Natrix_natrix viperine_grass_snake, Natrix_maura red-bellied_snake, Storeria_occipitamaculata sand_snake banded_sand_snake, Chilomeniscus_cinctus black-headed_snake vine_snake lyre_snake Sonoran_lyre_snake, Trimorphodon_lambda night_snake, Hypsiglena_torquata blind_snake, worm_snake western_blind_snake, Leptotyphlops_humilis indigo_snake, gopher_snake, Drymarchon_corais eastern_indigo_snake, Drymarchon_corais_couperi constrictor boa boa_constrictor, Constrictor_constrictor rubber_boa, tow-headed_snake, Charina_bottae rosy_boa, Lichanura_trivirgata anaconda, Eunectes_murinus python carpet_snake, Python_variegatus, Morelia_spilotes_variegatus reticulated_python, Python_reticulatus Indian_python, Python_molurus rock_python, rock_snake, Python_sebae amethystine_python elapid, elapid_snake coral_snake, harlequin-snake, New_World_coral_snake eastern_coral_snake, Micrurus_fulvius western_coral_snake, Micruroides_euryxanthus coral_snake, Old_World_coral_snake African_coral_snake, Aspidelaps_lubricus Australian_coral_snake, Rhynchoelaps_australis copperhead, Denisonia_superba cobra Indian_cobra, Naja_naja asp, Egyptian_cobra, Naja_haje black-necked_cobra, spitting_cobra, Naja_nigricollis hamadryad, king_cobra, Ophiophagus_hannah, Naja_hannah ringhals, rinkhals, spitting_snake, Hemachatus_haemachatus mamba black_mamba, Dendroaspis_augusticeps green_mamba death_adder, Acanthophis_antarcticus tiger_snake, Notechis_scutatus Australian_blacksnake, Pseudechis_porphyriacus krait banded_krait, banded_adder, Bungarus_fasciatus taipan, Oxyuranus_scutellatus sea_snake viper adder, common_viper, Vipera_berus asp, asp_viper, 
Vipera_aspis puff_adder, Bitis_arietans gaboon_viper, Bitis_gabonica horned_viper, cerastes, sand_viper, horned_asp, Cerastes_cornutus pit_viper copperhead, Agkistrodon_contortrix water_moccasin, cottonmouth, cottonmouth_moccasin, Agkistrodon_piscivorus rattlesnake, rattler diamondback, diamondback_rattlesnake, Crotalus_adamanteus timber_rattlesnake, banded_rattlesnake, Crotalus_horridus_horridus canebrake_rattlesnake, canebrake_rattler, Crotalus_horridus_atricaudatus prairie_rattlesnake, prairie_rattler, Western_rattlesnake, Crotalus_viridis sidewinder, horned_rattlesnake, Crotalus_cerastes Western_diamondback, Western_diamondback_rattlesnake, Crotalus_atrox rock_rattlesnake, Crotalus_lepidus tiger_rattlesnake, Crotalus_tigris Mojave_rattlesnake, Crotalus_scutulatus speckled_rattlesnake, Crotalus_mitchellii massasauga, massasauga_rattler, Sistrurus_catenatus ground_rattler, massasauga, Sistrurus_miliaris fer-de-lance, Bothrops_atrops carcase, carcass carrion arthropod trilobite arachnid, arachnoid harvestman, daddy_longlegs, Phalangium_opilio scorpion false_scorpion, pseudoscorpion book_scorpion, Chelifer_cancroides whip-scorpion, whip_scorpion vinegarroon, Mastigoproctus_giganteus spider orb-weaving_spider black_and_gold_garden_spider, Argiope_aurantia barn_spider, Araneus_cavaticus garden_spider, Aranea_diademata comb-footed_spider, theridiid black_widow, Latrodectus_mactans tarantula wolf_spider, hunting_spider European_wolf_spider, tarantula, Lycosa_tarentula trap-door_spider acarine tick hard_tick, ixodid Ixodes_dammini, deer_tick Ixodes_neotomae Ixodes_pacificus, western_black-legged_tick Ixodes_scapularis, black-legged_tick sheep-tick, sheep_tick, Ixodes_ricinus Ixodes_persulcatus Ixodes_dentatus Ixodes_spinipalpis wood_tick, American_dog_tick, Dermacentor_variabilis soft_tick, argasid mite web-spinning_mite acarid trombidiid trombiculid harvest_mite, chigger, jigger, redbug acarus, genus_Acarus itch_mite, sarcoptid rust_mite spider_mite, tetranychid 
red_spider, red_spider_mite, Panonychus_ulmi myriapod garden_centipede, garden_symphilid, symphilid, Scutigerella_immaculata tardigrade centipede house_centipede, Scutigera_coleoptrata millipede, millepede, milliped sea_spider, pycnogonid Merostomata, class_Merostomata horseshoe_crab, king_crab, Limulus_polyphemus, Xiphosurus_polyphemus Asian_horseshoe_crab eurypterid tongue_worm, pentastomid gallinaceous_bird, gallinacean domestic_fowl, fowl, poultry Dorking Plymouth_Rock Cornish, Cornish_fowl Rock_Cornish game_fowl cochin, cochin_china jungle_fowl, gallina jungle_cock jungle_hen red_jungle_fowl, Gallus_gallus chicken, Gallus_gallus bantam chick, biddy cock, rooster cockerel capon hen, biddy cackler brood_hen, broody, broody_hen, setting_hen, sitter mother_hen layer pullet spring_chicken Rhode_Island_red Dominique, Dominick Orpington turkey, Meleagris_gallopavo turkey_cock, gobbler, tom, tom_turkey ocellated_turkey, Agriocharis_ocellata grouse black_grouse European_black_grouse, heathfowl, Lyrurus_tetrix Asian_black_grouse, Lyrurus_mlokosiewiczi blackcock, black_cock greyhen, grayhen, grey_hen, gray_hen, heath_hen ptarmigan red_grouse, moorfowl, moorbird, moor-bird, moorgame, Lagopus_scoticus moorhen capercaillie, capercailzie, horse_of_the_wood, Tetrao_urogallus spruce_grouse, Canachites_canadensis sage_grouse, sage_hen, Centrocercus_urophasianus ruffed_grouse, partridge, Bonasa_umbellus sharp-tailed_grouse, sprigtail, sprig_tail, Pedioecetes_phasianellus prairie_chicken, prairie_grouse, prairie_fowl greater_prairie_chicken, Tympanuchus_cupido lesser_prairie_chicken, Tympanuchus_pallidicinctus heath_hen, Tympanuchus_cupido_cupido guan curassow piping_guan chachalaca Texas_chachalaca, Ortilis_vetula_macalli megapode, mound_bird, mound-bird, mound_builder, scrub_fowl mallee_fowl, leipoa, lowan, Leipoa_ocellata mallee_hen brush_turkey, Alectura_lathami maleo, Macrocephalon_maleo phasianid pheasant ring-necked_pheasant, Phasianus_colchicus afropavo, Congo_peafowl, 
Afropavo_congensis argus, argus_pheasant golden_pheasant, Chrysolophus_pictus bobwhite, bobwhite_quail, partridge northern_bobwhite, Colinus_virginianus Old_World_quail migratory_quail, Coturnix_coturnix, Coturnix_communis monal, monaul peafowl, bird_of_Juno peachick, pea-chick peacock peahen blue_peafowl, Pavo_cristatus green_peafowl, Pavo_muticus quail California_quail, Lofortyx_californicus tragopan partridge Hungarian_partridge, grey_partridge, gray_partridge, Perdix_perdix red-legged_partridge, Alectoris_ruffa Greek_partridge, rock_partridge, Alectoris_graeca mountain_quail, mountain_partridge, Oreortyx_picta_palmeri guinea_fowl, guinea, Numida_meleagris guinea_hen hoatzin, hoactzin, stinkbird, Opisthocomus_hoazin tinamou, partridge columbiform_bird dodo, Raphus_cucullatus pigeon pouter_pigeon, pouter dove rock_dove, rock_pigeon, Columba_livia band-tailed_pigeon, band-tail_pigeon, bandtail, Columba_fasciata wood_pigeon, ringdove, cushat, Columba_palumbus turtledove Streptopelia_turtur ringdove, Streptopelia_risoria Australian_turtledove, turtledove, Stictopelia_cuneata mourning_dove, Zenaidura_macroura domestic_pigeon squab fairy_swallow roller, tumbler, tumbler_pigeon homing_pigeon, homer carrier_pigeon passenger_pigeon, Ectopistes_migratorius sandgrouse, sand_grouse painted_sandgrouse, Pterocles_indicus pin-tailed_sandgrouse, pin-tailed_grouse, Pterocles_alchata pallas's_sandgrouse, Syrrhaptes_paradoxus parrot popinjay poll, poll_parrot African_grey, African_gray, Psittacus_erithacus amazon macaw kea, Nestor_notabilis cockatoo sulphur-crested_cockatoo, Kakatoe_galerita, Cacatua_galerita pink_cockatoo, Kakatoe_leadbeateri cockateel, cockatiel, cockatoo_parrot, Nymphicus_hollandicus lovebird lory lorikeet varied_Lorikeet, Glossopsitta_versicolor rainbow_lorikeet, Trichoglossus_moluccanus parakeet, parrakeet, parroket, paraquet, paroquet, parroquet Carolina_parakeet, Conuropsis_carolinensis budgerigar, budgereegah, budgerygah, budgie, grass_parakeet, lovebird, 
shell_parakeet, Melopsittacus_undulatus ring-necked_parakeet, Psittacula_krameri cuculiform_bird cuckoo European_cuckoo, Cuculus_canorus black-billed_cuckoo, Coccyzus_erythropthalmus roadrunner, chaparral_cock, Geococcyx_californianus ani coucal crow_pheasant, Centropus_sinensis touraco, turaco, turacou, turakoo coraciiform_bird roller European_roller, Coracias_garrulus ground_roller kingfisher Eurasian_kingfisher, Alcedo_atthis belted_kingfisher, Ceryle_alcyon kookaburra, laughing_jackass, Dacelo_gigas bee_eater hornbill hoopoe, hoopoo Euopean_hoopoe, Upupa_epops wood_hoopoe motmot, momot tody apodiform_bird swift European_swift, Apus_apus chimney_swift, chimney_swallow, Chateura_pelagica swiftlet, Collocalia_inexpectata tree_swift, crested_swift hummingbird Archilochus_colubris thornbill goatsucker, nightjar, caprimulgid European_goatsucker, European_nightjar, Caprimulgus_europaeus chuck-will's-widow, Caprimulgus_carolinensis whippoorwill, Caprimulgus_vociferus poorwill, Phalaenoptilus_nuttallii frogmouth oilbird, guacharo, Steatornis_caripensis piciform_bird woodpecker, peckerwood, pecker green_woodpecker, Picus_viridis downy_woodpecker flicker yellow-shafted_flicker, Colaptes_auratus, yellowhammer gilded_flicker, Colaptes_chrysoides red-shafted_flicker, Colaptes_caper_collaris ivorybill, ivory-billed_woodpecker, Campephilus_principalis redheaded_woodpecker, redhead, Melanerpes_erythrocephalus sapsucker yellow-bellied_sapsucker, Sphyrapicus_varius red-breasted_sapsucker, Sphyrapicus_varius_ruber wryneck piculet barbet puffbird honey_guide jacamar toucan toucanet trogon quetzal, quetzal_bird resplendent_quetzel, resplendent_trogon, Pharomacrus_mocino aquatic_bird waterfowl, water_bird, waterbird anseriform_bird duck drake quack-quack duckling diving_duck dabbling_duck, dabbler mallard, Anas_platyrhynchos black_duck, Anas_rubripes teal greenwing, green-winged_teal, Anas_crecca bluewing, blue-winged_teal, Anas_discors garganey, Anas_querquedula widgeon, wigeon, 
Anas_penelope American_widgeon, baldpate, Anas_americana shoveler, shoveller, broadbill, Anas_clypeata pintail, pin-tailed_duck, Anas_acuta sheldrake shelduck ruddy_duck, Oxyura_jamaicensis bufflehead, butterball, dipper, Bucephela_albeola goldeneye, whistler, Bucephela_clangula Barrow's_goldeneye, Bucephala_islandica canvasback, canvasback_duck, Aythya_valisineria pochard, Aythya_ferina redhead, Aythya_americana scaup, scaup_duck, bluebill, broadbill greater_scaup, Aythya_marila lesser_scaup, lesser_scaup_duck, lake_duck, Aythya_affinis wild_duck wood_duck, summer_duck, wood_widgeon, Aix_sponsa wood_drake mandarin_duck, Aix_galericulata muscovy_duck, musk_duck, Cairina_moschata sea_duck eider, eider_duck scoter, scooter common_scoter, Melanitta_nigra old_squaw, oldwife, Clangula_hyemalis merganser, fish_duck, sawbill, sheldrake goosander, Mergus_merganser American_merganser, Mergus_merganser_americanus red-breasted_merganser, Mergus_serrator smew, Mergus_albellus hooded_merganser, hooded_sheldrake, Lophodytes_cucullatus goose gosling gander Chinese_goose, Anser_cygnoides greylag, graylag, greylag_goose, graylag_goose, Anser_anser blue_goose, Chen_caerulescens snow_goose brant, brant_goose, brent, brent_goose common_brant_goose, Branta_bernicla honker, Canada_goose, Canadian_goose, Branta_canadensis barnacle_goose, barnacle, Branta_leucopsis coscoroba swan cob pen cygnet mute_swan, Cygnus_olor whooper, whooper_swan, Cygnus_cygnus tundra_swan, Cygnus_columbianus whistling_swan, Cygnus_columbianus_columbianus Bewick's_swan, Cygnus_columbianus_bewickii trumpeter, trumpeter_swan, Cygnus_buccinator black_swan, Cygnus_atratus screamer horned_screamer, Anhima_cornuta crested_screamer chaja, Chauna_torquata mammal, mammalian female_mammal tusker prototherian monotreme, egg-laying_mammal echidna, spiny_anteater, anteater echidna, spiny_anteater, anteater platypus, duckbill, duckbilled_platypus, duck-billed_platypus, Ornithorhynchus_anatinus marsupial, pouched_mammal 
opossum, possum common_opossum, Didelphis_virginiana, Didelphis_marsupialis crab-eating_opossum opossum_rat bandicoot rabbit-eared_bandicoot, rabbit_bandicoot, bilby, Macrotis_lagotis kangaroo giant_kangaroo, great_grey_kangaroo, Macropus_giganteus wallaby, brush_kangaroo common_wallaby, Macropus_agiles hare_wallaby, kangaroo_hare nail-tailed_wallaby, nail-tailed_kangaroo rock_wallaby, rock_kangaroo pademelon, paddymelon tree_wallaby, tree_kangaroo musk_kangaroo, Hypsiprymnodon_moschatus rat_kangaroo, kangaroo_rat potoroo bettong jerboa_kangaroo, kangaroo_jerboa phalanger, opossum, possum cuscus brush-tailed_phalanger, Trichosurus_vulpecula flying_phalanger, flying_opossum, flying_squirrel koala, koala_bear, kangaroo_bear, native_bear, Phascolarctos_cinereus wombat dasyurid_marsupial, dasyurid dasyure eastern_dasyure, Dasyurus_quoll native_cat, Dasyurus_viverrinus thylacine, Tasmanian_wolf, Tasmanian_tiger, Thylacinus_cynocephalus Tasmanian_devil, ursine_dasyure, Sarcophilus_hariisi pouched_mouse, marsupial_mouse, marsupial_rat numbat, banded_anteater, anteater, Myrmecobius_fasciatus pouched_mole, marsupial_mole, Notoryctus_typhlops placental, placental_mammal, eutherian, eutherian_mammal livestock, stock, farm_animal bull cow calf calf yearling buck doe insectivore mole starnose_mole, star-nosed_mole, Condylura_cristata brewer's_mole, hair-tailed_mole, Parascalops_breweri golden_mole shrew_mole Asiatic_shrew_mole, Uropsilus_soricipes American_shrew_mole, Neurotrichus_gibbsii shrew, shrewmouse common_shrew, Sorex_araneus masked_shrew, Sorex_cinereus short-tailed_shrew, Blarina_brevicauda water_shrew American_water_shrew, Sorex_palustris European_water_shrew, Neomys_fodiens Mediterranean_water_shrew, Neomys_anomalus least_shrew, Cryptotis_parva hedgehog, Erinaceus_europaeus, Erinaceus_europeaeus tenrec, tendrac tailless_tenrec, Tenrec_ecaudatus otter_shrew, potamogale, Potamogale_velox eiderdown aftershaft sickle_feather contour_feather bastard_wing, alula, 
spurious_wing saddle_hackle, saddle_feather encolure hair squama scute sclerite plastron scallop_shell oyster_shell theca invertebrate sponge, poriferan, parazoan choanocyte, collar_cell glass_sponge Venus's_flower_basket metazoan coelenterate, cnidarian planula polyp medusa, medusoid, medusan jellyfish scyphozoan Chrysaora_quinquecirrha hydrozoan, hydroid hydra siphonophore nanomia Portuguese_man-of-war, man-of-war, jellyfish praya apolemia anthozoan, actinozoan sea_anemone, anemone actinia, actinian, actiniarian sea_pen coral gorgonian, gorgonian_coral sea_feather sea_fan red_coral stony_coral, madrepore, madriporian_coral brain_coral staghorn_coral, stag's-horn_coral mushroom_coral ctenophore, comb_jelly beroe platyctenean sea_gooseberry Venus's_girdle, Cestum_veneris worm helminth, parasitic_worm woodworm woodborer, borer acanthocephalan, spiny-headed_worm arrowworm, chaetognath bladder_worm flatworm, platyhelminth planarian, planaria fluke, trematode, trematode_worm cercaria liver_fluke, Fasciola_hepatica Fasciolopsis_buski schistosome, blood_fluke tapeworm, cestode echinococcus taenia ribbon_worm, nemertean, nemertine, proboscis_worm beard_worm, pogonophoran rotifer nematode, nematode_worm, roundworm common_roundworm, Ascaris_lumbricoides chicken_roundworm, Ascaridia_galli pinworm, threadworm, Enterobius_vermicularis eelworm vinegar_eel, vinegar_worm, Anguillula_aceti, Turbatrix_aceti trichina, Trichinella_spiralis hookworm filaria Guinea_worm, Dracunculus_medinensis annelid, annelid_worm, segmented_worm archiannelid oligochaete, oligochaete_worm earthworm, angleworm, fishworm, fishing_worm, wiggler, nightwalker, nightcrawler, crawler, dew_worm, red_worm polychaete, polychete, polychaete_worm, polychete_worm lugworm, lug, lobworm sea_mouse bloodworm leech, bloodsucker, hirudinean medicinal_leech, Hirudo_medicinalis horseleech mollusk, mollusc, shellfish scaphopod tooth_shell, tusk_shell gastropod, univalve abalone, ear-shell ormer, sea-ear, 
Haliotis_tuberculata scorpion_shell conch giant_conch, Strombus_gigas snail edible_snail, Helix_pomatia garden_snail brown_snail, Helix_aspersa Helix_hortensis slug seasnail neritid, neritid_gastropod nerita bleeding_tooth, Nerita_peloronta neritina whelk moon_shell, moonshell periwinkle, winkle limpet common_limpet, Patella_vulgata keyhole_limpet, Fissurella_apertura, Diodora_apertura river_limpet, freshwater_limpet, Ancylus_fluviatilis sea_slug, nudibranch sea_hare, Aplysia_punctata Hermissenda_crassicornis bubble_shell physa cowrie, cowry money_cowrie, Cypraea_moneta tiger_cowrie, Cypraea_tigris solenogaster, aplacophoran chiton, coat-of-mail_shell, sea_cradle, polyplacophore bivalve, pelecypod, lamellibranch spat clam seashell soft-shell_clam, steamer, steamer_clam, long-neck_clam, Mya_arenaria quahog, quahaug, hard-shell_clam, hard_clam, round_clam, Venus_mercenaria, Mercenaria_mercenaria littleneck, littleneck_clam cherrystone, cherrystone_clam geoduck razor_clam, jackknife_clam, knife-handle giant_clam, Tridacna_gigas cockle edible_cockle, Cardium_edule oyster Japanese_oyster, Ostrea_gigas Virginia_oyster pearl_oyster, Pinctada_margaritifera saddle_oyster, Anomia_ephippium window_oyster, windowpane_oyster, capiz, Placuna_placenta ark_shell blood_clam mussel marine_mussel, mytilid edible_mussel, Mytilus_edulis freshwater_mussel, freshwater_clam pearly-shelled_mussel thin-shelled_mussel zebra_mussel, Dreissena_polymorpha scallop, scollop, escallop bay_scallop, Pecten_irradians sea_scallop, giant_scallop, Pecten_magellanicus shipworm, teredinid teredo piddock cephalopod, cephalopod_mollusk chambered_nautilus, pearly_nautilus, nautilus octopod octopus, devilfish paper_nautilus, nautilus, Argonaut, Argonauta_argo decapod squid loligo ommastrephes architeuthis, giant_squid cuttlefish, cuttle spirula, Spirula_peronii crustacean malacostracan_crustacean decapod_crustacean, decapod brachyuran crab stone_crab, Menippe_mercenaria hard-shell_crab soft-shell_crab, 
soft-shelled_crab Dungeness_crab, Cancer_magister rock_crab, Cancer_irroratus Jonah_crab, Cancer_borealis swimming_crab English_lady_crab, Portunus_puber American_lady_crab, lady_crab, calico_crab, Ovalipes_ocellatus blue_crab, Callinectes_sapidus fiddler_crab pea_crab king_crab, Alaska_crab, Alaskan_king_crab, Alaska_king_crab, Paralithodes_camtschatica spider_crab European_spider_crab, king_crab, Maja_squinado giant_crab, Macrocheira_kaempferi lobster true_lobster American_lobster, Northern_lobster, Maine_lobster, Homarus_americanus European_lobster, Homarus_vulgaris Cape_lobster, Homarus_capensis Norway_lobster, Nephrops_norvegicus spiny_lobster, langouste, rock_lobster, crawfish, crayfish, sea_crawfish crayfish, crawfish, crawdad, crawdaddy Old_World_crayfish, ecrevisse American_crayfish hermit_crab shrimp snapping_shrimp, pistol_shrimp prawn long-clawed_prawn, river_prawn, Palaemon_australis tropical_prawn krill Euphausia_pacifica opossum_shrimp stomatopod, stomatopod_crustacean mantis_shrimp, mantis_crab squilla, mantis_prawn isopod woodlouse, slater pill_bug sow_bug sea_louse, sea_slater amphipod skeleton_shrimp whale_louse daphnia, water_flea fairy_shrimp brine_shrimp, Artemia_salina tadpole_shrimp copepod, copepod_crustacean cyclops, water_flea seed_shrimp, mussel_shrimp, ostracod barnacle, cirriped, cirripede acorn_barnacle, rock_barnacle, Balanus_balanoides goose_barnacle, gooseneck_barnacle, Lepas_fascicularis onychophoran, velvet_worm, peripatus wading_bird, wader stork white_stork, Ciconia_ciconia black_stork, Ciconia_nigra adjutant_bird, adjutant, adjutant_stork, Leptoptilus_dubius marabou, marabout, marabou_stork, Leptoptilus_crumeniferus openbill jabiru, Jabiru_mycteria saddlebill, jabiru, Ephippiorhynchus_senegalensis policeman_bird, black-necked_stork, jabiru, Xenorhyncus_asiaticus wood_ibis, wood_stork, flinthead, Mycteria_americana shoebill, shoebird, Balaeniceps_rex ibis wood_ibis, wood_stork, Ibis_ibis sacred_ibis, Threskiornis_aethiopica 
spoonbill common_spoonbill, Platalea_leucorodia roseate_spoonbill, Ajaia_ajaja flamingo heron great_blue_heron, Ardea_herodius great_white_heron, Ardea_occidentalis egret little_blue_heron, Egretta_caerulea snowy_egret, snowy_heron, Egretta_thula little_egret, Egretta_garzetta great_white_heron, Casmerodius_albus American_egret, great_white_heron, Egretta_albus cattle_egret, Bubulcus_ibis night_heron, night_raven black-crowned_night_heron, Nycticorax_nycticorax yellow-crowned_night_heron, Nyctanassa_violacea boatbill, boat-billed_heron, broadbill, Cochlearius_cochlearius bittern American_bittern, stake_driver, Botaurus_lentiginosus European_bittern, Botaurus_stellaris least_bittern, Ixobrychus_exilis crane whooping_crane, whooper, Grus_americana courlan, Aramus_guarauna limpkin, Aramus_pictus crested_cariama, seriema, Cariama_cristata chunga, seriema, Chunga_burmeisteri rail weka, maori_hen, wood_hen crake corncrake, land_rail, Crex_crex spotted_crake, Porzana_porzana gallinule, marsh_hen, water_hen, swamphen Florida_gallinule, Gallinula_chloropus_cachinnans moorhen, Gallinula_chloropus purple_gallinule European_gallinule, Porphyrio_porphyrio American_gallinule, Porphyrula_martinica notornis, takahe, Notornis_mantelli coot American_coot, marsh_hen, mud_hen, water_hen, Fulica_americana Old_World_coot, Fulica_atra bustard great_bustard, Otis_tarda plain_turkey, Choriotis_australis button_quail, button-quail, bustard_quail, hemipode striped_button_quail, Turnix_sylvatica plain_wanderer, Pedionomus_torquatus trumpeter Brazilian_trumpeter, Psophia_crepitans seabird, sea_bird, seafowl shorebird, shore_bird, limicoline_bird plover piping_plover, Charadrius_melodus killdeer, kildeer, killdeer_plover, Charadrius_vociferus dotterel, dotrel, Charadrius_morinellus, Eudromias_morinellus golden_plover lapwing, green_plover, peewit, pewit turnstone ruddy_turnstone, Arenaria_interpres black_turnstone, Arenaria-Melanocephala sandpiper surfbird, Aphriza_virgata European_sandpiper, 
Actitis_hypoleucos spotted_sandpiper, Actitis_macularia least_sandpiper, stint, Erolia_minutilla red-backed_sandpiper, dunlin, Erolia_alpina greenshank, Tringa_nebularia redshank, Tringa_totanus yellowlegs greater_yellowlegs, Tringa_melanoleuca lesser_yellowlegs, Tringa_flavipes pectoral_sandpiper, jacksnipe, Calidris_melanotos knot, greyback, grayback, Calidris_canutus curlew_sandpiper, Calidris_Ferruginea sanderling, Crocethia_alba upland_sandpiper, upland_plover, Bartramian_sandpiper, Bartramia_longicauda ruff, Philomachus_pugnax reeve tattler Polynesian_tattler, Heteroscelus_incanus willet, Catoptrophorus_semipalmatus woodcock Eurasian_woodcock, Scolopax_rusticola American_woodcock, woodcock_snipe, Philohela_minor snipe whole_snipe, Gallinago_gallinago Wilson's_snipe, Gallinago_gallinago_delicata great_snipe, woodcock_snipe, Gallinago_media jacksnipe, half_snipe, Limnocryptes_minima dowitcher greyback, grayback, Limnodromus_griseus red-breasted_snipe, Limnodromus_scolopaceus curlew European_curlew, Numenius_arquata Eskimo_curlew, Numenius_borealis godwit Hudsonian_godwit, Limosa_haemastica stilt, stiltbird, longlegs, long-legs, stilt_plover, Himantopus_stilt black-necked_stilt, Himantopus_mexicanus black-winged_stilt, Himantopus_himantopus white-headed_stilt, Himantopus_himantopus_leucocephalus kaki, Himantopus_novae-zelandiae stilt, Australian_stilt banded_stilt, Cladorhyncus_leucocephalum avocet oystercatcher, oyster_catcher phalarope red_phalarope, Phalaropus_fulicarius northern_phalarope, Lobipes_lobatus Wilson's_phalarope, Steganopus_tricolor pratincole, glareole courser cream-colored_courser, Cursorius_cursor crocodile_bird, Pluvianus_aegyptius stone_curlew, thick-knee, Burhinus_oedicnemus coastal_diving_bird larid gull, seagull, sea_gull mew, mew_gull, sea_mew, Larus_canus black-backed_gull, great_black-backed_gull, cob, Larus_marinus herring_gull, Larus_argentatus laughing_gull, blackcap, pewit, pewit_gull, Larus_ridibundus ivory_gull, Pagophila_eburnea 
kittiwake tern sea_swallow, Sterna_hirundo skimmer jaeger parasitic_jaeger, arctic_skua, Stercorarius_parasiticus skua, bonxie great_skua, Catharacta_skua auk auklet razorbill, razor-billed_auk, Alca_torda little_auk, dovekie, Plautus_alle guillemot black_guillemot, Cepphus_grylle pigeon_guillemot, Cepphus_columba murre common_murre, Uria_aalge thick-billed_murre, Uria_lomvia puffin Atlantic_puffin, Fratercula_arctica horned_puffin, Fratercula_corniculata tufted_puffin, Lunda_cirrhata gaviiform_seabird loon, diver podicipitiform_seabird grebe great_crested_grebe, Podiceps_cristatus red-necked_grebe, Podiceps_grisegena black-necked_grebe, eared_grebe, Podiceps_nigricollis dabchick, little_grebe, Podiceps_ruficollis pied-billed_grebe, Podilymbus_podiceps pelecaniform_seabird pelican white_pelican, Pelecanus_erythrorhynchos Old_world_white_pelican, Pelecanus_onocrotalus frigate_bird, man-of-war_bird gannet solan, solan_goose, solant_goose, Sula_bassana booby cormorant, Phalacrocorax_carbo snakebird, anhinga, darter water_turkey, Anhinga_anhinga tropic_bird, tropicbird, boatswain_bird sphenisciform_seabird penguin Adelie, Adelie_penguin, Pygoscelis_adeliae king_penguin, Aptenodytes_patagonica emperor_penguin, Aptenodytes_forsteri jackass_penguin, Spheniscus_demersus rock_hopper, crested_penguin pelagic_bird, oceanic_bird procellariiform_seabird albatross, mollymawk wandering_albatross, Diomedea_exulans black-footed_albatross, gooney, gooney_bird, goonie, goony, Diomedea_nigripes petrel white-chinned_petrel, Procellaria_aequinoctialis giant_petrel, giant_fulmar, Macronectes_giganteus fulmar, fulmar_petrel, Fulmarus_glacialis shearwater Manx_shearwater, Puffinus_puffinus storm_petrel stormy_petrel, northern_storm_petrel, Hydrobates_pelagicus Mother_Carey's_chicken, Mother_Carey's_hen, Oceanites_oceanicus diving_petrel aquatic_mammal cetacean, cetacean_mammal, blower whale baleen_whale, whalebone_whale right_whale bowhead, bowhead_whale, Greenland_whale, 
Balaena_mysticetus rorqual, razorback blue_whale, sulfur_bottom, Balaenoptera_musculus finback, finback_whale, fin_whale, common_rorqual, Balaenoptera_physalus sei_whale, Balaenoptera_borealis lesser_rorqual, piked_whale, minke_whale, Balaenoptera_acutorostrata humpback, humpback_whale, Megaptera_novaeangliae grey_whale, gray_whale, devilfish, Eschrichtius_gibbosus, Eschrichtius_robustus toothed_whale sperm_whale, cachalot, black_whale, Physeter_catodon pygmy_sperm_whale, Kogia_breviceps dwarf_sperm_whale, Kogia_simus beaked_whale bottle-nosed_whale, bottlenose_whale, bottlenose, Hyperoodon_ampullatus dolphin common_dolphin, Delphinus_delphis bottlenose_dolphin, bottle-nosed_dolphin, bottlenose Atlantic_bottlenose_dolphin, Tursiops_truncatus Pacific_bottlenose_dolphin, Tursiops_gilli porpoise harbor_porpoise, herring_hog, Phocoena_phocoena vaquita, Phocoena_sinus grampus, Grampus_griseus killer_whale, killer, orca, grampus, sea_wolf, Orcinus_orca pilot_whale, black_whale, common_blackfish, blackfish, Globicephala_melaena river_dolphin narwhal, narwal, narwhale, Monodon_monoceros white_whale, beluga, Delphinapterus_leucas sea_cow, sirenian_mammal, sirenian manatee, Trichechus_manatus dugong, Dugong_dugon Steller's_sea_cow, Hydrodamalis_gigas carnivore omnivore pinniped_mammal, pinniped, pinnatiped seal crabeater_seal, crab-eating_seal eared_seal fur_seal guadalupe_fur_seal, Arctocephalus_philippi fur_seal Alaska_fur_seal, Callorhinus_ursinus sea_lion South_American_sea_lion, Otaria_Byronia California_sea_lion, Zalophus_californianus, Zalophus_californicus Australian_sea_lion, Zalophus_lobatus Steller_sea_lion, Steller's_sea_lion, Eumetopias_jubatus earless_seal, true_seal, hair_seal harbor_seal, common_seal, Phoca_vitulina harp_seal, Pagophilus_groenlandicus elephant_seal, sea_elephant bearded_seal, squareflipper_square_flipper, Erignathus_barbatus hooded_seal, bladdernose, Cystophora_cristata walrus, seahorse, sea_horse Atlantic_walrus, Odobenus_rosmarus 
Pacific_walrus, Odobenus_divergens Fissipedia fissiped_mammal, fissiped aardvark, ant_bear, anteater, Orycteropus_afer canine, canid bitch brood_bitch dog, domestic_dog, Canis_familiaris pooch, doggie, doggy, barker, bow-wow cur, mongrel, mutt feist, fice pariah_dog, pye-dog, pie-dog lapdog toy_dog, toy Chihuahua Japanese_spaniel Maltese_dog, Maltese_terrier, Maltese Pekinese, Pekingese, Peke Shih-Tzu toy_spaniel English_toy_spaniel Blenheim_spaniel King_Charles_spaniel papillon toy_terrier hunting_dog courser Rhodesian_ridgeback hound, hound_dog Afghan_hound, Afghan basset, basset_hound beagle bloodhound, sleuthhound bluetick boarhound coonhound coondog black-and-tan_coonhound dachshund, dachsie, badger_dog sausage_dog, sausage_hound foxhound American_foxhound Walker_hound, Walker_foxhound English_foxhound harrier Plott_hound redbone wolfhound borzoi, Russian_wolfhound Irish_wolfhound greyhound Italian_greyhound whippet Ibizan_hound, Ibizan_Podenco Norwegian_elkhound, elkhound otterhound, otter_hound Saluki, gazelle_hound Scottish_deerhound, deerhound staghound Weimaraner terrier bullterrier, bull_terrier Staffordshire_bullterrier, Staffordshire_bull_terrier American_Staffordshire_terrier, Staffordshire_terrier, American_pit_bull_terrier, pit_bull_terrier Bedlington_terrier Border_terrier Kerry_blue_terrier Irish_terrier Norfolk_terrier Norwich_terrier Yorkshire_terrier rat_terrier, ratter Manchester_terrier, black-and-tan_terrier toy_Manchester, toy_Manchester_terrier fox_terrier smooth-haired_fox_terrier wire-haired_fox_terrier wirehair, wirehaired_terrier, wire-haired_terrier Lakeland_terrier Welsh_terrier Sealyham_terrier, Sealyham Airedale, Airedale_terrier cairn, cairn_terrier Australian_terrier Dandie_Dinmont, Dandie_Dinmont_terrier Boston_bull, Boston_terrier schnauzer miniature_schnauzer giant_schnauzer standard_schnauzer Scotch_terrier, Scottish_terrier, Scottie Tibetan_terrier, chrysanthemum_dog silky_terrier, Sydney_silky Skye_terrier 
Clydesdale_terrier soft-coated_wheaten_terrier West_Highland_white_terrier Lhasa, Lhasa_apso sporting_dog, gun_dog bird_dog water_dog retriever flat-coated_retriever curly-coated_retriever golden_retriever Labrador_retriever Chesapeake_Bay_retriever pointer, Spanish_pointer German_short-haired_pointer setter vizsla, Hungarian_pointer English_setter Irish_setter, red_setter Gordon_setter spaniel Brittany_spaniel clumber, clumber_spaniel field_spaniel springer_spaniel, springer English_springer, English_springer_spaniel Welsh_springer_spaniel cocker_spaniel, English_cocker_spaniel, cocker Sussex_spaniel water_spaniel American_water_spaniel Irish_water_spaniel griffon, wire-haired_pointing_griffon working_dog watchdog, guard_dog kuvasz attack_dog housedog schipperke shepherd_dog, sheepdog, sheep_dog Belgian_sheepdog, Belgian_shepherd groenendael malinois briard kelpie komondor Old_English_sheepdog, bobtail Shetland_sheepdog, Shetland_sheep_dog, Shetland collie Border_collie Bouvier_des_Flandres, Bouviers_des_Flandres Rottweiler German_shepherd, German_shepherd_dog, German_police_dog, alsatian police_dog pinscher Doberman, Doberman_pinscher miniature_pinscher Sennenhunde Greater_Swiss_Mountain_dog Bernese_mountain_dog Appenzeller EntleBucher boxer mastiff bull_mastiff Tibetan_mastiff bulldog, English_bulldog French_bulldog Great_Dane guide_dog Seeing_Eye_dog hearing_dog Saint_Bernard, St_Bernard seizure-alert_dog sled_dog, sledge_dog Eskimo_dog, husky malamute, malemute, Alaskan_malamute Siberian_husky dalmatian, coach_dog, carriage_dog liver-spotted_dalmatian affenpinscher, monkey_pinscher, monkey_dog basenji pug, pug-dog Leonberg Newfoundland, Newfoundland_dog Great_Pyrenees spitz Samoyed, Samoyede Pomeranian chow, chow_chow keeshond griffon, Brussels_griffon, Belgian_griffon Brabancon_griffon corgi, Welsh_corgi Pembroke, Pembroke_Welsh_corgi Cardigan, Cardigan_Welsh_corgi poodle, poodle_dog toy_poodle miniature_poodle standard_poodle large_poodle Mexican_hairless 
wolf timber_wolf, grey_wolf, gray_wolf, Canis_lupus white_wolf, Arctic_wolf, Canis_lupus_tundrarum red_wolf, maned_wolf, Canis_rufus, Canis_niger coyote, prairie_wolf, brush_wolf, Canis_latrans coydog jackal, Canis_aureus wild_dog dingo, warrigal, warragal, Canis_dingo dhole, Cuon_alpinus crab-eating_dog, crab-eating_fox, Dusicyon_cancrivorus raccoon_dog, Nyctereutes_procyonides African_hunting_dog, hyena_dog, Cape_hunting_dog, Lycaon_pictus hyena, hyaena striped_hyena, Hyaena_hyaena brown_hyena, strand_wolf, Hyaena_brunnea spotted_hyena, laughing_hyena, Crocuta_crocuta aardwolf, Proteles_cristata fox vixen Reynard red_fox, Vulpes_vulpes black_fox silver_fox red_fox, Vulpes_fulva kit_fox, prairie_fox, Vulpes_velox kit_fox, Vulpes_macrotis Arctic_fox, white_fox, Alopex_lagopus blue_fox grey_fox, gray_fox, Urocyon_cinereoargenteus feline, felid cat, true_cat domestic_cat, house_cat, Felis_domesticus, Felis_catus kitty, kitty-cat, puss, pussy, pussycat mouser alley_cat stray tom, tomcat gib tabby, queen kitten, kitty tabby, tabby_cat tiger_cat tortoiseshell, tortoiseshell-cat, calico_cat Persian_cat Angora, Angora_cat Siamese_cat, Siamese blue_point_Siamese Burmese_cat Egyptian_cat Maltese, Maltese_cat Abyssinian, Abyssinian_cat Manx, Manx_cat wildcat sand_cat European_wildcat, catamountain, Felis_silvestris cougar, puma, catamount, mountain_lion, painter, panther, Felis_concolor ocelot, panther_cat, Felis_pardalis jaguarundi, jaguarundi_cat, jaguarondi, eyra, Felis_yagouaroundi kaffir_cat, caffer_cat, Felis_ocreata jungle_cat, Felis_chaus serval, Felis_serval leopard_cat, Felis_bengalensis margay, margay_cat, Felis_wiedi manul, Pallas's_cat, Felis_manul lynx, catamount common_lynx, Lynx_lynx Canada_lynx, Lynx_canadensis bobcat, bay_lynx, Lynx_rufus spotted_lynx, Lynx_pardina caracal, desert_lynx, Lynx_caracal big_cat, cat leopard, Panthera_pardus leopardess panther snow_leopard, ounce, Panthera_uncia jaguar, panther, Panthera_onca, Felis_onca lion, king_of_beasts, 
Panthera_leo lioness lionet tiger, Panthera_tigris Bengal_tiger tigress liger tiglon, tigon cheetah, chetah, Acinonyx_jubatus saber-toothed_tiger, sabertooth Smiledon_californicus bear brown_bear, bruin, Ursus_arctos bruin Syrian_bear, Ursus_arctos_syriacus grizzly, grizzly_bear, silvertip, silver-tip, Ursus_horribilis, Ursus_arctos_horribilis Alaskan_brown_bear, Kodiak_bear, Kodiak, Ursus_middendorffi, Ursus_arctos_middendorffi American_black_bear, black_bear, Ursus_americanus, Euarctos_americanus cinnamon_bear Asiatic_black_bear, black_bear, Ursus_thibetanus, Selenarctos_thibetanus ice_bear, polar_bear, Ursus_Maritimus, Thalarctos_maritimus sloth_bear, Melursus_ursinus, Ursus_ursinus viverrine, viverrine_mammal civet, civet_cat large_civet, Viverra_zibetha small_civet, Viverricula_indica, Viverricula_malaccensis binturong, bearcat, Arctictis_bintourong Cryptoprocta, genus_Cryptoprocta fossa, fossa_cat, Cryptoprocta_ferox fanaloka, Fossa_fossa genet, Genetta_genetta banded_palm_civet, Hemigalus_hardwickii mongoose Indian_mongoose, Herpestes_nyula ichneumon, Herpestes_ichneumon palm_cat, palm_civet meerkat, mierkat slender-tailed_meerkat, Suricata_suricatta suricate, Suricata_tetradactyla bat, chiropteran fruit_bat, megabat flying_fox Pteropus_capestratus Pteropus_hypomelanus harpy, harpy_bat, tube-nosed_bat, tube-nosed_fruit_bat Cynopterus_sphinx carnivorous_bat, microbat mouse-eared_bat leafnose_bat, leaf-nosed_bat macrotus, Macrotus_californicus spearnose_bat Phyllostomus_hastatus hognose_bat, Choeronycteris_mexicana horseshoe_bat horseshoe_bat orange_bat, orange_horseshoe_bat, Rhinonicteris_aurantius false_vampire, false_vampire_bat big-eared_bat, Megaderma_lyra vespertilian_bat, vespertilionid frosted_bat, Vespertilio_murinus red_bat, Lasiurus_borealis brown_bat little_brown_bat, little_brown_myotis, Myotis_leucifugus cave_myotis, Myotis_velifer big_brown_bat, Eptesicus_fuscus serotine, European_brown_bat, Eptesicus_serotinus pallid_bat, cave_bat, 
Antrozous_pallidus pipistrelle, pipistrel, Pipistrellus_pipistrellus eastern_pipistrel, Pipistrellus_subflavus jackass_bat, spotted_bat, Euderma_maculata long-eared_bat western_big-eared_bat, Plecotus_townsendi freetail, free-tailed_bat, freetailed_bat guano_bat, Mexican_freetail_bat, Tadarida_brasiliensis pocketed_bat, pocketed_freetail_bat, Tadirida_femorosacca mastiff_bat vampire_bat, true_vampire_bat Desmodus_rotundus hairy-legged_vampire_bat, Diphylla_ecaudata predator, predatory_animal prey, quarry game big_game game_bird fossorial_mammal tetrapod quadruped hexapod biped insect social_insect holometabola, metabola defoliator pollinator gallfly scorpion_fly hanging_fly collembolan, springtail beetle tiger_beetle ladybug, ladybeetle, lady_beetle, ladybird, ladybird_beetle two-spotted_ladybug, Adalia_bipunctata Mexican_bean_beetle, bean_beetle, Epilachna_varivestis Hippodamia_convergens vedalia, Rodolia_cardinalis ground_beetle, carabid_beetle bombardier_beetle calosoma searcher, searcher_beetle, Calosoma_scrutator firefly, lightning_bug glowworm long-horned_beetle, longicorn, longicorn_beetle sawyer, sawyer_beetle pine_sawyer leaf_beetle, chrysomelid flea_beetle Colorado_potato_beetle, Colorado_beetle, potato_bug, potato_beetle, Leptinotarsa_decemlineata carpet_beetle, carpet_bug buffalo_carpet_beetle, Anthrenus_scrophulariae black_carpet_beetle clerid_beetle, clerid bee_beetle lamellicorn_beetle scarabaeid_beetle, scarabaeid, scarabaean dung_beetle scarab, scarabaeus, Scarabaeus_sacer tumblebug dorbeetle June_beetle, June_bug, May_bug, May_beetle green_June_beetle, figeater Japanese_beetle, Popillia_japonica Oriental_beetle, Asiatic_beetle, Anomala_orientalis rhinoceros_beetle melolonthid_beetle cockchafer, May_bug, May_beetle, Melolontha_melolontha rose_chafer, rose_bug, Macrodactylus_subspinosus rose_chafer, rose_beetle, Cetonia_aurata stag_beetle elaterid_beetle, elater, elaterid click_beetle, skipjack, snapping_beetle firefly, fire_beetle, 
Pyrophorus_noctiluca wireworm water_beetle whirligig_beetle deathwatch_beetle, deathwatch, Xestobium_rufovillosum weevil snout_beetle boll_weevil, Anthonomus_grandis blister_beetle, meloid oil_beetle Spanish_fly Dutch-elm_beetle, Scolytus_multistriatus bark_beetle spruce_bark_beetle, Dendroctonus_rufipennis rove_beetle darkling_beetle, darkling_groung_beetle, tenebrionid mealworm flour_beetle, flour_weevil seed_beetle, seed_weevil pea_weevil, Bruchus_pisorum bean_weevil, Acanthoscelides_obtectus rice_weevil, black_weevil, Sitophylus_oryzae Asian_longhorned_beetle, Anoplophora_glabripennis web_spinner louse, sucking_louse common_louse, Pediculus_humanus head_louse, Pediculus_capitis body_louse, cootie, Pediculus_corporis crab_louse, pubic_louse, crab, Phthirius_pubis bird_louse, biting_louse, louse flea Pulex_irritans dog_flea, Ctenocephalides_canis cat_flea, Ctenocephalides_felis chigoe, chigger, chigoe_flea, Tunga_penetrans sticktight, sticktight_flea, Echidnophaga_gallinacea dipterous_insect, two-winged_insects, dipteran, dipteron gall_midge, gallfly, gall_gnat Hessian_fly, Mayetiola_destructor fly housefly, house_fly, Musca_domestica tsetse_fly, tsetse, tzetze_fly, tzetze, glossina blowfly, blow_fly bluebottle, Calliphora_vicina greenbottle, greenbottle_fly flesh_fly, Sarcophaga_carnaria tachina_fly gadfly botfly human_botfly, Dermatobia_hominis sheep_botfly, sheep_gadfly, Oestrus_ovis warble_fly horsefly, cleg, clegg, horse_fly bee_fly robber_fly, bee_killer fruit_fly, pomace_fly apple_maggot, railroad_worm, Rhagoletis_pomonella Mediterranean_fruit_fly, medfly, Ceratitis_capitata drosophila, Drosophila_melanogaster vinegar_fly leaf_miner, leaf-miner louse_fly, hippoboscid horse_tick, horsefly, Hippobosca_equina sheep_ked, sheep-tick, sheep_tick, Melophagus_Ovinus horn_fly, Haematobia_irritans mosquito wiggler, wriggler gnat yellow-fever_mosquito, Aedes_aegypti Asian_tiger_mosquito, Aedes_albopictus anopheline malarial_mosquito, malaria_mosquito common_mosquito, 
Culex_pipiens Culex_quinquefasciatus, Culex_fatigans gnat punkie, punky, punkey, no-see-um, biting_midge midge fungus_gnat psychodid sand_fly, sandfly, Phlebotomus_papatasii fungus_gnat, sciara, sciarid armyworm crane_fly, daddy_longlegs blackfly, black_fly, buffalo_gnat hymenopterous_insect, hymenopteran, hymenopteron, hymenopter bee drone queen_bee worker soldier worker_bee honeybee, Apis_mellifera Africanized_bee, Africanized_honey_bee, killer_bee, Apis_mellifera_scutellata, Apis_mellifera_adansonii black_bee, German_bee Carniolan_bee Italian_bee carpenter_bee bumblebee, humblebee cuckoo-bumblebee andrena, andrenid, mining_bee Nomia_melanderi, alkali_bee leaf-cutting_bee, leaf-cutter, leaf-cutter_bee mason_bee potter_bee wasp vespid, vespid_wasp paper_wasp hornet giant_hornet, Vespa_crabro common_wasp, Vespula_vulgaris bald-faced_hornet, white-faced_hornet, Vespula_maculata yellow_jacket, yellow_hornet, Vespula_maculifrons Polistes_annularis mason_wasp potter_wasp Mutillidae, family_Mutillidae velvet_ant sphecoid_wasp, sphecoid mason_wasp digger_wasp cicada_killer, Sphecius_speciosis mud_dauber gall_wasp, gallfly, cynipid_wasp, cynipid_gall_wasp chalcid_fly, chalcidfly, chalcid, chalcid_wasp strawworm, jointworm chalcis_fly ichneumon_fly sawfly birch_leaf_miner, Fenusa_pusilla ant, emmet, pismire pharaoh_ant, pharaoh's_ant, Monomorium_pharaonis little_black_ant, Monomorium_minimum army_ant, driver_ant, legionary_ant carpenter_ant fire_ant wood_ant, Formica_rufa slave_ant Formica_fusca slave-making_ant, slave-maker sanguinary_ant, Formica_sanguinea bulldog_ant Amazon_ant, Polyergus_rufescens termite, white_ant dry-wood_termite Reticulitermes_lucifugus Mastotermes_darwiniensis Mastotermes_electrodominicus powder-post_termite, Cryptotermes_brevis orthopterous_insect, orthopteron, orthopteran grasshopper, hopper short-horned_grasshopper, acridid locust migratory_locust, Locusta_migratoria migratory_grasshopper long-horned_grasshopper, tettigoniid katydid 
mormon_cricket, Anabrus_simplex sand_cricket, Jerusalem_cricket, Stenopelmatus_fuscus cricket mole_cricket European_house_cricket, Acheta_domestica field_cricket, Acheta_assimilis tree_cricket snowy_tree_cricket, Oecanthus_fultoni phasmid, phasmid_insect walking_stick, walkingstick, stick_insect diapheromera, Diapheromera_femorata walking_leaf, leaf_insect cockroach, roach oriental_cockroach, oriental_roach, Asiatic_cockroach, blackbeetle, Blatta_orientalis American_cockroach, Periplaneta_americana Australian_cockroach, Periplaneta_australasiae German_cockroach, Croton_bug, crotonbug, water_bug, Blattella_germanica giant_cockroach mantis, mantid praying_mantis, praying_mantid, Mantis_religioso bug hemipterous_insect, bug, hemipteran, hemipteron leaf_bug, plant_bug mirid_bug, mirid, capsid four-lined_plant_bug, four-lined_leaf_bug, Poecilocapsus_lineatus lygus_bug tarnished_plant_bug, Lygus_lineolaris lace_bug lygaeid, lygaeid_bug chinch_bug, Blissus_leucopterus coreid_bug, coreid squash_bug, Anasa_tristis leaf-footed_bug, leaf-foot_bug bedbug, bed_bug, chinch, Cimex_lectularius backswimmer, Notonecta_undulata true_bug heteropterous_insect water_bug giant_water_bug water_scorpion water_boatman, boat_bug water_strider, pond-skater, water_skater common_pond-skater, Gerris_lacustris assassin_bug, reduviid conenose, cone-nosed_bug, conenose_bug, big_bedbug, kissing_bug wheel_bug, Arilus_cristatus firebug cotton_stainer homopterous_insect, homopteran whitefly citrus_whitefly, Dialeurodes_citri greenhouse_whitefly, Trialeurodes_vaporariorum sweet-potato_whitefly superbug, Bemisia_tabaci, poinsettia_strain cotton_strain coccid_insect scale_insect soft_scale brown_soft_scale, Coccus_hesperidum armored_scale San_Jose_scale, Aspidiotus_perniciosus cochineal_insect, cochineal, Dactylopius_coccus mealybug, mealy_bug citrophilous_mealybug, citrophilus_mealybug, Pseudococcus_fragilis Comstock_mealybug, Comstock's_mealybug, Pseudococcus_comstocki citrus_mealybug, Planococcus_citri 
plant_louse, louse aphid apple_aphid, green_apple_aphid, Aphis_pomi blackfly, bean_aphid, Aphis_fabae greenfly green_peach_aphid ant_cow woolly_aphid, woolly_plant_louse woolly_apple_aphid, American_blight, Eriosoma_lanigerum woolly_alder_aphid, Prociphilus_tessellatus adelgid balsam_woolly_aphid, Adelges_piceae spruce_gall_aphid, Adelges_abietis woolly_adelgid jumping_plant_louse, psylla, psyllid cicada, cicala dog-day_cicada, harvest_fly seventeen-year_locust, periodical_cicada, Magicicada_septendecim spittle_insect, spittlebug froghopper meadow_spittlebug, Philaenus_spumarius pine_spittlebug Saratoga_spittlebug, Aphrophora_saratogensis leafhopper plant_hopper, planthopper treehopper lantern_fly, lantern-fly psocopterous_insect psocid bark-louse, bark_louse booklouse, book_louse, deathwatch, Liposcelis_divinatorius common_booklouse, Trogium_pulsatorium ephemerid, ephemeropteran mayfly, dayfly, shadfly stonefly, stone_fly, plecopteran neuropteron, neuropteran, neuropterous_insect ant_lion, antlion, antlion_fly doodlebug, ant_lion, antlion lacewing, lacewing_fly aphid_lion, aphis_lion green_lacewing, chrysopid, stink_fly brown_lacewing, hemerobiid, hemerobiid_fly dobson, dobsonfly, dobson_fly, Corydalus_cornutus hellgrammiate, dobson fish_fly, fish-fly alderfly, alder_fly, Sialis_lutaria snakefly mantispid odonate dragonfly, darning_needle, devil's_darning_needle, sewing_needle, snake_feeder, snake_doctor, mosquito_hawk, skeeter_hawk damselfly trichopterous_insect, trichopteran, trichopteron caddis_fly, caddis-fly, caddice_fly, caddice-fly caseworm caddisworm, strawworm thysanuran_insect, thysanuron bristletail silverfish, Lepisma_saccharina firebrat, Thermobia_domestica jumping_bristletail, machilid thysanopter, thysanopteron, thysanopterous_insect thrips, thrip, thripid tobacco_thrips, Frankliniella_fusca onion_thrips, onion_louse, Thrips_tobaci earwig common_European_earwig, Forficula_auricularia lepidopterous_insect, lepidopteron, lepidopteran butterfly 
nymphalid, nymphalid_butterfly, brush-footed_butterfly, four-footed_butterfly mourning_cloak, mourning_cloak_butterfly, Camberwell_beauty, Nymphalis_antiopa tortoiseshell, tortoiseshell_butterfly painted_beauty, Vanessa_virginiensis admiral red_admiral, Vanessa_atalanta white_admiral, Limenitis_camilla banded_purple, white_admiral, Limenitis_arthemis red-spotted_purple, Limenitis_astyanax viceroy, Limenitis_archippus anglewing ringlet, ringlet_butterfly comma, comma_butterfly, Polygonia_comma fritillary silverspot emperor_butterfly, emperor purple_emperor, Apatura_iris peacock, peacock_butterfly, Inachis_io danaid, danaid_butterfly monarch, monarch_butterfly, milkweed_butterfly, Danaus_plexippus pierid, pierid_butterfly cabbage_butterfly small_white, Pieris_rapae large_white, Pieris_brassicae southern_cabbage_butterfly, Pieris_protodice sulphur_butterfly, sulfur_butterfly lycaenid, lycaenid_butterfly blue copper American_copper, Lycaena_hypophlaeas hairstreak, hairstreak_butterfly Strymon_melinus moth moth_miller, miller tortricid, tortricid_moth leaf_roller, leaf-roller tea_tortrix, tortrix, Homona_coffearia orange_tortrix, tortrix, Argyrotaenia_citrana codling_moth, codlin_moth, Carpocapsa_pomonella lymantriid, tussock_moth tussock_caterpillar gypsy_moth, gipsy_moth, Lymantria_dispar browntail, brown-tail_moth, Euproctis_phaeorrhoea gold-tail_moth, Euproctis_chrysorrhoea geometrid, geometrid_moth Paleacrita_vernata Alsophila_pometaria cankerworm spring_cankerworm fall_cankerworm measuring_worm, inchworm, looper pyralid, pyralid_moth bee_moth, wax_moth, Galleria_mellonella corn_borer, European_corn_borer_moth, corn_borer_moth, Pyrausta_nubilalis Mediterranean_flour_moth, Anagasta_kuehniella tobacco_moth, cacao_moth, Ephestia_elutella almond_moth, fig_moth, Cadra_cautella raisin_moth, Cadra_figulilella tineoid, tineoid_moth tineid, tineid_moth clothes_moth casemaking_clothes_moth, Tinea_pellionella webbing_clothes_moth, webbing_moth, Tineola_bisselliella 
carpet_moth, tapestry_moth, Trichophaga_tapetzella gelechiid, gelechiid_moth grain_moth angoumois_moth, angoumois_grain_moth, Sitotroga_cerealella potato_moth, potato_tuber_moth, splitworm, Phthorimaea_operculella potato_tuberworm, Phthorimaea_operculella noctuid_moth, noctuid, owlet_moth cutworm underwing red_underwing, Catocala_nupta antler_moth, Cerapteryx_graminis heliothis_moth, Heliothis_zia army_cutworm, Chorizagrotis_auxiliaris armyworm, Pseudaletia_unipuncta armyworm, army_worm, Pseudaletia_unipuncta Spodoptera_exigua beet_armyworm, Spodoptera_exigua Spodoptera_frugiperda fall_armyworm, Spodoptera_frugiperda hawkmoth, hawk_moth, sphingid, sphinx_moth, hummingbird_moth Manduca_sexta tobacco_hornworm, tomato_worm, Manduca_sexta Manduca_quinquemaculata tomato_hornworm, potato_worm, Manduca_quinquemaculata death's-head_moth, Acherontia_atropos bombycid, bombycid_moth, silkworm_moth domestic_silkworm_moth, domesticated_silkworm_moth, Bombyx_mori silkworm saturniid, saturniid_moth emperor, emperor_moth, Saturnia_pavonia imperial_moth, Eacles_imperialis giant_silkworm_moth, silkworm_moth silkworm, giant_silkworm, wild_wilkworm luna_moth, Actias_luna cecropia, cecropia_moth, Hyalophora_cecropia cynthia_moth, Samia_cynthia, Samia_walkeri ailanthus_silkworm, Samia_cynthia io_moth, Automeris_io polyphemus_moth, Antheraea_polyphemus pernyi_moth, Antheraea_pernyi tussah, tusseh, tussur, tussore, tusser, Antheraea_mylitta atlas_moth, Atticus_atlas arctiid, arctiid_moth tiger_moth cinnabar, cinnabar_moth, Callimorpha_jacobeae lasiocampid, lasiocampid_moth eggar, egger tent-caterpillar_moth, Malacosoma_americana tent_caterpillar tent-caterpillar_moth, Malacosoma_disstria forest_tent_caterpillar, Malacosoma_disstria lappet, lappet_moth lappet_caterpillar webworm webworm_moth Hyphantria_cunea fall_webworm, Hyphantria_cunea garden_webworm, Loxostege_similalis instar caterpillar corn_borer, Pyrausta_nubilalis bollworm pink_bollworm, Gelechia_gossypiella corn_earworm, 
cotton_bollworm, tomato_fruitworm, tobacco_budworm, vetchworm, Heliothis_zia cabbageworm, Pieris_rapae woolly_bear, woolly_bear_caterpillar woolly_bear_moth larva nymph leptocephalus grub maggot leatherjacket pupa chrysalis imago queen phoronid bryozoan, polyzoan, sea_mat, sea_moss, moss_animal brachiopod, lamp_shell, lampshell peanut_worm, sipunculid echinoderm starfish, sea_star brittle_star, brittle-star, serpent_star basket_star, basket_fish Astrophyton_muricatum sea_urchin edible_sea_urchin, Echinus_esculentus sand_dollar heart_urchin crinoid sea_lily feather_star, comatulid sea_cucumber, holothurian trepang, Holothuria_edulis Duplicidentata lagomorph, gnawing_mammal leporid, leporid_mammal rabbit, coney, cony rabbit_ears lapin bunny, bunny_rabbit European_rabbit, Old_World_rabbit, Oryctolagus_cuniculus wood_rabbit, cottontail, cottontail_rabbit eastern_cottontail, Sylvilagus_floridanus swamp_rabbit, canecutter, swamp_hare, Sylvilagus_aquaticus marsh_hare, swamp_rabbit, Sylvilagus_palustris hare leveret European_hare, Lepus_europaeus jackrabbit white-tailed_jackrabbit, whitetail_jackrabbit, Lepus_townsendi blacktail_jackrabbit, Lepus_californicus polar_hare, Arctic_hare, Lepus_arcticus snowshoe_hare, snowshoe_rabbit, varying_hare, Lepus_americanus Belgian_hare, leporide Angora, Angora_rabbit pika, mouse_hare, rock_rabbit, coney, cony little_chief_hare, Ochotona_princeps collared_pika, Ochotona_collaris rodent, gnawer mouse rat pocket_rat murine house_mouse, Mus_musculus harvest_mouse, Micromyx_minutus field_mouse, fieldmouse nude_mouse European_wood_mouse, Apodemus_sylvaticus brown_rat, Norway_rat, Rattus_norvegicus wharf_rat sewer_rat black_rat, roof_rat, Rattus_rattus bandicoot_rat, mole_rat jerboa_rat kangaroo_mouse water_rat beaver_rat New_World_mouse American_harvest_mouse, harvest_mouse wood_mouse white-footed_mouse, vesper_mouse, Peromyscus_leucopus deer_mouse, Peromyscus_maniculatus cactus_mouse, Peromyscus_eremicus cotton_mouse, Peromyscus_gossypinus 
pygmy_mouse, Baiomys_taylori grasshopper_mouse muskrat, musquash, Ondatra_zibethica round-tailed_muskrat, Florida_water_rat, Neofiber_alleni cotton_rat, Sigmodon_hispidus wood_rat, wood-rat dusky-footed_wood_rat vole, field_mouse packrat, pack_rat, trade_rat, bushytail_woodrat, Neotoma_cinerea dusky-footed_woodrat, Neotoma_fuscipes eastern_woodrat, Neotoma_floridana rice_rat, Oryzomys_palustris pine_vole, pine_mouse, Pitymys_pinetorum meadow_vole, meadow_mouse, Microtus_pennsylvaticus water_vole, Richardson_vole, Microtus_richardsoni prairie_vole, Microtus_ochrogaster water_vole, water_rat, Arvicola_amphibius red-backed_mouse, redback_vole phenacomys hamster Eurasian_hamster, Cricetus_cricetus golden_hamster, Syrian_hamster, Mesocricetus_auratus gerbil, gerbille jird tamarisk_gerbil, Meriones_unguiculatus sand_rat, Meriones_longifrons lemming European_lemming, Lemmus_lemmus brown_lemming, Lemmus_trimucronatus grey_lemming, gray_lemming, red-backed_lemming pied_lemming Hudson_bay_collared_lemming, Dicrostonyx_hudsonius southern_bog_lemming, Synaptomys_cooperi northern_bog_lemming, Synaptomys_borealis porcupine, hedgehog Old_World_porcupine brush-tailed_porcupine, brush-tail_porcupine long-tailed_porcupine, Trichys_lipura New_World_porcupine Canada_porcupine, Erethizon_dorsatum pocket_mouse silky_pocket_mouse, Perognathus_flavus plains_pocket_mouse, Perognathus_flavescens hispid_pocket_mouse, Perognathus_hispidus Mexican_pocket_mouse, Liomys_irroratus kangaroo_rat, desert_rat, Dipodomys_phillipsii Ord_kangaroo_rat, Dipodomys_ordi kangaroo_mouse, dwarf_pocket_rat jumping_mouse meadow_jumping_mouse, Zapus_hudsonius jerboa typical_jerboa Jaculus_jaculus dormouse loir, Glis_glis hazel_mouse, Muscardinus_avellanarius lerot gopher, pocket_gopher, pouched_rat plains_pocket_gopher, Geomys_bursarius southeastern_pocket_gopher, Geomys_pinetis valley_pocket_gopher, Thomomys_bottae northern_pocket_gopher, Thomomys_talpoides squirrel tree_squirrel eastern_grey_squirrel, 
eastern_gray_squirrel, cat_squirrel, Sciurus_carolinensis western_grey_squirrel, western_gray_squirrel, Sciurus_griseus fox_squirrel, eastern_fox_squirrel, Sciurus_niger black_squirrel red_squirrel, cat_squirrel, Sciurus_vulgaris American_red_squirrel, spruce_squirrel, red_squirrel, Sciurus_hudsonicus, Tamiasciurus_hudsonicus chickeree, Douglas_squirrel, Tamiasciurus_douglasi antelope_squirrel, whitetail_antelope_squirrel, antelope_chipmunk, Citellus_leucurus ground_squirrel, gopher, spermophile mantled_ground_squirrel, Citellus_lateralis suslik, souslik, Citellus_citellus flickertail, Richardson_ground_squirrel, Citellus_richardsoni rock_squirrel, Citellus_variegatus Arctic_ground_squirrel, parka_squirrel, Citellus_parryi prairie_dog, prairie_marmot blacktail_prairie_dog, Cynomys_ludovicianus whitetail_prairie_dog, Cynomys_gunnisoni eastern_chipmunk, hackee, striped_squirrel, ground_squirrel, Tamias_striatus chipmunk baronduki, baranduki, barunduki, burunduki, Eutamius_asiaticus, Eutamius_sibiricus American_flying_squirrel southern_flying_squirrel, Glaucomys_volans northern_flying_squirrel, Glaucomys_sabrinus marmot groundhog, woodchuck, Marmota_monax hoary_marmot, whistler, whistling_marmot, Marmota_caligata yellowbelly_marmot, rockchuck, Marmota_flaviventris Asiatic_flying_squirrel beaver Old_World_beaver, Castor_fiber New_World_beaver, Castor_canadensis mountain_beaver, sewellel, Aplodontia_rufa cavy guinea_pig, Cavia_cobaya aperea, wild_cavy, Cavia_porcellus mara, Dolichotis_patagonum capybara, capibara, Hydrochoerus_hydrochaeris agouti, Dasyprocta_aguti paca, Cuniculus_paca mountain_paca coypu, nutria, Myocastor_coypus chinchilla, Chinchilla_laniger mountain_chinchilla, mountain_viscacha viscacha, chinchillon, Lagostomus_maximus abrocome, chinchilla_rat, rat_chinchilla mole_rat mole_rat sand_rat naked_mole_rat queen, queen_mole_rat Damaraland_mole_rat Ungulata ungulate, hoofed_mammal unguiculate, unguiculate_mammal dinoceras, uintathere hyrax, coney, cony, 
dassie, das rock_hyrax, rock_rabbit, Procavia_capensis odd-toed_ungulate, perissodactyl, perissodactyl_mammal equine, equid horse, Equus_caballus roan stablemate, stable_companion gee-gee eohippus, dawn_horse foal filly colt male_horse ridgeling, ridgling, ridgel, ridgil stallion, entire stud, studhorse gelding mare, female_horse broodmare, stud_mare saddle_horse, riding_horse, mount remount palfrey warhorse cavalry_horse charger, courser steed prancer hack cow_pony quarter_horse Morgan Tennessee_walker, Tennessee_walking_horse, Walking_horse, Plantation_walking_horse American_saddle_horse Appaloosa Arabian, Arab Lippizan, Lipizzan, Lippizaner pony polo_pony mustang bronco, bronc, broncho bucking_bronco buckskin crowbait, crow-bait dun grey, gray wild_horse tarpan, Equus_caballus_gomelini Przewalski's_horse, Przevalski's_horse, Equus_caballus_przewalskii, Equus_caballus_przevalskii cayuse, Indian_pony hack hack, jade, nag, plug plow_horse, plough_horse pony Shetland_pony Welsh_pony Exmoor racehorse, race_horse, bangtail thoroughbred steeplechaser racer finisher pony yearling dark_horse mudder nonstarter stalking-horse harness_horse cob hackney workhorse draft_horse, draught_horse, dray_horse packhorse carthorse, cart_horse, drayhorse Clydesdale Percheron farm_horse, dobbin shire, shire_horse pole_horse, poler post_horse, post-horse, poster coach_horse pacer pacer, pacemaker, pacesetter trotting_horse, trotter pole_horse stepper, high_stepper chestnut liver_chestnut bay sorrel palomino pinto ass domestic_ass, donkey, Equus_asinus burro moke jack, jackass jennet, jenny, jenny_ass mule hinny wild_ass African_wild_ass, Equus_asinus kiang, Equus_kiang onager, Equus_hemionus chigetai, dziggetai, Equus_hemionus_hemionus zebra common_zebra, Burchell's_zebra, Equus_Burchelli mountain_zebra, Equus_zebra_zebra grevy's_zebra, Equus_grevyi quagga, Equus_quagga rhinoceros, rhino Indian_rhinoceros, Rhinoceros_unicornis woolly_rhinoceros, Rhinoceros_antiquitatis white_rhinoceros, 
Ceratotherium_simum, Diceros_simus black_rhinoceros, Diceros_bicornis tapir New_World_tapir, Tapirus_terrestris Malayan_tapir, Indian_tapir, Tapirus_indicus even-toed_ungulate, artiodactyl, artiodactyl_mammal swine hog, pig, grunter, squealer, Sus_scrofa piglet, piggy, shoat, shote sucking_pig porker boar sow razorback, razorback_hog, razorbacked_hog wild_boar, boar, Sus_scrofa babirusa, babiroussa, babirussa, Babyrousa_Babyrussa warthog peccary, musk_hog collared_peccary, javelina, Tayassu_angulatus, Tayassu_tajacu, Peccari_angulatus white-lipped_peccary, Tayassu_pecari hippopotamus, hippo, river_horse, Hippopotamus_amphibius ruminant bovid bovine ox, wild_ox cattle, cows, kine, oxen, Bos_taurus ox stirk bullock, steer bull cow, moo-cow heifer bullock dogie, dogy, leppy maverick beef, beef_cattle longhorn, Texas_longhorn Brahman, Brahma, Brahmin, Bos_indicus zebu aurochs, urus, Bos_primigenius yak, Bos_grunniens banteng, banting, tsine, Bos_banteng Welsh, Welsh_Black red_poll Santa_Gertrudis Aberdeen_Angus, Angus, black_Angus Africander dairy_cattle, dairy_cow, milch_cow, milk_cow, milcher, milker Ayrshire Brown_Swiss Charolais Jersey Devon grade Durham, shorthorn milking_shorthorn Galloway Friesian, Holstein, Holstein-Friesian Guernsey Hereford, whiteface cattalo, beefalo Old_World_buffalo, buffalo water_buffalo, water_ox, Asiatic_buffalo, Bubalus_bubalis Indian_buffalo carabao anoa, dwarf_buffalo, Anoa_depressicornis tamarau, tamarao, Bubalus_mindorensis, Anoa_mindorensis Cape_buffalo, Synercus_caffer Asian_wild_ox gaur, Bibos_gaurus gayal, mithan, Bibos_frontalis bison American_bison, American_buffalo, buffalo, Bison_bison wisent, aurochs, Bison_bonasus musk_ox, musk_sheep, Ovibos_moschatus sheep ewe ram, tup wether lamb lambkin baa-lamb hog, hogget, hogg teg Persian_lamb black_sheep domestic_sheep, Ovis_aries Cotswold Hampshire, Hampshire_down Lincoln Exmoor Cheviot broadtail, caracul, karakul longwool merino, merino_sheep Rambouillet wild_sheep argali, argal, 
Ovis_ammon Marco_Polo_sheep, Marco_Polo's_sheep, Ovis_poli urial, Ovis_vignei Dall_sheep, Dall's_sheep, white_sheep, Ovis_montana_dalli mountain_sheep bighorn, bighorn_sheep, cimarron, Rocky_Mountain_bighorn, Rocky_Mountain_sheep, Ovis_canadensis mouflon, moufflon, Ovis_musimon aoudad, arui, audad, Barbary_sheep, maned_sheep, Ammotragus_lervia goat, caprine_animal kid billy, billy_goat, he-goat nanny, nanny-goat, she-goat domestic_goat, Capra_hircus Cashmere_goat, Kashmir_goat Angora, Angora_goat wild_goat bezoar_goat, pasang, Capra_aegagrus markhor, markhoor, Capra_falconeri ibex, Capra_ibex goat_antelope mountain_goat, Rocky_Mountain_goat, Oreamnos_americanus goral, Naemorhedus_goral serow chamois, Rupicapra_rupicapra takin, gnu_goat, Budorcas_taxicolor antelope blackbuck, black_buck, Antilope_cervicapra gerenuk, Litocranius_walleri addax, Addax_nasomaculatus gnu, wildebeest dik-dik hartebeest sassaby, topi, Damaliscus_lunatus impala, Aepyceros_melampus gazelle Thomson's_gazelle, Gazella_thomsoni Gazella_subgutturosa springbok, springbuck, Antidorcas_marsupialis, Antidorcas_euchore bongo, Tragelaphus_eurycerus, Boocercus_eurycerus kudu, koodoo, koudou greater_kudu, Tragelaphus_strepsiceros lesser_kudu, Tragelaphus_imberbis harnessed_antelope nyala, Tragelaphus_angasi mountain_nyala, Tragelaphus_buxtoni bushbuck, guib, Tragelaphus_scriptus nilgai, nylghai, nylghau, blue_bull, Boselaphus_tragocamelus sable_antelope, Hippotragus_niger saiga, Saiga_tatarica steenbok, steinbok, Raphicerus_campestris eland common_eland, Taurotragus_oryx giant_eland, Taurotragus_derbianus kob, Kobus_kob lechwe, Kobus_leche waterbuck puku, Adenota_vardoni oryx, pasang gemsbok, gemsbuck, Oryx_gazella forest_goat, spindle_horn, Pseudoryx_nghetinhensis pronghorn, prongbuck, pronghorn_antelope, American_antelope, Antilocapra_americana deer, cervid stag royal, royal_stag pricket fawn red_deer, elk, American_elk, wapiti, Cervus_elaphus hart, stag hind brocket sambar, sambur, Cervus_unicolor 
wapiti, elk, American_elk, Cervus_elaphus_canadensis Japanese_deer, sika, Cervus_nipon, Cervus_sika Virginia_deer, white_tail, whitetail, white-tailed_deer, whitetail_deer, Odocoileus_Virginianus mule_deer, burro_deer, Odocoileus_hemionus black-tailed_deer, blacktail_deer, blacktail, Odocoileus_hemionus_columbianus elk, European_elk, moose, Alces_alces fallow_deer, Dama_dama roe_deer, Capreolus_capreolus roebuck caribou, reindeer, Greenland_caribou, Rangifer_tarandus woodland_caribou, Rangifer_caribou barren_ground_caribou, Rangifer_arcticus brocket muntjac, barking_deer musk_deer, Moschus_moschiferus pere_david's_deer, elaphure, Elaphurus_davidianus chevrotain, mouse_deer kanchil, Tragulus_kanchil napu, Tragulus_Javanicus water_chevrotain, water_deer, Hyemoschus_aquaticus camel Arabian_camel, dromedary, Camelus_dromedarius Bactrian_camel, Camelus_bactrianus llama domestic_llama, Lama_peruana guanaco, Lama_guanicoe alpaca, Lama_pacos vicuna, Vicugna_vicugna giraffe, camelopard, Giraffa_camelopardalis okapi, Okapia_johnstoni musteline_mammal, mustelid, musteline weasel ermine, shorttail_weasel, Mustela_erminea stoat New_World_least_weasel, Mustela_rixosa Old_World_least_weasel, Mustela_nivalis longtail_weasel, long-tailed_weasel, Mustela_frenata mink American_mink, Mustela_vison polecat, fitch, foulmart, foumart, Mustela_putorius ferret black-footed_ferret, ferret, Mustela_nigripes muishond snake_muishond, Poecilogale_albinucha striped_muishond, Ictonyx_striata otter river_otter, Lutra_canadensis Eurasian_otter, Lutra_lutra sea_otter, Enhydra_lutris skunk, polecat, wood_pussy striped_skunk, Mephitis_mephitis hooded_skunk, Mephitis_macroura hog-nosed_skunk, hognosed_skunk, badger_skunk, rooter_skunk, Conepatus_leuconotus spotted_skunk, little_spotted_skunk, Spilogale_putorius badger American_badger, Taxidea_taxus Eurasian_badger, Meles_meles ratel, honey_badger, Mellivora_capensis ferret_badger hog_badger, hog-nosed_badger, sand_badger, Arctonyx_collaris wolverine, 
carcajou, skunk_bear, Gulo_luscus glutton, Gulo_gulo, wolverine grison, Grison_vittatus, Galictis_vittatus marten, marten_cat pine_marten, Martes_martes sable, Martes_zibellina American_marten, American_sable, Martes_americana stone_marten, beech_marten, Martes_foina fisher, pekan, fisher_cat, black_cat, Martes_pennanti yellow-throated_marten, Charronia_flavigula tayra, taira, Eira_barbara fictional_animal pachyderm edentate armadillo peba, nine-banded_armadillo, Texas_armadillo, Dasypus_novemcinctus apar, three-banded_armadillo, Tolypeutes_tricinctus tatouay, cabassous, Cabassous_unicinctus peludo, poyou, Euphractus_sexcinctus giant_armadillo, tatou, tatu, Priodontes_giganteus pichiciago, pichiciego, fairy_armadillo, chlamyphore, Chlamyphorus_truncatus sloth, tree_sloth three-toed_sloth, ai, Bradypus_tridactylus two-toed_sloth, unau, unai, Choloepus_didactylus two-toed_sloth, unau, unai, Choloepus_hoffmanni megatherian, megatheriid, megatherian_mammal mylodontid anteater, New_World_anteater ant_bear, giant_anteater, great_anteater, tamanoir, Myrmecophaga_jubata silky_anteater, two-toed_anteater, Cyclopes_didactylus tamandua, tamandu, lesser_anteater, Tamandua_tetradactyla pangolin, scaly_anteater, anteater coronet scapular tadpole, polliwog, pollywog primate simian ape anthropoid anthropoid_ape hominoid hominid homo, man, human_being, human world, human_race, humanity, humankind, human_beings, humans, mankind, man Homo_erectus Pithecanthropus, Pithecanthropus_erectus, genus_Pithecanthropus Java_man, Trinil_man Peking_man Sinanthropus, genus_Sinanthropus Homo_soloensis Javanthropus, genus_Javanthropus Homo_habilis Homo_sapiens Neandertal_man, Neanderthal_man, Neandertal, Neanderthal, Homo_sapiens_neanderthalensis Cro-magnon Homo_sapiens_sapiens, modern_man australopithecine Australopithecus_afarensis Australopithecus_africanus Australopithecus_boisei Zinjanthropus, genus_Zinjanthropus Australopithecus_robustus Paranthropus, genus_Paranthropus Sivapithecus 
rudapithecus, Dryopithecus_Rudapithecus_hungaricus proconsul Aegyptopithecus great_ape, pongid orangutan, orang, orangutang, Pongo_pygmaeus gorilla, Gorilla_gorilla western_lowland_gorilla, Gorilla_gorilla_gorilla eastern_lowland_gorilla, Gorilla_gorilla_grauri mountain_gorilla, Gorilla_gorilla_beringei silverback chimpanzee, chimp, Pan_troglodytes western_chimpanzee, Pan_troglodytes_verus eastern_chimpanzee, Pan_troglodytes_schweinfurthii central_chimpanzee, Pan_troglodytes_troglodytes pygmy_chimpanzee, bonobo, Pan_paniscus lesser_ape gibbon, Hylobates_lar siamang, Hylobates_syndactylus, Symphalangus_syndactylus monkey Old_World_monkey, catarrhine guenon, guenon_monkey talapoin, Cercopithecus_talapoin grivet, Cercopithecus_aethiops vervet, vervet_monkey, Cercopithecus_aethiops_pygerythrus green_monkey, African_green_monkey, Cercopithecus_aethiops_sabaeus mangabey patas, hussar_monkey, Erythrocebus_patas baboon chacma, chacma_baboon, Papio_ursinus mandrill, Mandrillus_sphinx drill, Mandrillus_leucophaeus macaque rhesus, rhesus_monkey, Macaca_mulatta bonnet_macaque, bonnet_monkey, capped_macaque, crown_monkey, Macaca_radiata Barbary_ape, Macaca_sylvana crab-eating_macaque, croo_monkey, Macaca_irus langur entellus, hanuman, Presbytes_entellus, Semnopithecus_entellus colobus, colobus_monkey guereza, Colobus_guereza proboscis_monkey, Nasalis_larvatus New_World_monkey, platyrrhine, platyrrhinian marmoset true_marmoset pygmy_marmoset, Cebuella_pygmaea tamarin, lion_monkey, lion_marmoset, leoncita silky_tamarin, Leontocebus_rosalia pinche, Leontocebus_oedipus capuchin, ringtail, Cebus_capucinus douroucouli, Aotus_trivirgatus howler_monkey, howler saki uakari titi, titi_monkey spider_monkey, Ateles_geoffroyi squirrel_monkey, Saimiri_sciureus woolly_monkey tree_shrew prosimian lemur Madagascar_cat, ring-tailed_lemur, Lemur_catta aye-aye, Daubentonia_madagascariensis slender_loris, Loris_gracilis slow_loris, Nycticebus_tardigradua, Nycticebus_pygmaeus potto, kinkajou, 
Perodicticus_potto angwantibo, golden_potto, Arctocebus_calabarensis galago, bushbaby, bush_baby indri, indris, Indri_indri, Indri_brevicaudatus woolly_indris, Avahi_laniger tarsier Tarsius_syrichta Tarsius_glis flying_lemur, flying_cat, colugo Cynocephalus_variegatus proboscidean, proboscidian elephant rogue_elephant Indian_elephant, Elephas_maximus African_elephant, Loxodonta_africana mammoth woolly_mammoth, northern_mammoth, Mammuthus_primigenius columbian_mammoth, Mammuthus_columbi imperial_mammoth, imperial_elephant, Archidiskidon_imperator mastodon, mastodont plantigrade_mammal, plantigrade digitigrade_mammal, digitigrade procyonid raccoon, racoon common_raccoon, common_racoon, coon, ringtail, Procyon_lotor crab-eating_raccoon, Procyon_cancrivorus bassarisk, cacomistle, cacomixle, coon_cat, raccoon_fox, ringtail, ring-tailed_cat, civet_cat, miner's_cat, Bassariscus_astutus kinkajou, honey_bear, potto, Potos_flavus, Potos_caudivolvulus coati, coati-mondi, coati-mundi, coon_cat, Nasua_narica lesser_panda, red_panda, panda, bear_cat, cat_bear, Ailurus_fulgens giant_panda, panda, panda_bear, coon_bear, Ailuropoda_melanoleuca twitterer fish fingerling game_fish, sport_fish food_fish rough_fish groundfish, bottom_fish young_fish parr mouthbreeder spawner barracouta, snoek crossopterygian, lobefin, lobe-finned_fish coelacanth, Latimeria_chalumnae lungfish ceratodus catfish, siluriform_fish silurid, silurid_fish European_catfish, sheatfish, Silurus_glanis electric_catfish, Malopterurus_electricus bullhead, bullhead_catfish horned_pout, hornpout, pout, Ameiurus_Melas brown_bullhead channel_catfish, channel_cat, Ictalurus_punctatus blue_catfish, blue_cat, blue_channel_catfish, blue_channel_cat flathead_catfish, mudcat, goujon, shovelnose_catfish, spoonbill_catfish, Pylodictus_olivaris armored_catfish sea_catfish gadoid, gadoid_fish cod, codfish codling Atlantic_cod, Gadus_morhua Pacific_cod, Alaska_cod, Gadus_macrocephalus whiting, Merlangus_merlangus, Gadus_merlangus 
burbot, eelpout, ling, cusk, Lota_lota haddock, Melanogrammus_aeglefinus pollack, pollock, Pollachius_pollachius hake silver_hake, Merluccius_bilinearis, whiting ling cusk, torsk, Brosme_brosme grenadier, rattail, rattail_fish eel elver common_eel, freshwater_eel tuna, Anguilla_sucklandii moray, moray_eel conger, conger_eel teleost_fish, teleost, teleostan beaked_salmon, sandfish, Gonorhynchus_gonorhynchus clupeid_fish, clupeid whitebait brit, britt shad common_American_shad, Alosa_sapidissima river_shad, Alosa_chrysocloris allice_shad, allis_shad, allice, allis, Alosa_alosa alewife, Alosa_pseudoharengus, Pomolobus_pseudoharengus menhaden, Brevoortia_tyrannis herring, Clupea_harangus Atlantic_herring, Clupea_harengus_harengus Pacific_herring, Clupea_harengus_pallasii sardine sild brisling, sprat, Clupea_sprattus pilchard, sardine, Sardina_pilchardus Pacific_sardine, Sardinops_caerulea anchovy mediterranean_anchovy, Engraulis_encrasicholus salmonid salmon parr blackfish redfish Atlantic_salmon, Salmo_salar landlocked_salmon, lake_salmon sockeye, sockeye_salmon, red_salmon, blueback_salmon, Oncorhynchus_nerka chinook, chinook_salmon, king_salmon, quinnat_salmon, Oncorhynchus_tshawytscha coho, cohoe, coho_salmon, blue_jack, silver_salmon, Oncorhynchus_kisutch trout brown_trout, salmon_trout, Salmo_trutta rainbow_trout, Salmo_gairdneri sea_trout lake_trout, salmon_trout, Salvelinus_namaycush brook_trout, speckled_trout, Salvelinus_fontinalis char, charr Arctic_char, Salvelinus_alpinus whitefish lake_whitefish, Coregonus_clupeaformis cisco, lake_herring, Coregonus_artedi round_whitefish, Menominee_whitefish, Prosopium_cylindraceum smelt sparling, European_smelt, Osmerus_eperlanus capelin, capelan, caplin tarpon, Tarpon_atlanticus ladyfish, tenpounder, Elops_saurus bonefish, Albula_vulpes argentine lanternfish lizardfish, snakefish, snake-fish lancetfish, lancet_fish, wolffish opah, moonfish, Lampris_regius New_World_opah, Lampris_guttatus ribbonfish dealfish, 
Trachipterus_arcticus oarfish, king_of_the_herring, ribbonfish, Regalecus_glesne batfish goosefish, angler, anglerfish, angler_fish, monkfish, lotte, allmouth, Lophius_Americanus toadfish, Opsanus_tau oyster_fish, oyster-fish, oysterfish frogfish sargassum_fish needlefish, gar, billfish timucu flying_fish monoplane_flying_fish, two-wing_flying_fish halfbeak saury, billfish, Scomberesox_saurus spiny-finned_fish, acanthopterygian lingcod, Ophiodon_elongatus percoid_fish, percoid, percoidean perch climbing_perch, Anabas_testudineus, A._testudineus perch yellow_perch, Perca_flavescens European_perch, Perca_fluviatilis pike-perch, pike_perch walleye, walleyed_pike, jack_salmon, dory, Stizostedion_vitreum blue_pike, blue_pickerel, blue_pikeperch, blue_walleye, Strizostedion_vitreum_glaucum snail_darter, Percina_tanasi cusk-eel brotula pearlfish, pearl-fish robalo snook pike northern_pike, Esox_lucius muskellunge, Esox_masquinongy pickerel chain_pickerel, chain_pike, Esox_niger redfin_pickerel, barred_pickerel, Esox_americanus sunfish, centrarchid crappie black_crappie, Pomoxis_nigromaculatus white_crappie, Pomoxis_annularis freshwater_bream, bream pumpkinseed, Lepomis_gibbosus bluegill, Lepomis_macrochirus spotted_sunfish, stumpknocker, Lepomis_punctatus freshwater_bass rock_bass, rock_sunfish, Ambloplites_rupestris black_bass Kentucky_black_bass, spotted_black_bass, Micropterus_pseudoplites smallmouth, smallmouth_bass, smallmouthed_bass, smallmouth_black_bass, smallmouthed_black_bass, Micropterus_dolomieu largemouth, largemouth_bass, largemouthed_bass, largemouth_black_bass, largemouthed_black_bass, Micropterus_salmoides bass serranid_fish, serranid white_perch, silver_perch, Morone_americana yellow_bass, Morone_interrupta blackmouth_bass, Synagrops_bellus rock_sea_bass, rock_bass, Centropristis_philadelphica striped_bass, striper, Roccus_saxatilis, rockfish stone_bass, wreckfish, Polyprion_americanus grouper hind rock_hind, Epinephelus_adscensionis creole-fish, 
Paranthias_furcifer jewfish, Mycteroperca_bonaci soapfish surfperch, surffish, surf_fish rainbow_seaperch, rainbow_perch, Hipsurus_caryi bigeye catalufa, Priacanthus_arenatus cardinalfish flame_fish, flamefish, Apogon_maculatus tilefish, Lopholatilus_chamaeleonticeps bluefish, Pomatomus_saltatrix cobia, Rachycentron_canadum, sergeant_fish remora, suckerfish, sucking_fish sharksucker, Echeneis_naucrates whale_sucker, whalesucker, Remilegia_australis carangid_fish, carangid jack crevalle_jack, jack_crevalle, Caranx_hippos yellow_jack, Caranx_bartholomaei runner, blue_runner, Caranx_crysos rainbow_runner, Elagatis_bipinnulata leatherjacket, leatherjack threadfish, thread-fish, Alectis_ciliaris moonfish, Atlantic_moonfish, horsefish, horsehead, horse-head, dollarfish, Selene_setapinnis lookdown, lookdown_fish, Selene_vomer amberjack, amberfish yellowtail, Seriola_dorsalis kingfish, Seriola_grandis pompano Florida_pompano, Trachinotus_carolinus permit, Trachinotus_falcatus scad horse_mackerel, jack_mackerel, Spanish_mackerel, saurel, Trachurus_symmetricus horse_mackerel, saurel, Trachurus_trachurus bigeye_scad, big-eyed_scad, goggle-eye, Selar_crumenophthalmus mackerel_scad, mackerel_shad, Decapterus_macarellus round_scad, cigarfish, quiaquia, Decapterus_punctatus dolphinfish, dolphin, mahimahi Coryphaena_hippurus Coryphaena_equisetis pomfret, Brama_raii characin, characin_fish, characid tetra cardinal_tetra, Paracheirodon_axelrodi piranha, pirana, caribe cichlid, cichlid_fish bolti, Tilapia_nilotica snapper red_snapper, Lutjanus_blackfordi grey_snapper, gray_snapper, mangrove_snapper, Lutjanus_griseus mutton_snapper, muttonfish, Lutjanus_analis schoolmaster, Lutjanus_apodus yellowtail, yellowtail_snapper, Ocyurus_chrysurus grunt margate, Haemulon_album Spanish_grunt, Haemulon_macrostomum tomtate, Haemulon_aurolineatum cottonwick, Haemulon_malanurum sailor's-choice, sailors_choice, Haemulon_parra porkfish, pork-fish, Anisotremus_virginicus pompon, black_margate, 
Anisotremus_surinamensis pigfish, hogfish, Orthopristis_chrysopterus sparid, sparid_fish sea_bream, bream porgy red_porgy, Pagrus_pagrus European_sea_bream, Pagellus_centrodontus Atlantic_sea_bream, Archosargus_rhomboidalis sheepshead, Archosargus_probatocephalus pinfish, sailor's-choice, squirrelfish, Lagodon_rhomboides sheepshead_porgy, Calamus_penna snapper, Chrysophrys_auratus black_bream, Chrysophrys_australis scup, northern_porgy, northern_scup, Stenotomus_chrysops scup, southern_porgy, southern_scup, Stenotomus_aculeatus sciaenid_fish, sciaenid striped_drum, Equetus_pulcher jackknife-fish, Equetus_lanceolatus silver_perch, mademoiselle, Bairdiella_chrysoura red_drum, channel_bass, redfish, Sciaenops_ocellatus mulloway, jewfish, Sciaena_antarctica maigre, maiger, Sciaena_aquila croaker Atlantic_croaker, Micropogonias_undulatus yellowfin_croaker, surffish, surf_fish, Umbrina_roncador whiting kingfish king_whiting, Menticirrhus_americanus northern_whiting, Menticirrhus_saxatilis corbina, Menticirrhus_undulatus white_croaker, chenfish, kingfish, Genyonemus_lineatus white_croaker, queenfish, Seriphus_politus sea_trout weakfish, Cynoscion_regalis spotted_weakfish, spotted_sea_trout, spotted_squeateague, Cynoscion_nebulosus mullet goatfish, red_mullet, surmullet, Mullus_surmuletus red_goatfish, Mullus_auratus yellow_goatfish, Mulloidichthys_martinicus mullet, grey_mullet, gray_mullet striped_mullet, Mugil_cephalus white_mullet, Mugil_curema liza, Mugil_liza silversides, silverside jacksmelt, Atherinopsis_californiensis barracuda great_barracuda, Sphyraena_barracuda sweeper sea_chub Bermuda_chub, rudderfish, Kyphosus_sectatrix spadefish, angelfish, Chaetodipterus_faber butterfly_fish chaetodon angelfish rock_beauty, Holocanthus_tricolor damselfish, demoiselle beaugregory, Pomacentrus_leucostictus anemone_fish clown_anemone_fish, Amphiprion_percula sergeant_major, Abudefduf_saxatilis wrasse pigfish, giant_pigfish, Achoerodus_gouldii hogfish, hog_snapper, 
Lachnolaimus_maximus slippery_dick, Halicoeres_bivittatus puddingwife, pudding-wife, Halicoeres_radiatus bluehead, Thalassoma_bifasciatum pearly_razorfish, Hemipteronatus_novacula tautog, blackfish, Tautoga_onitis cunner, bergall, Tautogolabrus_adspersus parrotfish, polly_fish, pollyfish threadfin jawfish stargazer sand_stargazer blenny, combtooth_blenny shanny, Blennius_pholis Molly_Miller, Scartella_cristata clinid, clinid_fish pikeblenny bluethroat_pikeblenny, Chaenopsis_ocellata gunnel, bracketed_blenny rock_gunnel, butterfish, Pholis_gunnellus eelblenny wrymouth, ghostfish, Cryptacanthodes_maculatus wolffish, wolf_fish, catfish viviparous_eelpout, Zoarces_viviparus ocean_pout, Macrozoarces_americanus sand_lance, sand_launce, sand_eel, launce dragonet goby, gudgeon mudskipper, mudspringer sleeper, sleeper_goby flathead archerfish, Toxotes_jaculatrix surgeonfish gempylid snake_mackerel, Gempylus_serpens escolar, Lepidocybium_flavobrunneum oilfish, Ruvettus_pretiosus cutlassfish, frost_fish, hairtail scombroid, scombroid_fish mackerel common_mackerel, shiner, Scomber_scombrus Spanish_mackerel, Scomber_colias chub_mackerel, tinker, Scomber_japonicus wahoo, Acanthocybium_solandri Spanish_mackerel king_mackerel, cavalla, cero, Scomberomorus_cavalla Scomberomorus_maculatus cero, pintado, kingfish, Scomberomorus_regalis sierra, Scomberomorus_sierra tuna, tunny albacore, long-fin_tunny, Thunnus_alalunga bluefin, bluefin_tuna, horse_mackerel, Thunnus_thynnus yellowfin, yellowfin_tuna, Thunnus_albacares bonito skipjack, Atlantic_bonito, Sarda_sarda Chile_bonito, Chilean_bonito, Pacific_bonito, Sarda_chiliensis skipjack, skipjack_tuna, Euthynnus_pelamis bonito, oceanic_bonito, Katsuwonus_pelamis swordfish, Xiphias_gladius sailfish Atlantic_sailfish, Istiophorus_albicans billfish marlin blue_marlin, Makaira_nigricans black_marlin, Makaira_mazara, Makaira_marlina striped_marlin, Makaira_mitsukurii white_marlin, Makaira_albida spearfish louvar, Luvarus_imperialis dollarfish, 
Poronotus_triacanthus palometa, California_pompano, Palometa_simillima harvestfish, Paprilus_alepidotus driftfish barrelfish, black_rudderfish, Hyperglyphe_perciformis clingfish tripletail Atlantic_tripletail, Lobotes_surinamensis Pacific_tripletail, Lobotes_pacificus mojarra yellowfin_mojarra, Gerres_cinereus silver_jenny, Eucinostomus_gula whiting ganoid, ganoid_fish bowfin, grindle, dogfish, Amia_calva paddlefish, duckbill, Polyodon_spathula Chinese_paddlefish, Psephurus_gladis sturgeon Pacific_sturgeon, white_sturgeon, Sacramento_sturgeon, Acipenser_transmontanus beluga, hausen, white_sturgeon, Acipenser_huso gar, garfish, garpike, billfish, Lepisosteus_osseus scorpaenoid, scorpaenoid_fish scorpaenid, scorpaenid_fish scorpionfish, scorpion_fish, sea_scorpion plumed_scorpionfish, Scorpaena_grandicornis lionfish stonefish, Synanceja_verrucosa rockfish copper_rockfish, Sebastodes_caurinus vermillion_rockfish, rasher, Sebastodes_miniatus red_rockfish, Sebastodes_ruberrimus rosefish, ocean_perch, Sebastodes_marinus bullhead miller's-thumb sea_raven, Hemitripterus_americanus lumpfish, Cyclopterus_lumpus lumpsucker pogge, armed_bullhead, Agonus_cataphractus greenling kelp_greenling, Hexagrammos_decagrammus painted_greenling, convict_fish, convictfish, Oxylebius_pictus flathead gurnard tub_gurnard, yellow_gurnard, Trigla_lucerna sea_robin, searobin northern_sea_robin, Prionotus_carolinus flying_gurnard, flying_robin, butterflyfish plectognath, plectognath_fish triggerfish queen_triggerfish, Bessy_cerca, oldwench, oldwife, Balistes_vetula filefish leatherjacket, leatherfish boxfish, trunkfish cowfish, Lactophrys_quadricornis puffer, pufferfish, blowfish, globefish spiny_puffer porcupinefish, porcupine_fish, Diodon_hystrix balloonfish, Diodon_holocanthus burrfish ocean_sunfish, sunfish, mola, headfish sharptail_mola, Mola_lanceolata flatfish flounder righteye_flounder, righteyed_flounder plaice, Pleuronectes_platessa European_flatfish, Platichthys_flesus 
yellowtail_flounder, Limanda_ferruginea winter_flounder, blackback_flounder, lemon_sole, Pseudopleuronectes_americanus lemon_sole, Microstomus_kitt American_plaice, Hippoglossoides_platessoides halibut, holibut Atlantic_halibut, Hippoglossus_hippoglossus Pacific_halibut, Hippoglossus_stenolepsis lefteye_flounder, lefteyed_flounder southern_flounder, Paralichthys_lethostigmus summer_flounder, Paralichthys_dentatus whiff horned_whiff, Citharichthys_cornutus sand_dab windowpane, Scophthalmus_aquosus brill, Scophthalmus_rhombus turbot, Psetta_maxima tonguefish, tongue-fish sole European_sole, Solea_solea English_sole, lemon_sole, Parophrys_vitulus hogchoker, Trinectes_maculatus aba abacus abandoned_ship, derelict A_battery abattoir, butchery, shambles, slaughterhouse abaya Abbe_condenser abbey abbey abbey Abney_level abrader, abradant abrading_stone abutment abutment_arch academic_costume academic_gown, academic_robe, judge's_robe accelerator, throttle, throttle_valve accelerator, particle_accelerator, atom_smasher accelerator, accelerator_pedal, gas_pedal, gas, throttle, gun accelerometer accessory, accoutrement, accouterment accommodating_lens_implant, accommodating_IOL accommodation accordion, piano_accordion, squeeze_box acetate_disk, phonograph_recording_disk acetate_rayon, acetate achromatic_lens acoustic_delay_line, sonic_delay_line acoustic_device acoustic_guitar acoustic_modem acropolis acrylic acrylic, acrylic_paint actinometer action, action_mechanism active_matrix_screen actuator adapter, adaptor adder adding_machine, totalizer, totaliser addressing_machine, Addressograph adhesive_bandage adit adjoining_room adjustable_wrench, adjustable_spanner adobe, adobe_brick adz, adze aeolian_harp, aeolian_lyre, wind_harp aerator aerial_torpedo aerosol, aerosol_container, aerosol_can, aerosol_bomb, spray_can Aertex afghan Afro-wig afterburner after-shave, after-shave_lotion agateware agglomerator aglet, aiglet, aiguilette aglet, aiglet agora, public_square aigrette, 
aigret aileron air_bag airbrake airbrush airbus air_compressor air_conditioner, air_conditioning aircraft aircraft_carrier, carrier, flattop, attack_aircraft_carrier aircraft_engine air_cushion, air_spring airdock, hangar, repair_shed airfield, landing_field, flying_field, field air_filter, air_cleaner airfoil, aerofoil, control_surface, surface airframe air_gun, airgun, air_rifle air_hammer, jackhammer, pneumatic_hammer air_horn airing_cupboard airliner airmailer airplane, aeroplane, plane airplane_propeller, airscrew, prop airport, airdrome, aerodrome, drome air_pump, vacuum_pump air_search_radar airship, dirigible air_terminal, airport_terminal air-to-air_missile air-to-ground_missile, air-to-surface_missile aisle Aladdin's_lamp alarm, warning_device, alarm_system alarm_clock, alarm alb alcazar alcohol_thermometer, alcohol-in-glass_thermometer alehouse alembic algometer alidade, alidad alidade, alidad A-line Allen_screw Allen_wrench alligator_wrench alms_dish, alms_tray alpaca alpenstock altar altar, communion_table, Lord's_table altarpiece, reredos altazimuth alternator altimeter Amati ambulance amen_corner American_organ ammeter ammonia_clock ammunition, ammo amphibian, amphibious_aircraft amphibian, amphibious_vehicle amphitheater, amphitheatre, coliseum amphitheater, amphitheatre amphora amplifier ampulla amusement_arcade analog_clock analog_computer, analogue_computer analog_watch analytical_balance, chemical_balance analyzer, analyser anamorphosis, anamorphism anastigmat anchor, ground_tackle anchor_chain, anchor_rope anchor_light, riding_light, riding_lamp AND_circuit, AND_gate andiron, firedog, dog, dog-iron android, humanoid, mechanical_man anechoic_chamber anemometer, wind_gauge, wind_gage aneroid_barometer, aneroid angiocardiogram angioscope angle_bracket, angle_iron angledozer ankle_brace anklet, anklets, bobbysock, bobbysocks anklet ankus anode anode answering_machine antenna, aerial, transmitting_aerial anteroom, antechamber, entrance_hall, hall, 
foyer, lobby, vestibule antiaircraft, antiaircraft_gun, flak, flack, pom-pom, ack-ack, ack-ack_gun antiballistic_missile, ABM antifouling_paint anti-G_suit, G_suit antimacassar antiperspirant anti-submarine_rocket anvil ao_dai apadana apartment, flat apartment_building, apartment_house aperture aperture apiary, bee_house apparatus, setup apparel, wearing_apparel, dress, clothes applecart appliance appliance, contraption, contrivance, convenience, gadget, gizmo, gismo, widget applicator, applier appointment, fitting apron apron_string apse, apsis aqualung, Aqua-Lung, scuba aquaplane aquarium, fish_tank, marine_museum arabesque arbor, arbour, bower, pergola arcade, colonnade arch architecture architrave arch_support arc_lamp, arc_light arctic, galosh, golosh, rubber, gumshoe area areaway argyle, argyll ark arm armament armature armband armchair armet arm_guard, arm_pad armhole armilla armlet, arm_band armoire armor, armour armored_car, armoured_car armored_car, armoured_car armored_personnel_carrier, armoured_personnel_carrier, APC armored_vehicle, armoured_vehicle armor_plate, armour_plate, armor_plating, plate_armor, plate_armour armory, armoury, arsenal armrest arquebus, harquebus, hackbut, hagbut array array, raiment, regalia arrester, arrester_hook arrow arsenal, armory, armoury arterial_road arthrogram arthroscope artificial_heart artificial_horizon, gyro_horizon, flight_indicator artificial_joint artificial_kidney, hemodialyzer artificial_skin artillery, heavy_weapon, gun, ordnance artillery_shell artist's_loft art_school ascot ashcan, trash_can, garbage_can, wastebin, ash_bin, ash-bin, ashbin, dustbin, trash_barrel, trash_bin ash-pan ashtray aspergill, aspersorium aspersorium aspirator aspirin_powder, headache_powder assault_gun assault_rifle, assault_gun assegai, assagai assembly assembly assembly_hall assembly_plant astatic_coils astatic_galvanometer astrodome astrolabe astronomical_telescope astronomy_satellite athenaeum, atheneum athletic_sock, 
sweat_sock, varsity_sock athletic_supporter, supporter, suspensor, jockstrap, jock atlas, telamon atmometer, evaporometer atom_bomb, atomic_bomb, A-bomb, fission_bomb, plutonium_bomb atomic_clock atomic_pile, atomic_reactor, pile, chain_reactor atomizer, atomiser, spray, sprayer, nebulizer, nebuliser atrium attache_case, attache attachment, bond attack_submarine attenuator attic attic_fan attire, garb, dress audio_amplifier audiocassette audio_CD, audio_compact_disc audiometer, sonometer audio_system, sound_system audiotape audiotape audiovisual, audiovisual_aid auditorium auger, gimlet, screw_auger, wimble autobahn autoclave, sterilizer, steriliser autofocus autogiro, autogyro, gyroplane autoinjector autoloader, self-loader automat automat automatic_choke automatic_firearm, automatic_gun, automatic_weapon automatic_pistol, automatic automatic_rifle, automatic, machine_rifle automatic_transmission, automatic_drive automation automaton, robot, golem automobile_engine automobile_factory, auto_factory, car_factory automobile_horn, car_horn, motor_horn, horn, hooter autopilot, automatic_pilot, robot_pilot autoradiograph autostrada auxiliary_boiler, donkey_boiler auxiliary_engine, donkey_engine auxiliary_pump, donkey_pump auxiliary_research_submarine auxiliary_storage, external_storage, secondary_storage aviary, bird_sanctuary, volary awl awning, sunshade, sunblind ax, axe ax_handle, axe_handle ax_head, axe_head axis, axis_of_rotation axle axle_bar axletree babushka baby_bed, baby's_bed baby_buggy, baby_carriage, carriage, perambulator, pram, stroller, go-cart, pushchair, pusher baby_grand, baby_grand_piano, parlor_grand, parlor_grand_piano, parlour_grand, parlour_grand_piano baby_powder baby_shoe back, backrest back backbench backboard backboard, basketball_backboard backbone back_brace backgammon_board background, desktop, screen_background backhoe backlighting backpack, back_pack, knapsack, packsack, rucksack, haversack backpacking_tent, pack_tent backplate 
back_porch backsaw, back_saw backscratcher backseat backspace_key, backspace, backspacer backstairs backstay backstop backsword backup_system badminton_court badminton_equipment badminton_racket, badminton_racquet, battledore bag bag, traveling_bag, travelling_bag, grip, suitcase bag, handbag, pocketbook, purse baggage, luggage baggage baggage_car, luggage_van baggage_claim bagpipe bailey bailey Bailey_bridge bain-marie bait, decoy, lure baize bakery, bakeshop, bakehouse balaclava, balaclava_helmet balalaika balance balance_beam, beam balance_wheel, balance balbriggan balcony balcony baldachin baldric, baldrick bale baling_wire ball ball ball_and_chain ball-and-socket_joint ballast, light_ballast ball_bearing, needle_bearing, roller_bearing ball_cartridge ballcock, ball_cock balldress ballet_skirt, tutu ball_gown ballistic_galvanometer ballistic_missile ballistic_pendulum ballistocardiograph, cardiograph balloon balloon_bomb, Fugo balloon_sail ballot_box ballpark, park ball-peen_hammer ballpoint, ballpoint_pen, ballpen, Biro ballroom, dance_hall, dance_palace ball_valve balsa_raft, Kon_Tiki baluster banana_boat band bandage, patch Band_Aid bandanna, bandana bandbox banderilla bandoleer, bandolier bandoneon bandsaw, band_saw bandwagon bangalore_torpedo bangle, bauble, gaud, gewgaw, novelty, fallal, trinket banjo banner, streamer bannister, banister, balustrade, balusters, handrail banquette banyan, banian baptismal_font, baptistry, baptistery, font bar bar barbecue, barbeque barbed_wire, barbwire barbell barber_chair barbershop barbette_carriage barbican, barbacan bar_bit bareboat barge, flatboat, hoy, lighter barge_pole baritone, baritone_horn bark, barque bar_magnet bar_mask barn barndoor barn_door barnyard barograph barometer barong barouche bar_printer barrack barrage_balloon barrel, cask barrel, gun_barrel barrelhouse, honky-tonk barrel_knot, blood_knot barrel_organ, grind_organ, hand_organ, hurdy_gurdy, hurdy-gurdy, street_organ barrel_vault barrette barricade 
barrier barroom, bar, saloon, ginmill, taproom barrow, garden_cart, lawn_cart, wheelbarrow bascule base, pedestal, stand base, bag baseball baseball_bat, lumber baseball_cap, jockey_cap, golf_cap baseball_equipment baseball_glove, glove, baseball_mitt, mitt basement, cellar basement basic_point_defense_missile_system basilica, Roman_basilica basilica basilisk basin basinet basket, handbasket basket, basketball_hoop, hoop basketball basketball_court basketball_equipment basket_weave bass bass_clarinet bass_drum, gran_casa basset_horn bass_fiddle, bass_viol, bull_fiddle, double_bass, contrabass, string_bass bass_guitar bass_horn, sousaphone, tuba bassinet bassinet bassoon baster bastinado bastion bastion, citadel bat bath bath_chair bathhouse, bagnio bathhouse, bathing_machine bathing_cap, swimming_cap bath_oil bathrobe bathroom, bath bath_salts bath_towel bathtub, bathing_tub, bath, tub bathyscaphe, bathyscaph, bathyscape bathysphere batik batiste baton, wand baton baton baton battering_ram batter's_box battery, electric_battery battery, stamp_battery batting_cage, cage batting_glove batting_helmet battle-ax, battle-axe battle_cruiser battle_dress battlement, crenelation, crenellation battleship, battlewagon battle_sight, battlesight bay bay bayonet bay_rum bay_window, bow_window bazaar, bazar bazaar, bazar bazooka B_battery BB_gun beach_house beach_towel beach_wagon, station_wagon, wagon, estate_car, beach_waggon, station_waggon, waggon beachwear beacon, lighthouse, beacon_light, pharos beading_plane beaker beaker beam beam_balance beanbag beanie, beany bearing bearing_rein, checkrein bearing_wall bearskin, busby, shako beater beating-reed_instrument, reed_instrument, reed beaver, castor beaver Beckman_thermometer bed bed bed_and_breakfast, bed-and-breakfast bedclothes, bed_clothing, bedding Bedford_cord bed_jacket bedpan bedpost bedroll bedroom, sleeping_room, sleeping_accommodation, chamber, bedchamber bedroom_furniture bedsitting_room, bedsitter, bedsit 
bedspread, bedcover, bed_cover, bed_covering, counterpane, spread bedspring bedstead, bedframe beefcake beehive, hive beeper, pager beer_barrel, beer_keg beer_bottle beer_can beer_garden beer_glass beer_hall beer_mat beer_mug, stein belaying_pin belfry bell bell_arch bellarmine, longbeard, long-beard, greybeard bellbottom_trousers, bell-bottoms, bellbottom_pants bell_cote, bell_cot bell_foundry bell_gable bell_jar, bell_glass bellows bellpull bell_push bell_seat, balloon_seat bell_tent bell_tower bellyband belt belt, belt_ammunition, belted_ammunition belt_buckle belting bench bench_clamp bench_hook bench_lathe bench_press bender beret berlin Bermuda_shorts, Jamaica_shorts berth, bunk, built_in_bed besom Bessemer_converter bethel betting_shop bevatron bevel, bevel_square bevel_gear, pinion_and_crown_wheel, pinion_and_ring_gear B-flat_clarinet, licorice_stick bib bib-and-tucker bicorn, bicorne bicycle, bike, wheel, cycle bicycle-built-for-two, tandem_bicycle, tandem bicycle_chain bicycle_clip, trouser_clip bicycle_pump bicycle_rack bicycle_seat, saddle bicycle_wheel bidet bier bier bi-fold_door bifocals Big_Blue, BLU-82 big_board bight bikini, two-piece bikini_pants bilge bilge_keel bilge_pump bilge_well bill, peak, eyeshade, visor, vizor bill, billhook billboard, hoarding billiard_ball billiard_room, billiard_saloon, billiard_parlor, billiard_parlour, billiard_hall bin binder, ligature binder, ring-binder bindery binding, book_binding, cover, back bin_liner binnacle binoculars, field_glasses, opera_glasses binocular_microscope biochip biohazard_suit bioscope biplane birch, birch_rod birchbark_canoe, birchbark, birch_bark birdbath birdcage birdcall bird_feeder, birdfeeder, feeder birdhouse bird_shot, buckshot, duck_shot biretta, berretta, birretta bishop bistro bit bit bite_plate, biteplate bitewing bitumastic black black blackboard, chalkboard blackboard_eraser black_box blackface blackjack, cosh, sap black_tie blackwash bladder blade blade, vane blade blank, 
dummy, blank_shell blanket, cover blast_furnace blasting_cap blazer, sport_jacket, sport_coat, sports_jacket, sports_coat blender, liquidizer, liquidiser blimp, sausage_balloon, sausage blind, screen blind_curve, blind_bend blindfold bling, bling_bling blinker, flasher blister_pack, bubble_pack block blockade blockade-runner block_and_tackle blockbuster blockhouse block_plane bloodmobile bloomers, pants, drawers, knickers blouse blower blowtorch, torch, blowlamp blucher bludgeon blue blue_chip blunderbuss blunt_file boarding boarding_house, boardinghouse boardroom, council_chamber boards boat boater, leghorn, Panama, Panama_hat, sailor, skimmer, straw_hat boat_hook boathouse boatswain's_chair, bosun's_chair boat_train boatyard bobbin, spool, reel bobby_pin, hairgrip, grip bobsled, bobsleigh, bob bobsled, bobsleigh bocce_ball, bocci_ball, boccie_ball bodega bodice bodkin, threader bodkin bodkin body body_armor, body_armour, suit_of_armor, suit_of_armour, coat_of_mail, cataphract body_lotion body_stocking body_plethysmograph body_pad bodywork Bofors_gun bogy, bogie, bogey boiler, steam_boiler boiling_water_reactor, BWR bolero bollard, bitt bolo, bolo_knife bolo_tie, bolo, bola_tie, bola bolt bolt, deadbolt bolt bolt_cutter bomb bombazine bomb_calorimeter, bomb bomber bomber_jacket bomblet, cluster_bomblet bomb_rack bombshell bomb_shelter, air-raid_shelter, bombproof bone-ash_cup, cupel, refractory_pot bone_china bones, castanets, clappers, finger_cymbals boneshaker bongo, bongo_drum bonnet, poke_bonnet book book_bag bookbindery bookcase bookend bookmark, bookmarker bookmobile bookshelf bookshop, bookstore, bookstall boom boom, microphone_boom boomerang, throwing_stick, throw_stick booster, booster_rocket, booster_unit, takeoff_booster, takeoff_rocket booster, booster_amplifier, booster_station, relay_link, relay_station, relay_transmitter boot boot boot_camp bootee, bootie booth, cubicle, stall, kiosk booth booth boothose bootjack bootlace bootleg bootstrap bore_bit, 
borer, rock_drill, stone_drill boron_chamber borstal bosom Boston_rocker bota bottle bottle, feeding_bottle, nursing_bottle bottle_bank bottlebrush bottlecap bottle_opener bottling_plant bottom, freighter, merchantman, merchant_ship boucle boudoir boulle, boule, buhl bouncing_betty bouquet, corsage, posy, nosegay boutique, dress_shop boutonniere bow bow bow, bowknot bow_and_arrow bowed_stringed_instrument, string Bowie_knife bowl bowl bowl bowler_hat, bowler, derby_hat, derby, plug_hat bowline, bowline_knot bowling_alley bowling_ball, bowl bowling_equipment bowling_pin, pin bowling_shoe bowsprit bowstring bow_tie, bow-tie, bowtie box box, loge box, box_seat box_beam, box_girder box_camera, box_Kodak boxcar box_coat boxing_equipment boxing_glove, glove box_office, ticket_office, ticket_booth box_spring box_wrench, box_end_wrench brace, bracing brace, braces, orthodontic_braces brace brace, suspender, gallus brace_and_bit bracelet, bangle bracer, armguard brace_wrench bracket, wall_bracket bradawl, pricker brake brake brake_band brake_cylinder, hydraulic_brake_cylinder, master_cylinder brake_disk brake_drum, drum brake_lining brake_pad brake_pedal brake_shoe, shoe, skid brake_system, brakes brass, brass_instrument brass, memorial_tablet, plaque brass brassard brasserie brassie brassiere, bra, bandeau brass_knucks, knucks, brass_knuckles, knuckles, knuckle_duster brattice brazier, brasier breadbasket bread-bin, breadbox bread_knife breakable breakfast_area, breakfast_nook breakfast_table breakwater, groin, groyne, mole, bulwark, seawall, jetty breast_drill breast_implant breastplate, aegis, egis breast_pocket breathalyzer, breathalyser breechblock, breech_closer breechcloth, breechclout, loincloth breeches, knee_breeches, knee_pants, knickerbockers, knickers breeches_buoy breechloader breeder_reactor Bren, Bren_gun brewpub brick brickkiln bricklayer's_hammer brick_trowel, mason's_trowel brickwork bridal_gown, wedding_gown, wedding_dress bridge, span bridge, nosepiece 
bridle bridle_path, bridle_road bridoon briefcase briefcase_bomb briefcase_computer briefs, Jockey_shorts brig brig brigandine brigantine, hermaphrodite_brig brilliantine brilliant_pebble brim bristle_brush britches broad_arrow broadax, broadaxe brochette broadcaster, spreader broadcloth broadcloth broad_hatchet broadloom broadside broadsword brocade brogan, brogue, clodhopper, work_shoe broiler broken_arch bronchoscope broom broom_closet broomstick, broom_handle brougham Browning_automatic_rifle, BAR Browning_machine_gun, Peacemaker brownstone brunch_coat brush Brussels_carpet Brussels_lace bubble bubble_chamber bubble_jet_printer, bubble-jet_printer, bubblejet buckboard bucket, pail bucket_seat bucket_shop buckle buckram bucksaw buckskins buff, buffer buffer, polisher buffer, buffer_storage, buffer_store buffet, counter, sideboard buffing_wheel buggy, roadster bugle building, edifice building_complex, complex bulldog_clip, alligator_clip bulldog_wrench bulldozer, dozer bullet, slug bulletproof_vest bullet_train, bullet bullhorn, loud_hailer, loud-hailer bullion bullnose, bullnosed_plane bullpen, detention_cell, detention_centre bullpen bullring bulwark bumboat bumper bumper bumper_car, Dodgem bumper_guard bumper_jack bundle, sheaf bung, spile bungalow, cottage bungee, bungee_cord bunghole bunk bunk, feed_bunk bunk_bed, bunk bunker, sand_trap, trap bunker, dugout bunker bunsen_burner, bunsen, etna bunting bur, burr Burberry burette, buret burglar_alarm burial_chamber, sepulcher, sepulchre, sepulture burial_garment burial_mound, grave_mound, barrow, tumulus burin burqa, burka burlap, gunny burn_bag burner burnous, burnoose, burnouse burp_gun, machine_pistol burr bus, autobus, coach, charabanc, double-decker, jitney, motorbus, motorcoach, omnibus, passenger_vehicle bushel_basket bushing, cylindrical_lining bush_jacket business_suit buskin, combat_boot, desert_boot, half_boot, top_boot bustier bustle butcher_knife butcher_shop, meat_market butter_dish butterfly_valve 
butter_knife butt_hinge butt_joint, butt button buttonhook buttress, buttressing butt_shaft butt_weld, butt-weld buzz_bomb, robot_bomb, flying_bomb, doodlebug, V-1 buzzer BVD, BVD's bypass_condenser, bypass_capacitor byway, bypath, byroad cab, hack, taxi, taxicab cab, cabriolet cab cabana cabaret, nightclub, night_club, club, nightspot caber cabin cabin cabin_car, caboose cabin_class, second_class, economy_class cabin_cruiser, cruiser, pleasure_boat, pleasure_craft cabinet cabinet, console cabinet, locker, storage_locker cabinetwork cabin_liner cable, cable_television, cable_system, cable_television_service cable, line, transmission_line cable_car, car cache, memory_cache caddy, tea_caddy caesium_clock cafe, coffeehouse, coffee_shop, coffee_bar cafeteria cafeteria_tray caff caftan, kaftan caftan, kaftan cage, coop cage cagoule caisson calash, caleche, calash_top calceus calcimine calculator, calculating_machine caldron, cauldron calico caliper, calliper call-board call_center, call_centre caller_ID calliope, steam_organ calorimeter calpac, calpack, kalpac camail, aventail, ventail camber_arch cambric camcorder camel's_hair, camelhair camera, photographic_camera camera_lens, optical_lens camera_lucida camera_obscura camera_tripod camise camisole camisole, underbodice camlet camouflage camouflage, camo camp, encampment, cantonment, bivouac camp camp, refugee_camp campaign_hat campanile, belfry camp_chair camper, camping_bus, motor_home camper_trailer campstool camshaft can, tin, tin_can canal canal_boat, narrow_boat, narrowboat candelabrum, candelabra candid_camera candle, taper, wax_light candlepin candlesnuffer candlestick, candle_holder candlewick candy_thermometer cane cane cangue canister, cannister, tin cannery cannikin cannikin cannon cannon cannon cannon cannonball, cannon_ball, round_shot canoe can_opener, tin_opener canopic_jar, canopic_vase canopy canopy canopy canteen canteen canteen canteen, mobile_canteen canteen cant_hook cantilever cantilever_bridge 
cantle Canton_crepe canvas, canvass canvas, canvass canvas_tent, canvas, canvass cap cap cap capacitor, capacitance, condenser, electrical_condenser caparison, trapping, housing cape, mantle capital_ship capitol cap_opener capote, hooded_cloak capote, hooded_coat cap_screw capstan capstone, copestone, coping_stone, stretcher capsule captain's_chair car, auto, automobile, machine, motorcar car, railcar, railway_car, railroad_car car, elevator_car carabiner, karabiner, snap_ring carafe, decanter caravansary, caravanserai, khan, caravan_inn car_battery, automobile_battery carbine car_bomb carbon_arc_lamp, carbon_arc carboy carburetor, carburettor car_carrier cardcase cardiac_monitor, heart_monitor cardigan card_index, card_catalog, card_catalogue cardiograph, electrocardiograph cardioid_microphone car_door cardroom card_table card_table car-ferry cargo_area, cargo_deck, cargo_hold, hold, storage_area cargo_container cargo_door cargo_hatch cargo_helicopter cargo_liner cargo_ship, cargo_vessel carillon car_mirror caroche carousel, carrousel, merry-go-round, roundabout, whirligig carpenter's_hammer, claw_hammer, clawhammer carpenter's_kit, tool_kit carpenter's_level carpenter's_mallet carpenter's_rule carpenter's_square carpetbag carpet_beater, rug_beater carpet_loom carpet_pad, rug_pad, underlay, underlayment carpet_sweeper, sweeper carpet_tack carport, car_port carrack, carack carrel, carrell, cubicle, stall carriage, equipage, rig carriage carriage_bolt carriageway carriage_wrench carrick_bend carrier carryall, holdall, tote, tote_bag carrycot car_seat cart car_tire, automobile_tire, auto_tire, rubber_tire carton cartouche, cartouch car_train cartridge cartridge, pickup cartridge_belt cartridge_extractor, cartridge_remover, extractor cartridge_fuse cartridge_holder, cartridge_clip, clip, magazine cartwheel carving_fork carving_knife car_wheel caryatid cascade_liquefier cascade_transformer case case, display_case, showcase, vitrine case, compositor's_case, 
typesetter's_case casein_paint, casein case_knife, sheath_knife case_knife casement casement_window casern case_shot, canister, canister_shot cash_bar cashbox, money_box, till cash_machine, cash_dispenser, automated_teller_machine, automatic_teller_machine, automated_teller, automatic_teller, ATM cashmere cash_register, register casing, case casino, gambling_casino casket, jewel_casket casque casquet, casquetel Cassegrainian_telescope, Gregorian_telescope casserole cassette cassette_deck cassette_player cassette_recorder cassette_tape cassock cast, plaster_cast, plaster_bandage caster, castor caster, castor castle castle, rook catacomb catafalque catalytic_converter catalytic_cracker, cat_cracker catamaran catapult, arbalest, arbalist, ballista, bricole, mangonel, onager, trebuchet, trebucket catapult, launcher catboat cat_box catch catchall catcher's_mask catchment Caterpillar, cat cathedra, bishop's_throne cathedral cathedral, duomo catheter cathode cathode-ray_tube, CRT cat-o'-nine-tails, cat cat's-paw catsup_bottle, ketchup_bottle cattle_car cattle_guard, cattle_grid cattleship, cattle_boat cautery, cauterant cavalier_hat, slouch_hat cavalry_sword, saber, sabre cavetto cavity_wall C_battery C-clamp CD_drive CD_player CD-R, compact_disc_recordable, CD-WO, compact_disc_write-once CD-ROM, compact_disc_read-only_memory CD-ROM_drive cedar_chest ceiling celesta cell, electric_cell cell, jail_cell, prison_cell cellar, wine_cellar cellblock, ward cello, violoncello cellophane cellular_telephone, cellular_phone, cellphone, cell, mobile_phone cellulose_tape, Scotch_tape, Sellotape cenotaph, empty_tomb censer, thurible center, centre center_punch Centigrade_thermometer central_processing_unit, CPU, C.P.U., central_processor, processor, mainframe centrifugal_pump centrifuge, extractor, separator ceramic ceramic_ware cereal_bowl cereal_box cerecloth cesspool, cesspit, sink, sump chachka, tsatske, tshatshke, tchotchke chador, chadar, chaddar, chuddar chafing_dish chain chain 
chainlink_fence chain_mail, ring_mail, mail, chain_armor, chain_armour, ring_armor, ring_armour chain_printer chain_saw, chainsaw chain_store chain_tongs chain_wrench chair chair chair_of_state chairlift, chair_lift chaise, shay chaise_longue, chaise, daybed chalet chalice, goblet chalk challis chamberpot, potty, thunder_mug chambray chamfer_bit chamfer_plane chamois_cloth chancel, sanctuary, bema chancellery chancery chandelier, pendant, pendent chandlery chanfron, chamfron, testiere, frontstall, front-stall chanter, melody_pipe chantry chap chapel chapterhouse, fraternity_house, frat_house chapterhouse character_printer, character-at-a-time_printer, serial_printer charcuterie charge-exchange_accelerator charger, battery_charger chariot chariot charnel_house, charnel chassis chassis chasuble chateau chatelaine checker, chequer checkout, checkout_counter cheekpiece cheeseboard, cheese_tray cheesecloth cheese_cutter cheese_press chemical_bomb, gas_bomb chemical_plant chemical_reactor chemise, sack, shift chemise, shimmy, shift, slip, teddy chenille chessman, chess_piece chest chesterfield chest_of_drawers, chest, bureau, dresser chest_protector cheval-de-frise, chevaux-de-frise cheval_glass chicane chicken_coop, coop, hencoop, henhouse chicken_wire chicken_yard, hen_yard, chicken_run, fowl_run chiffon chiffonier, commode child's_room chime, bell, gong chimney_breast chimney_corner, inglenook china china_cabinet, china_closet chinchilla Chinese_lantern Chinese_puzzle chinning_bar chino chino chin_rest chin_strap chintz chip, microchip, micro_chip, silicon_chip, microprocessor_chip chip, poker_chip chisel chlamys choir choir_loft choke choke, choke_coil, choking_coil chokey, choky choo-choo chopine, platform chordophone Christmas_stocking chronograph chronometer chronoscope chuck chuck_wagon chukka, chukka_boot church, church_building church_bell church_hat church_key church_tower churidars churn, butter_churn ciderpress cigar_band cigar_box cigar_cutter 
cigarette_butt cigarette_case cigarette_holder cigar_lighter, cigarette_lighter, pocket_lighter cinch, girth cinema, movie_theater, movie_theatre, movie_house, picture_palace cinquefoil circle, round circlet circuit, electrical_circuit, electric_circuit circuit_board, circuit_card, board, card, plug-in, add-in circuit_breaker, breaker circuitry circular_plane, compass_plane circular_saw, buzz_saw circus_tent, big_top, round_top, top cistern cistern, water_tank cittern, cithern, cither, citole, gittern city_hall cityscape city_university civies, civvies civilian_clothing, civilian_dress, civilian_garb, plain_clothes clack_valve, clack, clapper_valve clamp, clinch clamshell, grapple clapper, tongue clapperboard clarence clarinet Clark_cell, Clark_standard_cell clasp clasp_knife, jackknife classroom, schoolroom clavichord clavier, Klavier clay_pigeon claymore_mine, claymore claymore cleaners, dry_cleaners cleaning_implement, cleaning_device, cleaning_equipment cleaning_pad clean_room, white_room clearway cleat cleat cleats cleaver, meat_cleaver, chopper clerestory, clearstory clevis clews cliff_dwelling climbing_frame clinch clinch, clench clincher clinic clinical_thermometer, mercury-in-glass_clinical_thermometer clinker, clinker_brick clinometer, inclinometer clip clip_lead clip-on clipper clipper clipper, clipper_ship cloak cloak cloakroom, coatroom cloche cloche clock clock_pendulum clock_radio clock_tower clockwork clog, geta, patten, sabot cloisonne cloister closed_circuit, loop closed-circuit_television closed_loop, closed-loop_system closet closeup_lens cloth_cap, flat_cap cloth_covering clothesbrush clothes_closet, clothespress clothes_dryer, clothes_drier clothes_hamper, laundry_basket, clothes_basket, voider clotheshorse clothespin, clothes_pin, clothes_peg clothes_tree, coat_tree, coat_stand clothing, article_of_clothing, vesture, wear, wearable, habiliment clothing_store, haberdashery, haberdashery_store, mens_store clout_nail, clout clove_hitch club_car, 
lounge_car clubroom cluster_bomb clutch clutch, clutch_pedal clutch_bag, clutch coach, four-in-hand, coach-and-four coach_house, carriage_house, remise coal_car coal_chute coal_house coal_shovel coaming coaster_brake coat coat_button coat_closet coatdress coatee coat_hanger, clothes_hanger, dress_hanger coating, coat coating coat_of_paint coatrack, coat_rack, hatrack coattail coaxial_cable, coax, coax_cable cobweb cobweb Cockcroft_and_Walton_accelerator, Cockcroft-Walton_accelerator, Cockcroft_and_Walton_voltage_multiplier, Cockcroft-Walton_voltage_multiplier cocked_hat cockhorse cockleshell cockpit cockpit cockpit cockscomb, coxcomb cocktail_dress, sheath cocktail_lounge cocktail_shaker cocotte codpiece coelostat coffee_can coffee_cup coffee_filter coffee_maker coffee_mill, coffee_grinder coffee_mug coffeepot coffee_stall coffee_table, cocktail_table coffee_urn coffer Coffey_still coffin, casket cog, sprocket coif coil, spiral, volute, whorl, helix coil coil coil_spring, volute_spring coin_box colander, cullender cold_cathode cold_chisel, set_chisel cold_cream, coldcream, face_cream, vanishing_cream cold_frame collar, neckband collar college collet, collet_chuck collider colliery, pit collimator collimator cologne, cologne_water, eau_de_cologne colonnade colonoscope colorimeter, tintometer colors, colours color_television, colour_television, color_television_system, colour_television_system, color_TV, colour_TV color_tube, colour_tube, color_television_tube, colour_television_tube, color_TV_tube, colour_TV_tube color_wash, colour_wash Colt colter, coulter columbarium columbarium, cinerarium column, pillar column, pillar comb comb comber combination_lock combination_plane combine comforter, pacifier, baby's_dummy, teething_ring command_module commissary commissary commodity, trade_good, good common_ax, common_axe, Dayton_ax, Dayton_axe common_room communications_satellite communication_system community_center, civic_center commutator commuter, commuter_train 
compact, powder_compact compact, compact_car compact_disk, compact_disc, CD compact-disk_burner, CD_burner companionway compartment compartment compass compass compass_card, mariner's_compass compass_saw compound compound_lens compound_lever compound_microscope compress compression_bandage, tourniquet compressor computer, computing_machine, computing_device, data_processor, electronic_computer, information_processing_system computer_circuit computerized_axial_tomography_scanner, CAT_scanner computer_keyboard, keypad computer_monitor computer_network computer_screen, computer_display computer_store computer_system, computing_system, automatic_data_processing_system, ADP_system, ADPS concentration_camp, stockade concert_grand, concert_piano concert_hall concertina concertina concrete_mixer, cement_mixer condensation_pump, diffusion_pump condenser, optical_condenser condenser condenser condenser_microphone, capacitor_microphone condominium condominium, condo conductor cone_clutch, cone_friction_clutch confectionery, confectionary, candy_store conference_center, conference_house conference_room conference_table, council_table, council_board confessional conformal_projection, orthomorphic_projection congress_boot, congress_shoe, congress_gaiter conic_projection, conical_projection connecting_rod connecting_room connection, connexion, connector, connecter, connective conning_tower conning_tower conservatory, hothouse, indoor_garden conservatory, conservatoire console console console_table, console consulate contact, tangency contact, contact_lens container container_ship, containership, container_vessel containment contrabassoon, contrafagotto, double_bassoon control, controller control_center control_circuit, negative_feedback_circuit control_key, command_key control_panel, instrument_panel, control_board, board, panel control_rod control_room control_system control_tower convector convenience_store convent conventicle, meetinghouse converging_lens, convex_lens 
converter, convertor convertible convertible, sofa_bed conveyance, transport conveyer_belt, conveyor_belt, conveyer, conveyor, transporter cooker cookfire cookhouse cookie_cutter cookie_jar, cooky_jar cookie_sheet, baking_tray cooking_utensil, cookware cookstove coolant_system cooler, ice_chest cooling_system, cooling cooling_system, engine_cooling_system cooling_tower coonskin_cap, coonskin cope coping_saw copperware copyholder coquille coracle corbel, truss corbel_arch corbel_step, corbie-step, corbiestep, crow_step corbie_gable cord, corduroy cord, electric_cord cordage cords, corduroys core core_bit core_drill corer cork, bottle_cork corker corkscrew, bottle_screw corncrib corner, quoin corner, nook corner_post cornet, horn, trumpet, trump cornice cornice cornice, valance, valance_board, pelmet correctional_institution corrugated_fastener, wiggle_nail corselet, corslet corset, girdle, stays cosmetic cosmotron costume costume costume costume cosy, tea_cosy, cozy, tea_cozy cot, camp_bed cottage_tent cotter, cottar cotter_pin cotton cotton_flannel, Canton_flannel cotton_mill couch couch couchette coude_telescope, coude_system counter counter, tabulator counter counterbore, countersink, countersink_bit counter_tube country_house country_store, general_store, trading_post coupe coupling, coupler court, courtyard court court, courtroom court Courtelle courthouse courthouse coverall covered_bridge covered_couch covered_wagon, Conestoga_wagon, Conestoga, prairie_wagon, prairie_schooner covering coverlet cover_plate cowbarn, cowshed, cow_barn, cowhouse, byre cowbell cowboy_boot cowboy_hat, ten-gallon_hat cowhide cowl cow_pen, cattle_pen, corral CPU_board, mother_board crackle, crackleware, crackle_china cradle craft cramp, cramp_iron crampon, crampoon, climbing_iron, climber crampon, crampoon crane craniometer crank, starter crankcase crankshaft crash_barrier crash_helmet crate cravat crayon, wax_crayon crazy_quilt cream, ointment, emollient cream_pitcher, creamer 
creche, foundling_hospital creche credenza, credence creel crematory, crematorium, cremation_chamber crematory, crematorium crepe, crape crepe_de_Chine crescent_wrench cretonne crib, cot crib cricket_ball cricket_bat, bat cricket_equipment cringle, eyelet, loop, grommet, grummet crinoline crinoline crochet_needle, crochet_hook crock, earthenware_jar Crock_Pot crook, shepherd's_crook Crookes_radiometer Crookes_tube croquet_ball croquet_equipment croquet_mallet cross crossbar crossbar crossbar crossbench cross_bit crossbow crosscut_saw, crosscut_handsaw, cutoff_saw crossjack, mizzen_course crosspiece crotchet croupier's_rake crowbar, wrecking_bar, pry, pry_bar crown, diadem crown, crownwork, jacket, jacket_crown, cap crown_jewels crown_lens crow's_nest crucible, melting_pot crucifix, rood, rood-tree cruet, crewet cruet-stand cruise_control cruise_missile cruiser cruiser, police_cruiser, patrol_car, police_car, prowl_car, squad_car cruise_ship, cruise_liner crupper cruse crusher crutch cryometer cryoscope cryostat crypt crystal, watch_crystal, watch_glass crystal_detector crystal_microphone crystal_oscillator, quartz_oscillator crystal_set cubitiere cucking_stool, ducking_stool cuckoo_clock cuddy cudgel cue, cue_stick, pool_cue, pool_stick cue_ball cuff, turnup cuirass cuisse cul, cul_de_sac, dead_end culdoscope cullis culotte cultivator, tiller culverin culverin culvert cup cupboard, closet cup_hook cupola cupola curb, curb_bit curb_roof curbstone, kerbstone curette, curet curler, hair_curler, roller, crimper curling_iron currycomb cursor, pointer curtain, drape, drapery, mantle, pall customhouse, customshouse cutaway, cutaway_drawing, cutaway_model cutlas, cutlass cutoff cutout cutter, cutlery, cutting_tool cutter cutting_implement cutting_room cutty_stool cutwork cybercafe cyclopean_masonry cyclostyle cyclotron cylinder cylinder, piston_chamber cylinder_lock cymbal dacha Dacron, Terylene dado dado_plane dagger, sticker dairy, dairy_farm dais, podium, pulpit, 
rostrum, ambo, stump, soapbox daisy_print_wheel, daisy_wheel daisywheel_printer dam, dike, dyke damask dampener, moistener damper, muffler damper_block, piano_damper dark_lantern, bull's-eye darkroom darning_needle, embroidery_needle dart dart dashboard, fascia dashiki, daishiki dash-pot data_converter data_input_device, input_device data_multiplexer data_system, information_system davenport davenport davit daybed, divan_bed daybook, ledger day_nursery, day_care_center day_school dead_axle deadeye deadhead deanery deathbed death_camp death_house, death_row death_knell, death_bell death_seat deck deck deck_chair, beach_chair deck-house deckle deckle_edge, deckle declinometer, transit_declinometer decoder decolletage decoupage dedicated_file_server deep-freeze, Deepfreeze, deep_freezer, freezer deerstalker defense_system, defence_system defensive_structure, defense, defence defibrillator defilade deflector delayed_action delay_line delft delicatessen, deli, food_shop delivery_truck, delivery_van, panel_truck delta_wing demijohn demitasse den denim, dungaree, jean densimeter, densitometer densitometer dental_appliance dental_floss, floss dental_implant dentist's_drill, burr_drill denture, dental_plate, plate deodorant, deodourant department_store, emporium departure_lounge depilatory, depilator, epilator depressor depth_finder depth_gauge, depth_gage derrick derrick derringer desk desk_phone desktop_computer dessert_spoon destroyer, guided_missile_destroyer destroyer_escort detached_house, single_dwelling detector, sensor, sensing_element detector detention_home, detention_house, house_of_detention, detention_camp detonating_fuse detonator, detonating_device, cap developer device Dewar_flask, Dewar dhoti dhow dial, telephone_dial dial dial dialog_box, panel dial_telephone, dial_phone dialyzer, dialysis_machine diamante diaper, nappy, napkin diaper diaphone diaphragm, stop diaphragm diathermy_machine dibble, dibber dice_cup, dice_box dicer dickey, dickie, dicky, 
shirtfront dickey, dickie, dicky, dickey-seat, dickie-seat, dicky-seat Dictaphone die diesel, diesel_engine, diesel_motor diesel-electric_locomotive, diesel-electric diesel-hydraulic_locomotive, diesel-hydraulic diesel_locomotive diestock differential_analyzer differential_gear, differential diffuser, diffusor diffuser, diffusor digester diggings, digs, domiciliation, lodgings, pad digital-analog_converter, digital-to-analog_converter digital_audiotape, DAT digital_camera digital_clock digital_computer digital_display, alphanumeric_display digital_subscriber_line, DSL digital_voltmeter digital_watch digitizer, digitiser, analog-digital_converter, analog-to-digital_converter dilator, dilater dildo dimity dimmer diner dinette dinghy, dory, rowboat dining_area dining_car, diner, dining_compartment, buffet_car dining-hall dining_room, dining-room dining-room_furniture dining-room_table dining_table, board dinner_bell dinner_dress, dinner_gown, formal, evening_gown dinner_jacket, tux, tuxedo, black_tie dinner_napkin dinner_pail, dinner_bucket dinner_table dinner_theater, dinner_theatre diode, semiconductor_diode, junction_rectifier, crystal_rectifier diode, rectifying_tube, rectifying_valve dip diplomatic_building dipole, dipole_antenna dipper dipstick DIP_switch, dual_inline_package_switch directional_antenna directional_microphone direction_finder dirk dirndl dirndl dirty_bomb discharge_lamp discharge_pipe disco, discotheque discount_house, discount_store, discounter, wholesale_house discus, saucer disguise dish dish, dish_aerial, dish_antenna, saucer dishpan dish_rack dishrag, dishcloth dishtowel, dish_towel, tea_towel dishwasher, dish_washer, dishwashing_machine disk, disc disk_brake, disc_brake disk_clutch disk_controller disk_drive, disc_drive, hard_drive, Winchester_drive diskette, floppy, floppy_disk disk_harrow, disc_harrow dispatch_case, dispatch_box dispensary dispenser display, video_display display_adapter, display_adaptor display_panel, display_board, 
board display_window, shop_window, shopwindow, show_window disposal, electric_pig, garbage_disposal disrupting_explosive, bursting_explosive distaff distillery, still distributor, distributer, electrical_distributor distributor_cam distributor_cap distributor_housing distributor_point, breaker_point, point ditch ditch_spade, long-handled_spade ditty_bag divan divan, diwan dive_bomber diverging_lens, concave_lens divided_highway, dual_carriageway divider diving_bell divining_rod, dowser, dowsing_rod, waterfinder, water_finder diving_suit, diving_dress dixie Dixie_cup, paper_cup dock, dockage, docking_facility doeskin dogcart doggie_bag, doggy_bag dogsled, dog_sled, dog_sleigh dog_wrench doily, doyley, doyly doll, dolly dollhouse, doll's_house dolly dolman dolman, dolman_jacket dolman_sleeve dolmen, cromlech, portal_tomb dome dome, domed_stadium, covered_stadium domino, half_mask, eye_mask dongle donkey_jacket door door door doorbell, bell, buzzer doorframe, doorcase doorjamb, doorpost doorlock doormat, welcome_mat doornail doorplate doorsill, doorstep, threshold doorstop, doorstopper Doppler_radar dormer, dormer_window dormer_window dormitory, dorm, residence_hall, hall, student_residence dormitory, dormitory_room, dorm_room dosemeter, dosimeter dossal, dossel dot_matrix_printer, matrix_printer, dot_printer double_bed double-bitted_ax, double-bitted_axe, Western_ax, Western_axe double_boiler, double_saucepan double-breasted_jacket double-breasted_suit double_door double_glazing double-hung_window double_knit doubler double_reed double-reed_instrument, double_reed doublet doubletree douche, douche_bag dovecote, columbarium, columbary Dover's_powder dovetail, dovetail_joint dovetail_plane dowel, dowel_pin, joggle downstage drafting_instrument drafting_table, drawing_table Dragunov drainage_ditch drainage_system drain_basket drainplug drape drapery drawbar drawbridge, lift_bridge drawer drawers, underdrawers, shorts, boxers, boxershorts drawing_chalk drawing_room, 
withdrawing_room drawing_room drawknife, drawshave drawstring_bag dray, camion dreadnought, dreadnaught dredge dredger dredging_bucket dress, frock dress_blues, dress_whites dresser dress_hat, high_hat, opera_hat, silk_hat, stovepipe, top_hat, topper, beaver dressing, medical_dressing dressing_case dressing_gown, robe-de-chambre, lounging_robe dressing_room dressing_sack, dressing_sacque dressing_table, dresser, vanity, toilet_table dress_rack dress_shirt, evening_shirt dress_suit, full_dress, tailcoat, tail_coat, tails, white_tie, white_tie_and_tails dress_uniform drift_net drill electric_drill drilling_platform, offshore_rig drill_press drill_rig, drilling_rig, oilrig, oil_rig drinking_fountain, water_fountain, bubbler drinking_vessel drip_loop drip_mat drip_pan dripping_pan, drip_pan drip_pot drive drive drive_line, drive_line_system driver, number_one_wood driveshaft driveway, drive, private_road driving_iron, one_iron driving_wheel drogue, drogue_chute, drogue_parachute drogue_parachute drone, drone_pipe, bourdon drone, pilotless_aircraft, radio-controlled_aircraft drop_arch drop_cloth drop_curtain, drop_cloth, drop drop_forge, drop_hammer, drop_press drop-leaf_table dropper, eye_dropper droshky, drosky drove, drove_chisel drugget drugstore, apothecary's_shop, chemist's, chemist's_shop, pharmacy drum, membranophone, tympan drum, metal_drum drum_brake drumhead, head drum_printer drum_sander, electric_sander, sander, smoother drumstick dry_battery dry-bulb_thermometer dry_cell dry_dock, drydock, graving_dock dryer, drier dry_fly dry_kiln dry_masonry dry_point dry_wall, dry-stone_wall dual_scan_display duck duckboard duckpin dudeen duffel, duffle duffel_bag, duffle_bag, duffel, duffle duffel_coat, duffle_coat dugout dugout_canoe, dugout, pirogue dulciana dulcimer dulcimer dumbbell dumb_bomb, gravity_bomb dumbwaiter, food_elevator dumdum, dumdum_bullet dumpcart Dumpster dump_truck, dumper, tipper_truck, tipper_lorry, tip_truck, tipper Dumpy_level dunce_cap, 
dunce's_cap, fool's_cap dune_buggy, beach_buggy dungeon duplex_apartment, duplex duplex_house, duplex, semidetached_house duplicator, copier dust_bag, vacuum_bag dustcloth, dustrag, duster dust_cover dust_cover, dust_sheet dustmop, dust_mop, dry_mop dustpan Dutch_oven Dutch_oven dwelling, home, domicile, abode, habitation, dwelling_house dye-works dynamo dynamometer, ergometer Eames_chair earflap, earlap early_warning_radar early_warning_system earmuff earphone, earpiece, headphone, phone earplug earplug earthenware earthwork easel easy_chair, lounge_chair, overstuffed_chair eaves ecclesiastical_attire, ecclesiastical_robe echinus echocardiograph edger edge_tool efficiency_apartment egg-and-dart, egg-and-anchor, egg-and-tongue eggbeater, eggwhisk egg_timer eiderdown, duvet, continental_quilt eight_ball ejection_seat, ejector_seat, capsule elastic elastic_bandage Elastoplast elbow elbow_pad electric, electric_automobile, electric_car electrical_cable electrical_contact electrical_converter electrical_device electrical_system electric_bell electric_blanket electric_chair, chair, death_chair, hot_seat electric_clock electric-discharge_lamp, gas-discharge_lamp electric_fan, blower electric_frying_pan electric_furnace electric_guitar electric_hammer electric_heater, electric_fire electric_lamp electric_locomotive electric_meter, power_meter electric_mixer electric_motor electric_organ, electronic_organ, Hammond_organ, organ electric_range electric_refrigerator, fridge electric_toothbrush electric_typewriter electro-acoustic_transducer electrode electrodynamometer electroencephalograph electrograph electrolytic, electrolytic_capacitor, electrolytic_condenser electrolytic_cell electromagnet electrometer electromyograph electron_accelerator electron_gun electronic_balance electronic_converter electronic_device electronic_equipment electronic_fetal_monitor, electronic_foetal_monitor, fetal_monitor, foetal_monitor electronic_instrument, electronic_musical_instrument 
electronic_voltmeter electron_microscope electron_multiplier electrophorus electroscope electrostatic_generator, electrostatic_machine, Wimshurst_machine, Van_de_Graaff_generator electrostatic_printer elevator, lift elevator elevator_shaft embankment embassy embellishment emergency_room, ER emesis_basin emitter empty emulsion, photographic_emulsion enamel enamel enamelware encaustic encephalogram, pneumoencephalogram enclosure endoscope energizer, energiser engine engine engineering, engine_room enginery English_horn, cor_anglais English_saddle, English_cavalry_saddle enlarger ensemble ensign entablature entertainment_center entrenching_tool, trenching_spade entrenchment, intrenchment envelope envelope envelope, gasbag eolith epauliere epee epergne epicyclic_train, epicyclic_gear_train epidiascope epilating_wax equalizer, equaliser equatorial equipment erasable_programmable_read-only_memory, EPROM eraser erecting_prism erection Erlenmeyer_flask escape_hatch escapement escape_wheel escarpment, escarp, scarp, protective_embankment escutcheon, scutcheon esophagoscope, oesophagoscope espadrille espalier espresso_maker espresso_shop establishment estaminet estradiol_patch etagere etamine, etamin etching ethernet ethernet_cable Eton_jacket etui eudiometer euphonium evaporative_cooler evening_bag exercise_bike, exercycle exercise_device exhaust, exhaust_system exhaust_fan exhaust_valve exhibition_hall, exhibition_area Exocet expansion_bit, expansive_bit expansion_bolt explosive_detection_system, EDS explosive_device explosive_trace_detection, ETD express, limited extension, telephone_extension, extension_phone extension_cord external-combustion_engine external_drive extractor eyebrow_pencil eyecup, eyebath, eye_cup eyeliner eyepatch, patch eyepiece, ocular eyeshadow fabric, cloth, material, textile facade, frontage, frontal face_guard face_mask faceplate face_powder face_veil facing, cladding facing facing, veneer facsimile, facsimile_machine, fax factory, mill, 
manufacturing_plant, manufactory factory_ship fagot, faggot fagot_stitch, faggot_stitch Fahrenheit_thermometer faience faille fairlead fairy_light falchion fallboard, fall-board fallout_shelter false_face false_teeth family_room fan fan_belt fan_blade fancy_dress, masquerade, masquerade_costume fanion fanlight fanjet, fan-jet, fanjet_engine, turbojet, turbojet_engine, turbofan, turbofan_engine fanjet, fan-jet, turbofan, turbojet fanny_pack, butt_pack fan_tracery fan_vaulting farm_building farmer's_market, green_market, greenmarket farmhouse farm_machine farmplace, farm-place, farmstead farmyard farthingale fastener, fastening, holdfast, fixing fast_reactor fat_farm fatigues faucet, spigot fauld fauteuil feather_boa, boa featheredge fedora, felt_hat, homburg, Stetson, trilby feedback_circuit, feedback_loop feedlot fell, felled_seam felloe, felly felt felt-tip_pen, felt-tipped_pen, felt_tip, Magic_Marker felucca fence, fencing fencing_mask, fencer's_mask fencing_sword fender, wing fender, buffer, cowcatcher, pilot Ferris_wheel ferrule, collet ferry, ferryboat ferule festoon fetoscope, foetoscope fetter, hobble fez, tarboosh fiber, fibre, vulcanized_fiber fiber_optic_cable, fibre_optic_cable fiberscope fichu fiddlestick, violin_bow field_artillery, field_gun field_coil, field_winding field-effect_transistor, FET field-emission_microscope field_glass, glass, spyglass field_hockey_ball field_hospital field_house, sports_arena field_lens field_magnet field-sequential_color_television, field-sequential_color_TV, field-sequential_color_television_system, field-sequential_color_TV_system field_tent fieldwork fife fifth_wheel, spare fighter, fighter_aircraft, attack_aircraft fighting_chair fig_leaf figure_eight, figure_of_eight figure_loom, figured-fabric_loom figure_skate filament filature file file, file_cabinet, filing_cabinet file_folder file_server filigree, filagree, fillagree filling film, photographic_film film, plastic_film film_advance filter filter finder, 
viewfinder, view_finder finery fine-tooth_comb, fine-toothed_comb finger fingerboard finger_bowl finger_paint, fingerpaint finger-painting finger_plate, escutcheon, scutcheon fingerstall, cot finish_coat, finishing_coat finish_coat, finishing_coat finisher fin_keel fipple fipple_flute, fipple_pipe, recorder, vertical_flute fire fire_alarm, smoke_alarm firearm, piece, small-arm fire_bell fireboat firebox firebrick fire_control_radar fire_control_system fire_engine, fire_truck fire_extinguisher, extinguisher, asphyxiator fire_iron fireman's_ax, fireman's_axe fireplace, hearth, open_fireplace fire_screen, fireguard fire_tongs, coal_tongs fire_tower firewall firing_chamber, gun_chamber firing_pin firkin firmer_chisel first-aid_kit first-aid_station first_base first_class fishbowl, fish_bowl, goldfish_bowl fisherman's_bend fisherman's_knot, true_lover's_knot, truelove_knot fisherman's_lure, fish_lure fishhook fishing_boat, fishing_smack, fishing_vessel fishing_gear, tackle, fishing_tackle, fishing_rig, rig fishing_rod, fishing_pole fish_joint fish_knife fishnet, fishing_net fish_slice fitment fixative fixer-upper flag flageolet, treble_recorder, shepherd's_pipe flagon flagpole, flagstaff flagship flail flambeau flamethrower flange, rim flannel flannel, gabardine, tweed, white flannelette flap, flaps flash, photoflash, flash_lamp, flashgun, flashbulb, flash_bulb flash flash_camera flasher flashlight, torch flashlight_battery flash_memory flask flat_arch, straight_arch flatbed flatbed_press, cylinder_press flat_bench flatcar, flatbed, flat flat_file flatlet flat_panel_display, FPD flats flat_tip_screwdriver fleece fleet_ballistic_missile_submarine fleur-de-lis, fleur-de-lys flight_simulator, trainer flintlock flintlock, firelock flip-flop, thong flipper, fin float, plasterer's_float floating_dock, floating_dry_dock floatplane, pontoon_plane flood, floodlight, flood_lamp, photoflood floor, flooring floor, level, storey, story floor floorboard floor_cover, floor_covering 
floor_joist floor_lamp flophouse, dosshouse florist, florist_shop, flower_store floss flotsam, jetsam flour_bin flour_mill flowerbed, flower_bed, bed_of_flowers flugelhorn, fluegelhorn fluid_drive fluid_flywheel flume fluorescent_lamp fluoroscope, roentgenoscope flush_toilet, lavatory flute, transverse_flute flute, flute_glass, champagne_flute flux_applicator fluxmeter fly flying_boat flying_buttress, arc-boutant flying_carpet flying_jib fly_rod fly_tent flytrap flywheel fob, watch_chain, watch_guard foghorn foglamp foil fold, sheepfold, sheep_pen, sheepcote folder folding_chair folding_door, accordion_door folding_saw food_court food_processor food_hamper foot footage football football_helmet football_stadium footbath foot_brake footbridge, overcrossing, pedestrian_bridge foothold, footing footlocker, locker foot_rule footstool, footrest, ottoman, tuffet footwear, footgear footwear forceps force_pump fore-and-after fore-and-aft_sail forecastle, fo'c'sle forecourt foredeck fore_edge, foredge foreground foremast fore_plane foresail forestay foretop fore-topmast fore-topsail forge fork forklift formalwear, eveningwear, evening_dress, evening_clothes Formica fortification, munition fortress, fort forty-five Foucault_pendulum foulard foul-weather_gear foundation_garment, foundation foundry, metalworks fountain fountain_pen four-in-hand four-poster four-pounder four-stroke_engine, four-stroke_internal-combustion_engine four-wheel_drive, 4WD four-wheel_drive, 4WD four-wheeler fowling_piece foxhole, fox_hole fragmentation_bomb, antipersonnel_bomb, anti-personnel_bomb, daisy_cutter frail fraise frame, framing frame frame_buffer framework Francis_turbine franking_machine free_house free-reed free-reed_instrument freewheel freight_car freight_elevator, service_elevator freight_liner, liner_train freight_train, rattler French_door French_horn, horn French_polish, French_polish_shellac French_roof French_window Fresnel_lens fret friary friction_clutch frieze frieze frigate 
frigate frill, flounce, ruffle, furbelow Frisbee frock frock_coat frontlet, frontal front_porch front_projector fruit_machine frying_pan, frypan, skillet fuel_filter fuel_gauge, fuel_indicator fuel_injection, fuel_injection_system fuel_system full-dress_uniform full_metal_jacket full_skirt fumigator funeral_home, funeral_parlor, funeral_parlour, funeral_chapel, funeral_church, funeral-residence funnel funny_wagon fur fur_coat fur_hat furnace furnace_lining, refractory furnace_room furnishing furnishing, trappings furniture, piece_of_furniture, article_of_furniture fur-piece furrow fuse, electrical_fuse, safety_fuse fusee_drive, fusee fuselage fusil fustian futon gabardine gable, gable_end, gable_wall gable_roof, saddle_roof, saddleback, saddleback_roof gadgetry gaff gaff gaff gaffsail, gaff-headed_sail gaff_topsail, fore-and-aft_topsail gag, muzzle gaiter gaiter Galilean_telescope galleon gallery gallery, art_gallery, picture_gallery galley, ship's_galley, caboose, cookhouse galley galley gallows gallows_tree, gallows-tree, gibbet, gallous galvanometer gambling_house, gambling_den, gambling_hell, gaming_house gambrel, gambrel_roof game gamebag game_equipment gaming_table gamp, brolly gangplank, gangboard, gangway gangsaw gangway gantlet gantry, gauntry garage garage, service_department Garand_rifle, Garand, M-1, M-1_rifle garbage garbage_truck, dustcart garboard, garboard_plank, garboard_strake garden garden garden_rake garden_spade garden_tool, lawn_tool garden_trowel gargoyle garibaldi garlic_press garment garment_bag garrison_cap, overseas_cap garrote, garotte, garrotte, iron_collar garter, supporter garter_belt, suspender_belt garter_stitch gas_guzzler gas_shell gas_bracket gas_burner, gas_jet gas-cooled_reactor gas-discharge_tube gas_engine gas_fixture gas_furnace gas_gun gas_heater gas_holder, gasometer gasket gas_lamp gas_maser gasmask, respirator, gas_helmet gas_meter, gasometer gasoline_engine, petrol_engine gasoline_gauge, gasoline_gage, gas_gauge, 
gas_gage, petrol_gauge, petrol_gage gas_oven gas_oven gas_pump, gasoline_pump, petrol_pump, island_dispenser gas_range, gas_stove, gas_cooker gas_ring gas_tank, gasoline_tank, petrol_tank gas_thermometer, air_thermometer gastroscope gas_turbine gas-turbine_ship gat, rod gate gatehouse gateleg_table gatepost gathered_skirt Gatling_gun gauge, gage gauntlet, gantlet gauntlet, gantlet, metal_glove gauze, netting, veiling gauze, gauze_bandage gavel gazebo, summerhouse gear, gear_wheel, geared_wheel, cogwheel gear, paraphernalia, appurtenance gear, gear_mechanism gearbox, gear_box, gear_case gearing, gear, geartrain, power_train, train gearset gearshift, gearstick, shifter, gear_lever Geiger_counter, Geiger-Muller_counter Geiger_tube, Geiger-Muller_tube gene_chip, DNA_chip general-purpose_bomb, GP_bomb generator generator generator Geneva_gown geodesic_dome georgette gharry ghat ghetto_blaster, boom_box gift_shop, novelty_shop gift_wrapping gig gig gig gig gildhall gill_net gilt, gilding gimbal gingham girandole, girandola girder girdle, cincture, sash, waistband, waistcloth glass, drinking_glass glass glass_cutter glasses_case glebe_house Glengarry glider, sailplane Global_Positioning_System, GPS glockenspiel, orchestral_bells glory_hole, lazaretto glove glove_compartment glow_lamp glow_tube glyptic_art, glyptography glyptics, lithoglyptics gnomon goal goalmouth goalpost goblet godown goggles go-kart gold_plate golf_bag golf_ball golfcart, golf_cart golf_club, golf-club, club golf-club_head, club_head, club-head, clubhead golf_equipment golf_glove golliwog, golliwogg gondola gong, tam-tam goniometer Gordian_knot gorget gossamer Gothic_arch gouache gouge gourd, calabash government_building government_office gown gown, robe gown, surgical_gown, scrubs grab grab_bag grab_bar grace_cup grade_separation graduated_cylinder graffito, graffiti gramophone, acoustic_gramophone granary, garner grandfather_clock, longcase_clock grand_piano, grand graniteware granny_knot, granny 
grape_arbor, grape_arbour grapnel, grapnel_anchor grapnel, grapple, grappler, grappling_hook, grappling_iron grass_skirt grate, grating grate, grating grater graver, graving_tool, pointel, pointrel gravestone, headstone, tombstone gravimeter, gravity_meter gravure, photogravure, heliogravure gravy_boat, gravy_holder, sauceboat, boat grey, gray grease-gun, gun greasepaint greasy_spoon greatcoat, overcoat, topcoat great_hall greave, jambeau greengrocery greenhouse, nursery, glasshouse grenade grid, gridiron griddle grill, grille, grillwork grille, radiator_grille grillroom, grill grinder grinding_wheel, emery_wheel grindstone gripsack gristmill grocery_bag grocery_store, grocery, food_market, market grogram groined_vault groover grosgrain gros_point ground, earth ground_bait ground_control ground_floor, first_floor, ground_level groundsheet, ground_cloth G-string, thong guard, safety, safety_device guard_boat guardroom guardroom guard_ship guard's_van gueridon Guarnerius guesthouse guestroom guidance_system, guidance_device guided_missile guided_missile_cruiser guided_missile_frigate guildhall guilloche guillotine guimpe guimpe guitar guitar_pick gulag gun gunboat gun_carriage gun_case gun_emplacement, weapons_emplacement gun_enclosure, gun_turret, turret gunlock, firing_mechanism gunnery gunnysack, gunny_sack, burlap_bag gun_pendulum gun_room gunsight, gun-sight gun_trigger, trigger gurney gusher gusset, inset gusset, gusset_plate guy, guy_cable, guy_wire, guy_rope gymnastic_apparatus, exerciser gym_shoe, sneaker, tennis_shoe gym_suit gymslip gypsy_cab gyrocompass gyroscope, gyro gyrostabilizer, gyrostabiliser habergeon habit habit, riding_habit hacienda hacksaw, hack_saw, metal_saw haft, helve hairbrush haircloth, hair hairdressing, hair_tonic, hair_oil, hair_grease hairnet hairpiece, false_hair, postiche hairpin hair_shirt hair_slide hair_spray hairspring hair_trigger halberd half_binding half_hatchet half_hitch half_track hall hall hall Hall_of_Fame 
hall_of_residence hallstand halter halter, hackamore hame hammer hammer, power_hammer hammer hammerhead hammock, sack hamper hand handball handbarrow handbell hand_blower, blow_dryer, blow_drier, hair_dryer, hair_drier handbow hand_brake, emergency, emergency_brake, parking_brake hand_calculator, pocket_calculator handcar handcart, pushcart, cart, go-cart hand_cream handcuff, cuff, handlock, manacle hand_drill, handheld_drill hand_glass, simple_microscope, magnifying_glass hand_glass, hand_mirror hand_grenade hand-held_computer, hand-held_microcomputer handhold handkerchief, hankie, hanky, hankey handlebar handloom hand_lotion hand_luggage hand-me-down hand_mower hand_pump handrest handsaw, hand_saw, carpenter's_saw handset, French_telephone hand_shovel handspike handstamp, rubber_stamp hand_throttle hand_tool hand_towel, face_towel hand_truck, truck handwear, hand_wear handwheel handwheel hangar_queen hanger hang_glider hangman's_rope, hangman's_halter, halter, hemp, hempen_necktie hank hansom, hansom_cab harbor, harbour hard_disc, hard_disk, fixed_disk hard_hat, tin_hat, safety_hat hardtop hardware, ironware hardware_store, ironmonger, ironmonger's_shop harmonica, mouth_organ, harp, mouth_harp harmonium, organ, reed_organ harness harness harp harp harpoon harpoon_gun harpoon_log harpsichord, cembalo Harris_Tweed harrow harvester, reaper hash_house hasp hat, chapeau, lid hatbox hatch hatchback, hatchback_door hatchback hatchel, heckle hatchet hatpin hauberk, byrnie Hawaiian_guitar, steel_guitar hawse, hawsehole, hawsepipe hawser hawser_bend hay_bale hayfork hayloft, haymow, mow haymaker, hay_conditioner hayrack, hayrig hayrack hazard head head head headboard head_covering, veil headdress, headgear header header header, coping, cope header, lintel headfast head_gasket head_gate headgear headlight, headlamp headpiece headpin, kingpin headquarters, central_office, main_office, home_office, home_base headrace headrest headsail headscarf headset head_shop headstall, 
headpiece headstock health_spa, spa, health_club hearing_aid, ear_trumpet hearing_aid, deaf-aid hearse hearth, fireside hearthrug heart-lung_machine heat_engine heater, warmer heat_exchanger heating_pad, hot_pad heat_lamp, infrared_lamp heat_pump heat-seeking_missile heat_shield heat_sink heaume heaver heavier-than-air_craft heckelphone, basset_oboe hectograph, heliotype hedge, hedgerow hedge_trimmer helicon, bombardon helicopter, chopper, whirlybird, eggbeater heliograph heliometer helm helmet helmet hematocrit, haematocrit hemming-stitch hemostat, haemostat hemstitch, hemstitching henroost heraldry hermitage herringbone herringbone, herringbone_pattern Herschelian_telescope, off-axis_reflector Hessian_boot, hessian, jackboot, Wellington, Wellington_boot heterodyne_receiver, superheterodyne_receiver, superhet hibachi hideaway, retreat hi-fi, high_fidelity_sound_system high_altar high-angle_gun highball_glass highboard highboy, tallboy highchair, feeding_chair high_gear, high high-hat_cymbal, high_hat highlighter highlighter high-pass_filter high-rise, tower_block high_table high-warp_loom hijab hinge, flexible_joint hinging_post, swinging_post hip_boot, thigh_boot hipflask, pocket_flask hip_pad hip_pocket hippodrome hip_roof, hipped_roof hitch hitch hitching_post hitchrack, hitching_bar hob hobble_skirt hockey_skate hockey_stick hod hodoscope hoe hoe_handle hogshead hoist hold, keep holder holding_cell holding_device holding_pen, holding_paddock, holding_yard hollowware, holloware holster holster holy_of_holies, sanctum_sanctorum home, nursing_home, rest_home home_appliance, household_appliance home_computer home_plate, home_base, home, plate home_room, homeroom homespun homestead home_theater, home_theatre homing_torpedo hone honeycomb hood, bonnet, cowl, cowling hood hood hood, exhaust_hood hood hood_latch hook hook, claw hook hookah, narghile, nargileh, sheesha, shisha, chicha, calean, kalian, water_pipe, hubble-bubble, hubbly-bubbly hook_and_eye hookup, 
assemblage hookup hook_wrench, hook_spanner hoopskirt, crinoline hoosegow, hoosgow Hoover hope_chest, wedding_chest hopper hopsacking, hopsack horizontal_bar, high_bar horizontal_stabilizer, horizontal_stabiliser, tailplane horizontal_tail horn horn horn horn_button hornpipe, pibgorn, stockhorn horse, gymnastic_horse horsebox horsecar horse_cart, horse-cart horsecloth horse-drawn_vehicle horsehair horsehair_wig horseless_carriage horse_pistol, horse-pistol horseshoe, shoe horseshoe horse-trail horsewhip hose hosiery, hose hospice hospital, infirmary hospital_bed hospital_room hospital_ship hospital_train hostel, youth_hostel, student_lodging hostel, hostelry, inn, lodge, auberge hot-air_balloon hotel hotel-casino, casino-hotel hotel-casino, casino-hotel hotel_room hot_line hot_pants hot_plate, hotplate hot_rod, hot-rod hot_spot, hotspot hot_tub hot-water_bottle, hot-water_bag houndstooth_check, hound's-tooth_check, dogstooth_check, dogs-tooth_check, dog's-tooth_check hourglass hour_hand, little_hand house house houseboat houselights house_of_cards, cardhouse, card-house, cardcastle house_of_correction house_paint, housepaint housetop housing, lodging, living_accommodations hovel, hut, hutch, shack, shanty hovercraft, ground-effect_machine howdah, houdah huarache, huaraches hub-and-spoke, hub-and-spoke_system hubcap huck, huckaback hug-me-tight hula-hoop hulk hull humeral_veil, veil Humvee, Hum-Vee hunter, hunting_watch hunting_knife hurdle hurricane_deck, hurricane_roof, promenade_deck, awning_deck hurricane_lamp, hurricane_lantern, tornado_lantern, storm_lantern, storm_lamp hut, army_hut, field_hut hutch hutment hydraulic_brake, hydraulic_brakes hydraulic_press hydraulic_pump, hydraulic_ram hydraulic_system hydraulic_transmission, hydraulic_transmission_system hydroelectric_turbine hydrofoil, hydroplane hydrofoil, foil hydrogen_bomb, H-bomb, fusion_bomb, thermonuclear_bomb hydrometer, gravimeter hygrodeik hygrometer hygroscope hyperbaric_chamber hypercoaster 
hypermarket hypodermic_needle hypodermic_syringe, hypodermic, hypo hypsometer hysterosalpingogram I-beam ice_ax, ice_axe, piolet iceboat, ice_yacht, scooter icebreaker, iceboat iced-tea_spoon ice_hockey_rink, ice-hockey_rink ice_machine ice_maker ice_pack, ice_bag icepick, ice_pick ice_rink, ice-skating_rink, ice ice_skate ice_tongs icetray iconoscope Identikit, Identikit_picture idle_pulley, idler_pulley, idle_wheel igloo, iglu ignition_coil ignition_key ignition_switch imaret immovable_bandage impact_printer impeller implant implement impression imprint improvised_explosive_device, I.E.D., IED impulse_turbine in-basket, in-tray incendiary_bomb, incendiary, firebomb incinerator inclined_plane inclinometer, dip_circle inclinometer incrustation, encrustation incubator, brooder index_register Indiaman Indian_club indicator induction_coil inductor, inductance industrial_watercourse inertial_guidance_system, inertial_navigation_system inflater, inflator inhaler, inhalator injector ink_bottle, inkpot ink_eraser ink-jet_printer inkle inkstand inkwell, inkstand inlay inside_caliper insole, innersole instep instillator institution instrument instrument_of_punishment instrument_of_torture intaglio, diaglyph intake_valve integrated_circuit, microcircuit integrator, planimeter Intelnet interceptor interchange intercommunication_system, intercom intercontinental_ballistic_missile, ICBM interface, port interferometer interior_door internal-combustion_engine, ICE internal_drive internet, net, cyberspace interphone interrupter intersection, crossroad, crossway, crossing, carrefour interstice intraocular_lens intravenous_pyelogram, IVP inverter ion_engine ionization_chamber, ionization_tube iPod video_iPod iron, smoothing_iron iron iron, branding_iron irons, chains ironclad iron_foundry iron_horse ironing iron_lung ironmongery ironworks irrigation_ditch izar jabot jack jack, jackstones jack jack jacket jacket jacket jack-in-the-box jack-o'-lantern jack_plane Jacob's_ladder, 
jack_ladder, pilot_ladder jaconet Jacquard_loom, Jacquard jacquard jag, dag jail, jailhouse, gaol, clink, slammer, poky, pokey jalousie jamb jammer jampot, jamjar japan jar Jarvik_heart, Jarvik_artificial_heart jaunting_car, jaunty_car javelin jaw Jaws_of_Life jean, blue_jean, denim jeep, landrover jellaba jerkin jeroboam, double-magnum jersey jersey, T-shirt, tee_shirt jet, jet_plane, jet-propelled_plane jet_bridge jet_engine jetliner jeweler's_glass jewelled_headdress, jeweled_headdress jew's_harp, jews'_harp, mouth_bow jib jibboom jig jig jiggermast, jigger jigsaw, scroll_saw, fretsaw jigsaw_puzzle jinrikisha, ricksha, rickshaw jobcentre jodhpurs, jodhpur_breeches, riding_breeches jodhpur, jodhpur_boot, jodhpur_shoe joinery joint Joint_Direct_Attack_Munition, JDAM jointer, jointer_plane, jointing_plane, long_plane joist jolly_boat, jolly jorum joss_house journal_bearing journal_box joystick jungle_gym junk jug jukebox, nickelodeon jumbojet, jumbo_jet jumper, pinafore, pinny jumper jumper jumper jumper_cable, jumper_lead, lead, booster_cable jump_seat jump_suit jump_suit, jumpsuit junction junction, conjunction junction_barrier, barrier_strip junk_shop jury_box jury_mast kachina kaffiyeh kalansuwa Kalashnikov kameez kanzu katharometer kayak kazoo keel keelboat keelson keep, donjon, dungeon keg kennel, doghouse, dog_house kepi, peaked_cap, service_cap, yachting_cap keratoscope kerchief ketch kettle, boiler kettle, kettledrum, tympanum, tympani, timpani key key keyboard keyboard_buffer keyboard_instrument keyhole keyhole_saw khadi, khaddar khaki khakis khimar khukuri kick_pleat kicksorter, pulse_height_analyzer kickstand kick_starter, kick_start kid_glove, suede_glove kiln kilt kimono kinescope, picture_tube, television_tube Kinetoscope king king kingbolt, kingpin, swivel_pin king_post Kipp's_apparatus kirk kirpan kirtle kirtle kit, outfit kit kitbag, kit_bag kitchen kitchen_appliance kitchenette kitchen_table kitchen_utensil kitchenware kite_balloon klaxon, claxon 
klieg_light klystron knee_brace knee-high, knee-hi knee_pad knee_piece knife knife knife_blade knight, horse knit knitting_machine knitting_needle knitwear knob, boss knob, pommel knobble knobkerrie, knobkerry knocker, doorknocker, rapper knot knuckle_joint, hinge_joint kohl koto kraal kremlin kris, creese, crease krummhorn, crumhorn, cromorne Kundt's_tube Kurdistan kurta kylix, cylix kymograph, cymograph lab_bench, laboratory_bench lab_coat, laboratory_coat lace lacquer lacquerware lacrosse_ball ladder-back ladder-back, ladder-back_chair ladder_truck, aerial_ladder_truck ladies'_room, powder_room ladle lady_chapel lagerphone lag_screw, lag_bolt lake_dwelling, pile_dwelling lally, lally_column lamasery lambrequin lame laminar_flow_clean_room laminate lamination lamp lamp lamp_house, lamphouse, lamp_housing lamppost lampshade, lamp_shade lanai lancet_arch, lancet lancet_window landau lander landing_craft landing_flap landing_gear landing_net landing_skid land_line, landline land_mine, ground-emplaced_mine, booby_trap land_office lanolin lantern lanyard, laniard lap, lap_covering laparoscope lapboard lapel lap_joint, splice laptop, laptop_computer laryngoscope laser, optical_maser laser-guided_bomb, LGB laser_printer lash, thong lashing lasso, lariat, riata, reata latch latch, door_latch latchet latchkey lateen, lateen_sail latex_paint, latex, rubber-base_paint lath lathe latrine lattice, latticework, fretwork launch launcher, rocket_launcher laundry, wash, washing, washables laundry_cart laundry_truck lavalava lavaliere, lavalier, lavalliere laver lawn_chair, garden_chair lawn_furniture lawn_mower, mower layette lead-acid_battery, lead-acid_accumulator lead-in leading_rein lead_pencil leaf_spring lean-to lean-to_tent leash, tether, lead leatherette, imitation_leather leather_strip Leclanche_cell lectern, reading_desk lecture_room lederhosen ledger_board leg leg legging, leging, leg_covering Leiden_jar, Leyden_jar leisure_wear lens, lense, lens_system lens, 
electron_lens lens_cap, lens_cover lens_implant, interocular_lens_implant, IOL leotard, unitard, body_suit, cat_suit letter_case letter_opener, paper_knife, paperknife levee level, spirit_level lever lever, lever_tumbler lever lever_lock Levi's, levis Liberty_ship library library lid Liebig_condenser lie_detector lifeboat life_buoy, lifesaver, life_belt, life_ring life_jacket, life_vest, cork_jacket life_office life_preserver, preserver, flotation_device life-support_system, life_support life-support_system, life_support lifting_device lift_pump ligament ligature light, light_source light_arm light_bulb, lightbulb, bulb, incandescent_lamp, electric_light, electric-light_bulb light_circuit, lighting_circuit light-emitting_diode, LED lighter, light, igniter, ignitor lighter-than-air_craft light_filter, diffusing_screen lighting light_machine_gun light_meter, exposure_meter, photometer light_microscope lightning_rod, lightning_conductor light_pen, electronic_stylus lightship Lilo limber limekiln limiter, clipper limousine, limo linear_accelerator, linac linen line_printer, line-at-a-time_printer liner, ocean_liner liner, lining lingerie, intimate_apparel lining, liner link, data_link linkage Link_trainer linocut linoleum_knife, linoleum_cutter Linotype, Linotype_machine linsey-woolsey linstock lion-jaw_forceps lip-gloss lipstick, lip_rouge liqueur_glass liquid_crystal_display, LCD liquid_metal_reactor lisle lister, lister_plow, lister_plough, middlebreaker, middle_buster litterbin, litter_basket, litter-basket little_theater, little_theatre live_axle, driving_axle living_quarters, quarters living_room, living-room, sitting_room, front_room, parlor, parlour load Loafer loaner lobe lobster_pot local local_area_network, LAN local_oscillator, heterodyne_oscillator Lochaber_ax lock lock, ignition_lock lock, lock_chamber lock lockage locker locker_room locket lock-gate locking_pliers lockring, lock_ring, lock_washer lockstitch lockup locomotive, engine, locomotive_engine, 
railway_locomotive lodge, indian_lodge lodge, hunting_lodge lodge lodging_house, rooming_house loft, attic, garret loft, pigeon_loft loft log_cabin loggia longbow long_iron long_johns long_sleeve long_tom long_trousers, long_pants long_underwear, union_suit looking_glass, glass lookout, observation_tower, lookout_station, observatory loom loop_knot lorgnette Lorraine_cross, cross_of_Lorraine lorry, camion lota lotion loudspeaker, speaker, speaker_unit, loudspeaker_system, speaker_system lounge, waiting_room, waiting_area lounger lounging_jacket, smoking_jacket lounging_pajama, lounging_pyjama loungewear loupe, jeweler's_loupe louvered_window, jalousie love_knot, lovers'_knot, lover's_knot, true_lovers'_knot, true_lover's_knot love_seat, loveseat, tete-a-tete, vis-a-vis loving_cup lowboy low-pass_filter low-warp-loom LP, L-P L-plate lubber's_hole lubricating_system, force-feed_lubricating_system, force_feed, pressure-feed_lubricating_system, pressure_feed luff lug luge Luger luggage_carrier luggage_compartment, automobile_trunk, trunk luggage_rack, roof_rack lugger lugsail, lug lug_wrench lumberjack, lumber_jacket lumbermill, sawmill lunar_excursion_module, lunar_module, LEM lunchroom lunette lungi, lungyi, longyi lunula lusterware lute luxury_liner, express_luxury_liner lyceum lychgate, lichgate lyre machete, matchet, panga machicolation machine machine, simple_machine machine_bolt machine_gun machinery machine_screw machine_tool machinist's_vise, metalworking_vise machmeter mackinaw mackinaw, Mackinaw_boat mackinaw, Mackinaw_coat mackintosh, macintosh macrame madras Mae_West, air_jacket magazine_rack magic_lantern magnet magnetic_bottle magnetic_compass magnetic_core_memory, core_memory magnetic_disk, magnetic_disc, disk, disc magnetic_head magnetic_mine magnetic_needle magnetic_recorder magnetic_stripe magnetic_tape, mag_tape, tape magneto, magnetoelectric_machine magnetometer, gaussmeter magnetron magnifier magnum magnus_hitch mail mailbag, postbag mailbag, 
mail_pouch mailboat, mail_boat, packet, packet_boat mailbox, letter_box mail_car maildrop mailer maillot maillot, tank_suit mailsorter mail_train mainframe, mainframe_computer mainmast main_rotor mainsail mainspring main-topmast main-topsail main_yard maisonette, maisonnette majolica, maiolica makeup, make-up, war_paint Maksutov_telescope malacca, malacca_cane mallet, beetle mallet, hammer mallet mammogram mandola mandolin manger, trough mangle manhole manhole_cover man-of-war, ship_of_the_line manometer manor, manor_house manor_hall, hall MANPAD mansard, mansard_roof manse mansion, mansion_house, manse, hall, residence mantel, mantelpiece, mantle, mantlepiece, chimneypiece mantelet, mantilla mantilla Mao_jacket map maquiladora maraca marble marching_order marimba, xylophone marina marker marketplace, market_place, mart, market marlinespike, marlinspike, marlingspike marocain, crepe_marocain marquee, marquise marquetry, marqueterie marriage_bed martello_tower martingale mascara maser masher mashie, five_iron mashie_niblick, seven_iron masjid, musjid mask mask Masonite Mason_jar masonry mason's_level massage_parlor massage_parlor mass_spectrograph mass_spectrometer, spectrometer mast mast mastaba, mastabah master_bedroom masterpiece, chef-d'oeuvre mat mat, gym_mat match, lucifer, friction_match match matchboard matchbook matchbox matchlock match_plane, tonguing_and_grooving_plane matchstick material materiel, equipage maternity_hospital maternity_ward matrix Matthew_Walker, Matthew_Walker_knot matting mattock mattress_cover maul, sledge, sledgehammer maulstick, mahlstick Mauser mausoleum maxi Maxim_gun maximum_and_minimum_thermometer maypole maze, labyrinth mazer means measure measuring_cup measuring_instrument, measuring_system, measuring_device measuring_stick, measure, measuring_rod meat_counter meat_grinder meat_hook meat_house meat_safe meat_thermometer mechanical_device mechanical_piano, Pianola, player_piano mechanical_system mechanism medical_building, 
health_facility, healthcare_facility medical_instrument medicine_ball medicine_chest, medicine_cabinet MEDLINE megalith, megalithic_structure megaphone memorial, monument memory, computer_memory, storage, computer_storage, store, memory_board memory_chip memory_device, storage_device menagerie, zoo, zoological_garden mending menhir, standing_stone menorah Menorah man's_clothing men's_room, men's mercantile_establishment, retail_store, sales_outlet, outlet mercury_barometer mercury_cell mercury_thermometer, mercury-in-glass_thermometer mercury-vapor_lamp mercy_seat merlon mess, mess_hall mess_jacket, monkey_jacket, shell_jacket mess_kit messuage metal_detector metallic metal_screw metal_wood meteorological_balloon meter meterstick, metrestick metronome mezzanine, mezzanine_floor, entresol mezzanine, first_balcony microbalance microbrewery microfiche microfilm micrometer, micrometer_gauge, micrometer_caliper microphone, mike microprocessor microscope microtome microwave, microwave_oven microwave_diathermy_machine microwave_linear_accelerator middy, middy_blouse midiron, two_iron mihrab mihrab military_hospital military_quarters military_uniform military_vehicle milk_bar milk_can milk_float milking_machine milking_stool milk_wagon, milkwagon mill, grinder, milling_machinery milldam miller, milling_machine milliammeter millinery, woman's_hat millinery, hat_shop milling millivoltmeter millstone millstone millwheel, mill_wheel mimeograph, mimeo, mimeograph_machine, Roneo, Roneograph minaret mincer, mincing_machine mine mine_detector minelayer mineshaft minibar, cellaret minibike, motorbike minibus minicar minicomputer ministry miniskirt, mini minisub, minisubmarine minivan miniver mink, mink_coat minster mint minute_hand, big_hand Minuteman mirror missile missile_defense_system, missile_defence_system miter_box, mitre_box miter_joint, mitre_joint, miter, mitre mitten mixer mixer mixing_bowl mixing_faucet mizzen, mizen mizzenmast, mizenmast, mizzen, mizen mobcap 
mobile_home, manufactured_home moccasin, mocassin mock-up mod_con Model_T modem modillion module module mohair moire, watered-silk mold, mould, cast moldboard, mouldboard moldboard_plow, mouldboard_plough moleskin Molotov_cocktail, petrol_bomb, gasoline_bomb monastery monastic_habit moneybag money_belt monitor monitor monitor, monitoring_device monkey-wrench, monkey_wrench monk's_cloth monochrome monocle, eyeglass monofocal_lens_implant, monofocal_IOL monoplane monotype monstrance, ostensorium mooring_tower, mooring_mast Moorish_arch, horseshoe_arch moped mop_handle moquette morgue, mortuary, dead_room morion, cabasset morning_dress morning_dress morning_room Morris_chair mortar, howitzer, trench_mortar mortar mortarboard mortise_joint, mortise-and-tenon_joint mosaic mosque mosquito_net motel motel_room Mother_Hubbard, muumuu motion-picture_camera, movie_camera, cine-camera motion-picture_film, movie_film, cine-film motley motley motor motorboat, powerboat motorcycle, bike motor_hotel, motor_inn, motor_lodge, tourist_court, court motorized_wheelchair motor_scooter, scooter motor_vehicle, automotive_vehicle mound, hill mound, hill, pitcher's_mound mount, setting mountain_bike, all-terrain_bike, off-roader mountain_tent mouse, computer_mouse mouse_button mousetrap mousse, hair_mousse, hair_gel mouthpiece, embouchure mouthpiece mouthpiece, gumshield movement movie_projector, cine_projector, film_projector moving-coil_galvanometer moving_van mud_brick mudguard, splash_guard, splash-guard mudhif muff muffle muffler mufti mug mulch mule, scuff multichannel_recorder multiengine_airplane, multiengine_plane multiplex multiplexer multiprocessor multistage_rocket, step_rocket munition, ordnance, ordnance_store Murphy_bed musette, shepherd's_pipe musette_pipe museum mushroom_anchor musical_instrument, instrument music_box, musical_box music_hall, vaudeville_theater, vaudeville_theatre music_school music_stand, music_rack music_stool, piano_stool musket musket_ball, ball muslin 
mustache_cup, moustache_cup mustard_plaster, sinapism mute muzzle_loader muzzle myelogram nacelle nail nailbrush nailfile nailhead nailhead nail_polish, nail_enamel, nail_varnish nainsook Napier's_bones, Napier's_rods nard, spikenard narrowbody_aircraft, narrow-body_aircraft, narrow-body narrow_wale narthex narthex nasotracheal_tube national_monument nautilus, nuclear_submarine, nuclear-powered_submarine navigational_system naval_equipment naval_gun naval_missile naval_radar naval_tactical_data_system naval_weaponry nave navigational_instrument nebuchadnezzar neckband neck_brace neckcloth, stock neckerchief necklace necklet neckline neckpiece necktie, tie neckwear needle needle needlenose_pliers needlework, needlecraft negative negative_magnetic_pole, negative_pole, south-seeking_pole negative_pole negligee, neglige, peignoir, wrapper, housecoat neolith neon_lamp, neon_induction_lamp, neon_tube nephoscope nest nest_egg net, network, mesh, meshing, meshwork net net net network, electronic_network network neutron_bomb newel newel_post, newel newspaper, paper newsroom newsroom newsstand Newtonian_telescope, Newtonian_reflector nib, pen_nib niblick, nine_iron nicad, nickel-cadmium_accumulator nickel-iron_battery, nickel-iron_accumulator Nicol_prism night_bell nightcap nightgown, gown, nightie, night-robe, nightdress night_latch night-light nightshirt nightwear, sleepwear, nightclothes ninepin, skittle, skittle_pin ninepin_ball, skittle_ball ninon nipple nipple_shield niqab Nissen_hut, Quonset_hut nogging noisemaker nonsmoker, nonsmoking_car non-volatile_storage, nonvolatile_storage Norfolk_jacket noria nosebag, feedbag noseband, nosepiece nose_flute nosewheel notebook, notebook_computer nuclear-powered_ship nuclear_reactor, reactor nuclear_rocket nuclear_weapon, atomic_weapon nude, nude_painting numdah, numdah_rug, nammad nun's_habit nursery, baby's_room nut_and_bolt nutcracker nylon nylons, nylon_stocking, rayons, rayon_stocking, silk_stocking oar oast oast_house 
obelisk object_ball objective, objective_lens, object_lens, object_glass oblique_bandage oboe, hautboy, hautbois oboe_da_caccia oboe_d'amore observation_dome observatory obstacle obturator ocarina, sweet_potato octant odd-leg_caliper odometer, hodometer, mileometer, milometer oeil_de_boeuf office, business_office office_building, office_block office_furniture officer's_mess off-line_equipment, auxiliary_equipment ogee, cyma_reversa ogee_arch, keel_arch ohmmeter oil, oil_color, oil_colour oilcan oilcloth oil_filter oil_heater, oilstove, kerosene_heater, kerosine_heater oil_lamp, kerosene_lamp, kerosine_lamp oil_paint oil_pump oil_refinery, petroleum_refinery oilskin, slicker oil_slick oilstone oil_tanker, oiler, tanker, tank_ship old_school_tie olive_drab olive_drab, olive-drab_uniform Olympian_Zeus omelet_pan, omelette_pan omnidirectional_antenna, nondirectional_antenna omnirange, omnidirectional_range, omnidirectional_radio_range onion_dome open-air_market, open-air_marketplace, market_square open_circuit open-end_wrench, tappet_wrench opener open-hearth_furnace openside_plane, rabbet_plane open_sight openwork opera, opera_house opera_cloak, opera_hood operating_microscope operating_room, OR, operating_theater, operating_theatre, surgery operating_table ophthalmoscope optical_device optical_disk, optical_disc optical_instrument optical_pyrometer, pyroscope optical_telescope orchestra_pit, pit ordinary, ordinary_bicycle organ, pipe_organ organdy, organdie organic_light-emitting_diode, OLED organ_loft organ_pipe, pipe, pipework organza oriel, oriel_window oriflamme O_ring Orlon orlop_deck, orlop, fourth_deck orphanage, orphans'_asylum orphrey orrery orthicon, image_orthicon orthochromatic_film orthopter, ornithopter orthoscope oscillograph oscilloscope, scope, cathode-ray_oscilloscope, CRO ossuary otoscope, auriscope, auroscope ottoman, pouf, pouffe, puff, hassock oubliette out-basket, out-tray outboard_motor, outboard outboard_motorboat, outboard outbuilding 
outerwear, overclothes outfall outfit, getup, rig, turnout outfitter outhouse, privy, earth-closet, jakes output_device outrigger outrigger_canoe outside_caliper outside_mirror outwork oven oven_thermometer overall overall, boilersuit, boilers_suit overcoat, overcoating overdrive overgarment, outer_garment overhand_knot overhang overhead_projector overmantel overnighter, overnight_bag, overnight_case overpass, flyover override overshoe overskirt oxbow Oxbridge oxcart oxeye oxford oximeter oxyacetylene_torch oxygen_mask oyster_bar oyster_bed, oyster_bank, oyster_park pace_car pacemaker, artificial_pacemaker pack pack pack, face_pack package, parcel package_store, liquor_store, off-licence packaging packet packing_box, packing_case packinghouse, packing_plant packinghouse packing_needle packsaddle paddle, boat_paddle paddle paddle paddle_box, paddle-box paddle_steamer, paddle-wheeler paddlewheel, paddle_wheel paddock padlock page_printer, page-at-a-time_printer paint, pigment paintball paintball_gun paintbox paintbrush paisley pajama, pyjama, pj's, jammies pajama, pyjama palace palace, castle palace palanquin, palankeen paleolith palestra, palaestra palette, pallet palette_knife palisade pallet pallette, palette pallium pallium pan pan, cooking_pan pancake_turner panchromatic_film panda_car paneling, panelling, pane panhandle panic_button pannier pannier pannikin panopticon panopticon panpipe, pandean_pipe, syrinx pantaloon pantechnicon pantheon pantheon pantie, panty, scanty, step-in panting, trousering pant_leg, trouser_leg pantograph pantry, larder, buttery pants_suit, pantsuit panty_girdle pantyhose panzer paper_chain paper_clip, paperclip, gem_clip paper_cutter paper_fastener paper_feed paper_mill paper_towel parabolic_mirror parabolic_reflector, paraboloid_reflector parachute, chute parallel_bars, bars parallel_circuit, shunt_circuit parallel_interface, parallel_port parang parapet, breastwork parapet parasail parasol, sunshade parer, paring_knife parfait_glass 
pargeting, pargetting, pargetry pari-mutuel_machine, totalizer, totaliser, totalizator, totalisator parka, windbreaker, windcheater, anorak park_bench parking_meter parlor, parlour parquet, parquet_floor parquetry, parqueterie parsonage, vicarage, rectory Parsons_table partial_denture particle_detector partition, divider parts_bin party_line party_wall parvis passenger_car, coach, carriage passenger_ship passenger_train passenger_van passe-partout passive_matrix_display passkey, passe-partout, master_key, master pass-through pastry_cart patch patchcord patchouli, patchouly, pachouli patch_pocket patchwork, patchwork_quilt patent_log, screw_log, taffrail_log paternoster patina patio, terrace patisserie patka patrol_boat, patrol_ship patty-pan pave pavilion, marquee pavior, paviour, paving_machine pavis, pavise pawn pawnbroker's_shop, pawnshop, loan_office pay-phone, pay-station PC_board peach_orchard pea_jacket, peacoat peavey, peavy, cant_dog, dog_hook pectoral, pectoral_medallion pedal, treadle, foot_pedal, foot_lever pedal_pusher, toreador_pants pedestal, plinth, footstall pedestal_table pedestrian_crossing, zebra_crossing pedicab, cycle_rickshaw pediment pedometer peeler peep_sight peg, nog peg, pin, thole, tholepin, rowlock, oarlock peg peg, wooden_leg, leg, pegleg pegboard Pelham pelican_crossing pelisse pelvimeter pen penal_colony penal_institution, penal_facility penalty_box pen-and-ink pencil pencil pencil_box, pencil_case pencil_sharpener pendant_earring, drop_earring, eardrop pendulum pendulum_clock pendulum_watch penetration_bomb penile_implant penitentiary, pen penknife penlight pennant, pennon, streamer, waft pennywhistle, tin_whistle, whistle penthouse pentode peplos, peplus, peplum peplum pepper_mill, pepper_grinder pepper_shaker, pepper_box, pepper_pot pepper_spray percale percolator percussion_cap percussion_instrument, percussive_instrument perforation perfume, essence perfumery perfumery perfumery peripheral, computer_peripheral, 
peripheral_device periscope peristyle periwig, peruke permanent_press, durable_press perpetual_motion_machine personal_computer, PC, microcomputer personal_digital_assistant, PDA, personal_organizer, personal_organiser, organizer, organiser personnel_carrier pestle pestle, muller, pounder petcock Petri_dish petrolatum_gauze pet_shop petticoat, half-slip, underskirt pew, church_bench phial, vial, ampule, ampul, ampoule Phillips_screw Phillips_screwdriver phonograph_needle, needle phonograph_record, phonograph_recording, record, disk, disc, platter photocathode photocoagulator photocopier photographic_equipment photographic_paper, photographic_material photometer photomicrograph Photostat, Photostat_machine photostat physical_pendulum, compound_pendulum piano, pianoforte, forte-piano piano_action piano_keyboard, fingerboard, clavier piano_wire piccolo pick, pickax, pickaxe pick pick, plectrum, plectron pickelhaube picket_boat picket_fence, paling picket_ship pickle_barrel pickup, pickup_truck picture, image, icon, ikon picture_frame picture_hat picture_rail picture_window piece_of_cloth, piece_of_material pied-a-terre pier pier pier_arch pier_glass, pier_mirror pier_table pieta piezometer pig_bed, pig piggery, pig_farm piggy_bank, penny_bank pilaster pile, spile, piling, stilt pile_driver pill_bottle pillbox, toque, turban pillion pillory pillow pillow_block pillow_lace, bobbin_lace pillow_sham pilot_bit pilot_boat pilot_burner, pilot_light, pilot pilot_cloth pilot_engine pilothouse, wheelhouse pilot_light, pilot_lamp, indicator_lamp pin pin, flag pin, pin_tumbler pinata pinball_machine, pin_table pince-nez pincer, pair_of_pincers, tweezer, pair_of_tweezers pinch_bar pincurl_clip pinfold ping-pong_ball pinhead pinion pinnacle pinprick pinstripe pinstripe pinstripe pintle pinwheel, pinwheel_wind_collector pinwheel tabor_pipe pipe pipe_bomb pipe_cleaner pipe_cutter pipefitting, pipe_fitting pipet, pipette pipe_vise, pipe_clamp pipe_wrench, tube_wrench pique pirate, 
pirate_ship piste pistol, handgun, side_arm, shooting_iron pistol_grip piston, plunger piston_ring piston_rod pit pitcher, ewer pitchfork pitching_wedge pitch_pipe pith_hat, pith_helmet, sun_helmet, topee, topi piton Pitot-static_tube, Pitot_head, Pitot_tube Pitot_tube, Pitot pitsaw pivot, pin pivoting_window pizzeria, pizza_shop, pizza_parlor place_of_business, business_establishment place_of_worship, house_of_prayer, house_of_God, house_of_worship placket planchet, coin_blank plane, carpenter's_plane, woodworking_plane plane, planer, planing_machine plane_seat planetarium planetarium planetarium planetary_gear, epicyclic_gear, planet_wheel, planet_gear plank-bed planking planner plant, works, industrial_plant planter plaster, adhesive_plaster, sticking_plaster plasterboard, gypsum_board plastering_trowel plastic_bag plastic_bomb plastic_laminate plastic_wrap plastron plastron plastron plate, scale, shell plate, collection_plate plate platen platen plate_rack plate_rail platform platform, weapons_platform platform platform_bed platform_rocker plating, metal_plating platter playback playbox, play-box playground playpen, pen playsuit plaza, mall, center, shopping_mall, shopping_center, shopping_centre pleat, plait plenum plethysmograph pleximeter, plessimeter plexor, plessor, percussor pliers, pair_of_pliers, plyers plimsoll plotter plow, plough plug, stopper, stopple plug, male_plug plug_fuse plughole plumb_bob, plumb, plummet plumb_level plunger, plumber's_helper plus_fours plush plywood, plyboard pneumatic_drill p-n_junction p-n-p_transistor poacher pocket pocket_battleship pocketcomb, pocket_comb pocket_flap pocket-handkerchief pocketknife, pocket_knife pocket_watch pod, fuel_pod pogo_stick point-and-shoot_camera pointed_arch pointing_trowel point_lace, needlepoint poker, stove_poker, fire_hook, salamander polarimeter, polariscope Polaroid Polaroid_camera, Polaroid_Land_camera pole pole poleax, poleaxe poleax, poleaxe police_boat police_van, police_wagon, 
paddy_wagon, patrol_wagon, wagon, black_Maria polling_booth polo_ball polo_mallet, polo_stick polonaise polo_shirt, sport_shirt polyester polygraph pomade, pomatum pommel_horse, side_horse poncho pongee poniard, bodkin pontifical pontoon pontoon_bridge, bateau_bridge, floating_bridge pony_cart, ponycart, donkey_cart, tub-cart pool_ball poolroom pool_table, billiard_table, snooker_table poop_deck poor_box, alms_box, mite_box poorhouse pop_bottle, soda_bottle popgun poplin popper poppet, poppet_valve pop_tent porcelain porch porkpie, porkpie_hat porringer portable portable_computer portable_circular_saw, portable_saw portcullis porte-cochere porte-cochere portfolio porthole portico portiere portmanteau, Gladstone, Gladstone_bag portrait_camera portrait_lens positive_pole, positive_magnetic_pole, north-seeking_pole positive_pole positron_emission_tomography_scanner, PET_scanner post postage_meter post_and_lintel post_chaise postern post_exchange, PX posthole_digger, post-hole_digger post_horn posthouse, post_house pot pot, flowerpot potbelly, potbelly_stove Potemkin_village potential_divider, voltage_divider potentiometer, pot potentiometer potpourri potsherd potter's_wheel pottery, clayware pottle potty_seat, potty_chair pouch poultice, cataplasm, plaster pound, dog_pound pound_net powder powder_and_shot powdered_mustard, dry_mustard powder_horn, powder_flask powder_keg power_brake power_cord power_drill power_line, power_cable power_loom power_mower, motor_mower power_pack power_saw, saw, sawing_machine power_shovel, excavator, digger, shovel power_steering, power-assisted_steering power_takeoff, PTO power_tool praetorium, pretorium prayer_rug, prayer_mat prayer_shawl, tallith, tallis precipitator, electrostatic_precipitator, Cottrell_precipitator prefab presbytery presence_chamber press, mechanical_press press, printing_press press press_box press_gallery press_of_sail, press_of_canvas pressure_cabin pressure_cooker pressure_dome pressure_gauge, pressure_gage 
pressurized_water_reactor, PWR pressure_suit pricket prie-dieu primary_coil, primary_winding, primary Primus_stove, Primus Prince_Albert print print_buffer printed_circuit printer, printing_machine printer printer_cable priory prison, prison_house prison_camp, internment_camp, prisoner_of_war_camp, POW_camp privateer private_line privet_hedge probe proctoscope prod, goad production_line, assembly_line, line projectile, missile projector projector prolonge prolonge_knot, sailor's_breastplate prompter, autocue prong propeller, propellor propeller_plane propjet, turboprop, turbo-propeller_plane proportional_counter_tube, proportional_counter propulsion_system proscenium, proscenium_wall proscenium_arch prosthesis, prosthetic_device protective_covering, protective_cover, protection protective_garment proton_accelerator protractor pruner, pruning_hook, lopper pruning_knife pruning_saw pruning_shears psaltery psychrometer PT_boat, mosquito_boat, mosquito_craft, motor_torpedo_boat public_address_system, P.A._system, PA_system, P.A., PA public_house, pub, saloon, pothouse, gin_mill, taphouse public_toilet, comfort_station, public_convenience, convenience, public_lavatory, restroom, toilet_facility, wash_room public_transport public_works puck, hockey_puck pull pullback, tieback pull_chain pulley, pulley-block, pulley_block, block pull-off, rest_area, rest_stop, layby, lay-by Pullman, Pullman_car pullover, slipover pull-through pulse_counter pulse_generator pulse_timing_circuit pump pump pump_action, slide_action pump_house, pumping_station pump_room pump-type_pliers pump_well punch, puncher punchboard punch_bowl punching_bag, punch_bag, punching_ball, punchball punch_pliers punch_press punnet punt pup_tent, shelter_tent purdah purifier purl, purl_stitch purse push-bike push_broom push_button, push, button push-button_radio pusher, zori put-put puttee putter, putting_iron putty_knife puzzle pylon, power_pylon pylon pyramidal_tent pyrograph pyrometer pyrometric_cone pyrostat 
pyx, pix pyx, pix, pyx_chest, pix_chest pyxis quad, quadrangle quadrant quadraphony, quadraphonic_system, quadriphonic_system quartering quarterstaff quartz_battery, quartz_mill quartz_lamp queen queen queen_post quern quill, quill_pen quilt, comforter, comfort, puff quilted_bedspread quilting quipu quirk_molding, quirk_moulding quirt quiver quoin, coign, coigne quoit QWERTY_keyboard rabbet, rebate rabbet_joint rabbit_ears rabbit_hutch raceabout racer, race_car, racing_car raceway, race racing_boat racing_gig racing_skiff, single_shell rack, stand rack rack, wheel rack_and_pinion racket, racquet racquetball radar, microwave_radar, radio_detection_and_ranging, radiolocation radial, radial_tire, radial-ply_tire radial_engine, rotary_engine radiation_pyrometer radiator radiator radiator_cap radiator_hose radio, wireless radio_antenna, radio_aerial radio_chassis radio_compass radiogram, radiograph, shadowgraph, skiagraph, skiagram radio_interferometer radio_link, link radiometer radiomicrometer radio-phonograph, radio-gramophone radio_receiver, receiving_set, radio_set, radio, tuner, wireless radiotelegraph, radiotelegraphy, wireless_telegraph, wireless_telegraphy radiotelephone, radiophone, wireless_telephone radio_telescope, radio_reflector radiotherapy_equipment radio_transmitter radome, radar_dome raft rafter, balk, baulk raft_foundation rag, shred, tag, tag_end, tatter ragbag raglan raglan_sleeve rail rail_fence railhead railing, rail railing railroad_bed railroad_tunnel rain_barrel raincoat, waterproof rain_gauge, rain_gage, pluviometer, udometer rain_stick rake rake_handle RAM_disk ramekin, ramequin ramjet, ramjet_engine, atherodyde, athodyd, flying_drainpipe rammer ramp, incline rampant_arch rampart, bulwark, wall ramrod ramrod ranch, spread, cattle_ranch, cattle_farm ranch_house random-access_memory, random_access_memory, random_memory, RAM, read/write_memory rangefinder, range_finder range_hood range_pole, ranging_pole, flagpole rapier, tuck rariora rasp, 
wood_file ratchet, rachet, ratch ratchet_wheel rathskeller ratline, ratlin rat-tail_file rattan, ratan rattrap rayon razor razorblade reaction-propulsion_engine, reaction_engine reaction_turbine reactor reading_lamp reading_room read-only_memory, ROM, read-only_storage, fixed_storage read-only_memory_chip readout, read-out read/write_head, head ready-to-wear real_storage reamer reamer, juicer, juice_reamer rearview_mirror Reaumur_thermometer rebozo receiver, receiving_system receptacle reception_desk reception_room recess, niche reciprocating_engine recliner, reclining_chair, lounger reconnaissance_plane reconnaissance_vehicle, scout_car record_changer, auto-changer, changer recorder, recording_equipment, recording_machine recording recording_system record_player, phonograph record_sleeve, record_cover recovery_room recreational_vehicle, RV, R.V. recreation_room, rec_room recycling_bin recycling_plant redbrick_university red_carpet redoubt redoubt reduction_gear reed_pipe reed_stop reef_knot, flat_knot reel reel refectory refectory_table refinery reflecting_telescope, reflector reflectometer reflector reflex_camera reflux_condenser reformatory, reform_school, training_school reformer refracting_telescope refractometer refrigeration_system refrigerator, icebox refrigerator_car refuge, sanctuary, asylum regalia regimentals regulator rein relay, electrical_relay release, button religious_residence, cloister reliquary remote_control, remote remote_terminal, link-attached_terminal, remote_station, link-attached_station removable_disk rendering rep, repp repair_shop, fix-it_shop repeater repeating_firearm, repeater repository, monument reproducer rerebrace, upper_cannon rescue_equipment research_center, research_facility reseau reservoir reset reset_button residence resistance_pyrometer resistor, resistance resonator resonator, cavity_resonator, resonating_chamber resort_hotel, spa respirator, inhalator restaurant, eating_house, eating_place, eatery rest_house restraint, 
constraint resuscitator retainer retaining_wall reticle, reticule, graticule reticulation reticule retort retractor return_key, return reverberatory_furnace revers, revere reverse, reverse_gear reversible revetment, revetement, stone_facing revetment revolver, six-gun, six-shooter revolving_door, revolver rheometer rheostat, variable_resistor rhinoscope rib riband, ribband ribbed_vault ribbing ribbon_development rib_joint_pliers ricer riddle ride ridge, ridgepole, rooftree ridge_rope riding_boot riding_crop, hunting_crop riding_mower rifle rifle_ball rifle_grenade rig rigger, rigger_brush rigger rigging, tackle rigout ringlet rings rink, skating_rink riot_gun ripcord ripcord ripping_bar ripping_chisel ripsaw, splitsaw riser riser, riser_pipe, riser_pipeline, riser_main Ritz river_boat rivet riveting_machine, riveter, rivetter roach_clip, roach_holder road, route roadbed roadblock, barricade roadhouse roadster, runabout, two-seater roadway roaster robe robotics_equipment Rochon_prism, Wollaston_prism rock_bit, roller_bit rocker rocker, cradle rocker_arm, valve_rocker rocket, rocket_engine rocket, projectile rocking_chair, rocker rod rodeo roll roller roller roller_bandage in-line_skate Rollerblade roller_blind roller_coaster, big_dipper, chute-the-chute roller_skate roller_towel roll_film rolling_hitch rolling_mill rolling_pin rolling_stock roll-on roll-on roll-on_roll-off Rolodex Roman_arch, semicircular_arch Roman_building romper, romper_suit rood_screen roof roof roofing room roomette room_light roost rope rope_bridge rope_tow rose_water rose_window, rosette rosin_bag rotary_actuator, positioner rotary_engine rotary_press rotating_mechanism rotating_shaft, shaft rotisserie rotisserie rotor rotor, rotor_coil rotor rotor_blade, rotary_wing rotor_head, rotor_shaft rotunda rotunda rouge, paint, blusher roughcast rouleau roulette, toothed_wheel roulette_ball roulette_wheel, wheel round, unit_of_ammunition, one_shot round_arch round-bottom_flask roundel round_file 
roundhouse router router router_plane rowel row_house, town_house rowing_boat rowlock_arch royal royal_mast rubber_band, elastic_band, elastic rubber_boot, gum_boot rubber_bullet rubber_eraser, rubber, pencil_eraser rudder rudder rudder_blade rug, carpet, carpeting rugby_ball ruin rule, ruler rumble rumble_seat rummer rumpus_room, playroom, game_room runcible_spoon rundle, spoke, rung running_shoe running_suit runway rushlight, rush_candle russet rya, rya_rug saber, sabre saber_saw, jigsaw, reciprocating_saw sable sable, sable_brush, sable's_hair_pencil sable_coat sabot, wooden_shoe sachet sack, poke, paper_bag, carrier_bag sack, sacque sackbut sackcloth sackcloth sack_coat sacking, bagging saddle saddlebag saddle_blanket, saddlecloth, horse_blanket saddle_oxford, saddle_shoe saddlery saddle_seat saddle_stitch safe safe safe-deposit, safe-deposit_box, safety-deposit, safety_deposit_box, deposit_box, lockbox safe_house safety_arch safety_belt, life_belt, safety_harness safety_bicycle, safety_bike safety_bolt, safety_lock safety_curtain safety_fuse safety_lamp, Davy_lamp safety_match, book_matches safety_net safety_pin safety_rail, guardrail safety_razor safety_valve, relief_valve, escape_valve, escape_cock, escape sail, canvas, canvass, sheet sail sailboat, sailing_boat sailcloth sailing_vessel, sailing_ship sailing_warship sailor_cap sailor_suit salad_bar salad_bowl salinometer sallet, salade salon salon salon, beauty_salon, beauty_parlor, beauty_parlour, beauty_shop saltbox saltcellar saltshaker, salt_shaker saltworks salver salwar, shalwar Sam_Browne_belt samisen, shamisen samite samovar sampan sandal sandbag sandblaster sandbox sandglass sand_wedge sandwich_board sanitary_napkin, sanitary_towel, Kotex cling_film, clingfilm, Saran_Wrap sarcenet, sarsenet sarcophagus sari, saree sarong sash, window_sash sash_fastener, sash_lock, window_lock sash_window satchel sateen satellite, artificial_satellite, orbiter satellite_receiver satellite_television, satellite_TV 
satellite_transmitter satin Saturday_night_special saucepan saucepot sauna, sweat_room savings_bank, coin_bank, money_box, bank saw sawed-off_shotgun sawhorse, horse, sawbuck, buck sawmill saw_set sax, saxophone saxhorn scabbard scaffolding, staging scale scale, weighing_machine scaler scaling_ladder scalpel scanner, electronic_scanner scanner scanner, digital_scanner, image_scanner scantling, stud scarf scarf_joint, scarf scatter_rug, throw_rug scauper, scorper Schmidt_telescope, Schmidt_camera school, schoolhouse schoolbag school_bell school_bus school_ship, training_ship school_system schooner schooner scientific_instrument scimitar scintillation_counter scissors, pair_of_scissors sclerometer scoinson_arch, sconcheon_arch sconce sconce scoop scooter scoreboard scouring_pad scow scow scraper scratcher screen screen, cover, covert, concealment screen screen, CRT_screen screen_door, screen screening screw screw, screw_propeller screw screwdriver screw_eye screw_key screw_thread, thread screwtop screw_wrench scriber, scribe, scratch_awl scrim scrimshaw scriptorium scrubber scrub_brush, scrubbing_brush, scrubber scrub_plane scuffer scuffle, scuffle_hoe, Dutch_hoe scull scull scullery sculpture scuttle, coal_scuttle scyphus scythe seabag sea_boat sea_chest sealing_wax, seal sealskin seam seaplane, hydroplane searchlight searing_iron seat seat seat seat_belt, seatbelt secateurs secondary_coil, secondary_winding, secondary second_balcony, family_circle, upper_balcony, peanut_gallery second_base second_hand secretary, writing_table, escritoire, secretaire sectional security_blanket security_system, security_measure, security security_system sedan, saloon sedan, sedan_chair seeder seeker seersucker segmental_arch Segway, Segway_Human_Transporter, Segway_HT seidel seine seismograph selector, selector_switch selenium_cell self-propelled_vehicle self-registering_thermometer self-starter selsyn, synchro selvage, selvedge semaphore semiautomatic_firearm semiautomatic_pistol, 
semiautomatic semiconductor_device, semiconductor_unit, semiconductor semi-detached_house semigloss semitrailer, semi sennit sensitometer sentry_box separate septic_tank sequence, episode sequencer, sequenator serape, sarape serge serger serial_port serpent serration server server, host service_club serving_cart serving_dish servo, servomechanism, servosystem set set_gun, spring_gun setscrew setscrew set_square settee settle, settee settlement_house seventy-eight, 78 Seven_Wonders_of_the_Ancient_World, Seven_Wonders_of_the_World sewage_disposal_plant, disposal_plant sewer, sewerage, cloaca sewing_basket sewing_kit sewing_machine sewing_needle sewing_room sextant sgraffito shackle, bond, hamper, trammel shackle shade shadow_box shaft shag_rug shaker shank shank, stem shantung shaper, shaping_machine shaping_tool sharkskin sharpener Sharpie shaver, electric_shaver, electric_razor shaving_brush shaving_cream, shaving_soap shaving_foam shawl shawm shears sheath sheathing, overlay, overlayer shed sheep_bell sheepshank sheepskin_coat, afghan sheepwalk, sheeprun sheet, bed_sheet sheet_bend, becket_bend, weaver's_knot, weaver's_hitch sheeting sheet_pile, sheath_pile, sheet_piling Sheetrock shelf shelf_bracket shell shell, case, casing shell, racing_shell shellac, shellac_varnish shelter shelter shelter sheltered_workshop Sheraton shield, buckler shield shielding shift_key, shift shillelagh, shillalah shim shingle shin_guard, shinpad ship shipboard_system shipping, cargo_ships, merchant_marine, merchant_vessels shipping_room ship-towed_long-range_acoustic_detection_system shipwreck shirt shirt_button shirtdress shirtfront shirting shirtsleeve shirttail shirtwaist, shirtwaister shiv shock_absorber, shock, cushion shoe shoe shoebox shoehorn shoe_shop, shoe-shop, shoe_store shoetree shofar, shophar shoji shooting_brake shooting_lodge, shooting_box shooting_stick shop, store shop_bell shopping_bag shopping_basket shopping_cart short_circuit, short short_iron short_pants, 
shorts, trunks short_sleeve shortwave_diathermy_machine shot shot_glass, jigger, pony shotgun, scattergun shotgun_shell shot_tower shoulder shoulder_bag shouldered_arch shoulder_holster shoulder_pad shoulder_patch shovel shovel shovel_hat showboat shower shower_cap shower_curtain shower_room shower_stall, shower_bath showroom, salesroom, saleroom shrapnel shredder shrimper shrine shrink-wrap shunt shunt, electrical_shunt, bypass shunter shutter shutter shuttle shuttle shuttle_bus shuttlecock, bird, birdie, shuttle shuttle_helicopter Sibley_tent sickbay, sick_berth sickbed sickle, reaping_hook, reap_hook sickroom sideboard sidecar side_chapel sidelight, running_light sidesaddle sidewalk, pavement sidewall side-wheeler sidewinder sieve, screen sifter sights sigmoidoscope, flexible_sigmoidoscope signal_box, signal_tower signaling_device signboard, sign silencer, muffler silent_butler Silex silk silks silo silver_plate silverpoint simple_pendulum simulator single_bed single-breasted_jacket single-breasted_suit single_prop, single-propeller_plane single-reed_instrument, single-reed_woodwind single-rotor_helicopter singlestick, fencing_stick, backsword singlet, vest, undershirt siren sister_ship sitar sitz_bath, hip_bath six-pack, six_pack, sixpack skate skateboard skeg skein skeleton, skeletal_frame, frame, underframe skeleton_key skep skep sketch, study sketcher skew_arch skewer ski ski_binding, binding skibob ski_boot ski_cap, stocking_cap, toboggan_cap skidder skid_lid skiff ski_jump ski_lodge ski_mask skimmer ski_parka, ski_jacket ski-plane ski_pole ski_rack skirt skirt ski_tow, ski_lift, lift Skivvies skullcap skybox skyhook skylight, fanlight skysail skyscraper skywalk slacks slack_suit slasher slash_pocket slat, spline slate slate_pencil slate_roof sled, sledge, sleigh sleeper sleeper sleeping_bag sleeping_car, sleeper, wagon-lit sleeve, arm sleeve sleigh_bed sleigh_bell, cascabel slice_bar slicer slicer slide, playground_slide, sliding_board slide_fastener, zip, 
zipper, zip_fastener slide_projector slide_rule, slipstick slide_valve sliding_door sliding_seat sliding_window sling, scarf_bandage, triangular_bandage sling slingback, sling slinger_ring slip_clutch, slip_friction_clutch slipcover slip-joint_pliers slipknot slip-on slipper, carpet_slipper slip_ring slit_lamp slit_trench sloop sloop_of_war slop_basin, slop_bowl slop_pail, slop_jar slops slopshop, slopseller's_shop slot, one-armed_bandit slot_machine, coin_machine sluice, sluiceway, penstock smack small_boat small_computer_system_interface, SCSI small_ship small_stores smart_bomb smelling_bottle smocking smoke_bomb, smoke_grenade smokehouse, meat_house smoker, smoking_car, smoking_carriage, smoking_compartment smoke_screen, smokescreen smoking_room smoothbore smooth_plane, smoothing_plane snack_bar, snack_counter, buffet snaffle, snaffle_bit snap, snap_fastener, press_stud snap_brim snap-brim_hat snare, gin, noose snare_drum, snare, side_drum snatch_block snifter, brandy_snifter, brandy_glass sniper_rifle, precision_rifle snips, tinsnips Sno-cat snood snorkel, schnorkel, schnorchel, snorkel_breather, breather snorkel snowbank, snow_bank snowboard snowmobile snowplow, snowplough snowshoe snowsuit snow_thrower, snow_blower snuffbox snuffer snuffers soapbox soap_dish soap_dispenser soap_pad soccer_ball sock socket socket_wrench socle soda_can soda_fountain soda_fountain sod_house, soddy, adobe_house sodium-vapor_lamp, sodium-vapour_lamp sofa, couch, lounge soffit softball, playground_ball soft_pedal soil_pipe solar_array, solar_battery, solar_panel solar_cell, photovoltaic_cell solar_dish, solar_collector, solar_furnace solar_heater solar_house solar_telescope solar_thermal_system soldering_iron solenoid solleret, sabaton sombrero sonic_depth_finder, fathometer sonogram, echogram sonograph sorter souk sound_bow soundbox, body sound_camera sounder sound_film sounding_board, soundboard sounding_rocket sound_recording, audio_recording, audio sound_spectrograph soup_bowl 
soup_ladle soupspoon, soup_spoon source_of_illumination sourdine soutache soutane sou'wester soybean_future space_bar space_capsule, capsule spacecraft, ballistic_capsule, space_vehicle space_heater space_helmet space_rocket space_shuttle space_station, space_platform, space_laboratory spacesuit spade spade_bit spaghetti_junction Spandau spandex spandrel, spandril spanker spar sparge_pipe spark_arrester, sparker spark_arrester spark_chamber, spark_counter spark_coil spark_gap spark_lever spark_plug, sparking_plug, plug sparkplug_wrench spark_transmitter spat, gaiter spatula spatula speakerphone speaking_trumpet spear, lance, shaft spear, gig, fizgig, fishgig, lance specialty_store specimen_bottle spectacle spectacles, specs, eyeglasses, glasses spectator_pump, spectator spectrograph spectrophotometer spectroscope, prism_spectroscope speculum speedboat speed_bump speedometer, speed_indicator speed_skate, racing_skate spherometer sphygmomanometer spicemill spice_rack spider spider_web, spider's_web spike spike spindle spindle, mandrel, mandril, arbor spindle spin_dryer, spin_drier spinet spinet spinnaker spinner spinning_frame spinning_jenny spinning_machine spinning_rod spinning_wheel spiral_bandage spiral_ratchet_screwdriver, ratchet_screwdriver spiral_spring spirit_lamp spirit_stove spirometer spit spittoon, cuspidor splashboard, splasher, dashboard splasher splice, splicing splicer splint split_rail, fence_rail Spode spoiler spoiler spoke, wheel_spoke, radius spokeshave sponge_cloth sponge_mop spoon spoon Spork sporran sport_kite, stunt_kite sports_car, sport_car sports_equipment sports_implement sportswear, athletic_wear, activewear sport_utility, sport_utility_vehicle, S.U.V., SUV spot spotlight, spot spot_weld, spot-weld spouter sprag spray_gun spray_paint spreader sprig spring spring_balance, spring_scale springboard sprinkler sprinkler_system sprit spritsail sprocket, sprocket_wheel sprocket spun_yarn spur, gad spur_gear, spur_wheel sputnik spy_satellite 
squad_room square square_knot square-rigger square_sail squash_ball squash_racket, squash_racquet, bat squawk_box, squawker, intercom_speaker squeegee squeezer squelch_circuit, squelch, squelcher squinch stabilizer, stabiliser stabilizer stabilizer_bar, anti-sway_bar stable, stalls, horse_barn stable_gear, saddlery, tack stabling stacks staddle stadium, bowl, arena, sports_stadium stage stagecoach, stage stained-glass_window stair-carpet stair-rod stairwell stake stall, stand, sales_booth stall stamp stamp_mill, stamping_mill stamping_machine, stamper stanchion stand standard standard_cell standard_transmission, stick_shift standing_press stanhope Stanley_Steamer staple staple staple_gun, staplegun, tacker stapler, stapling_machine starship, spaceship starter, starter_motor, starting_motor starting_gate, starting_stall Stassano_furnace, electric-arc_furnace Statehouse stately_home state_prison stateroom static_tube station stator, stator_coil statue stay staysail steakhouse, chophouse steak_knife stealth_aircraft stealth_bomber stealth_fighter steam_bath, steam_room, vapor_bath, vapour_bath steamboat steam_chest steam_engine steamer, steamship steamer steam_iron steam_locomotive steamroller, road_roller steam_shovel steam_turbine steam_whistle steel steel_arch_bridge steel_drum steel_mill, steelworks, steel_plant, steel_factory steel-wool_pad steelyard, lever_scale, beam_scale steeple, spire steerage steering_gear steering_linkage steering_system, steering_mechanism steering_wheel, wheel stele, stela stem-winder stencil Sten_gun stenograph step, stair step-down_transformer step_stool step-up_transformer stereo, stereophony, stereo_system, stereophonic_system stereoscope stern_chaser sternpost sternwheeler stethoscope stewing_pan, stewpan stick stick stick, control_stick, joystick stick stile stiletto still stillroom, still_room Stillson_wrench stilt Stinger stink_bomb, stench_bomb stirrer stirrup, stirrup_iron stirrup_pump stob stock, gunstock stockade stockcar 
stock_car stockinet, stockinette stocking stock-in-trade stockpot stockroom, stock_room stocks stock_saddle, Western_saddle stockyard stole stomacher stomach_pump stone_wall stoneware stonework stool stoop, stoep stop_bath, short-stop, short-stop_bath stopcock, cock, turncock stopper_knot stopwatch, stop_watch storage_battery, accumulator storage_cell, secondary_cell storage_ring storage_space storeroom, storage_room, stowage storm_cellar, cyclone_cellar, tornado_cellar storm_door storm_window, storm_sash stoup, stoop stoup stove stove, kitchen_stove, range, kitchen_range, cooking_stove stove_bolt stovepipe stovepipe_iron Stradavarius, Strad straight_chair, side_chair straightedge straightener straight_flute, straight-fluted_drill straight_pin straight_razor strainer straitjacket, straightjacket strap strap strap_hinge, joint_hinge strapless streamer_fly streamliner street street streetcar, tram, tramcar, trolley, trolley_car street_clothes streetlight, street_lamp stretcher stretcher stretch_pants strickle strickle stringed_instrument stringer stringer string_tie strip strip_lighting strip_mall stroboscope, strobe, strobe_light strongbox, deedbox stronghold, fastness strongroom strop structural_member structure, construction student_center student_lamp student_union stud_finder studio_apartment, studio studio_couch, day_bed study study_hall stuffing_nut, packing_nut stump stun_gun, stun_baton stupa, tope sty, pigsty, pigpen stylus, style stylus sub-assembly subcompact, subcompact_car submachine_gun submarine, pigboat, sub, U-boat submarine_torpedo submersible, submersible_warship submersible subtracter subway_token subway_train subwoofer suction_cup suction_pump sudatorium, sudatory suede_cloth, suede sugar_bowl sugar_refinery sugar_spoon, sugar_shell suit, suit_of_clothes suite, rooms suiting sulky summer_house sumo_ring sump sump_pump sunbonnet Sunday_best, Sunday_clothes sun_deck sundial sundress sundries sun_gear sunglass sunglasses, dark_glasses, shades 
sunhat, sun_hat sunlamp, sun_lamp, sunray_lamp, sun-ray_lamp sun_parlor, sun_parlour, sun_porch, sunporch, sunroom, sun_lounge, solarium sunroof, sunshine-roof sunscreen, sunblock, sun_blocker sunsuit supercharger supercomputer superconducting_supercollider superhighway, information_superhighway supermarket superstructure supertanker supper_club supplejack supply_chamber supply_closet support support support_column support_hose, support_stocking supporting_structure supporting_tower surcoat surface_gauge, surface_gage, scribing_block surface_lift surface_search_radar surface_ship surface-to-air_missile, SAM surface-to-air_missile_system surfboat surcoat surgeon's_knot surgery surge_suppressor, surge_protector, spike_suppressor, spike_arrester, lightning_arrester surgical_dressing surgical_instrument surgical_knife surplice surrey surtout surveillance_system surveying_instrument, surveyor's_instrument surveyor's_level sushi_bar suspension, suspension_system suspension_bridge suspensory, suspensory_bandage sustaining_pedal, loud_pedal suture, surgical_seam swab, swob, mop swab swaddling_clothes, swaddling_bands swag swage_block swagger_stick swallow-tailed_coat, swallowtail, morning_coat swamp_buggy, marsh_buggy swan's_down swathe, wrapping swatter, flyswatter, flyswat sweat_bag sweatband sweater, jumper sweat_pants, sweatpants sweatshirt sweatshop sweat_suit, sweatsuit, sweats, workout_suit sweep, sweep_oar sweep_hand, sweep-second swimming_trunks, bathing_trunks swimsuit, swimwear, bathing_suit, swimming_costume, bathing_costume swing swing_door, swinging_door switch, electric_switch, electrical_switch switchblade, switchblade_knife, flick-knife, flick_knife switch_engine, donkey_engine swivel swivel_chair swizzle_stick sword, blade, brand, steel sword_cane, sword_stick S_wrench synagogue, temple, tabernacle synchrocyclotron synchroflash synchromesh synchronous_converter, rotary, rotary_converter synchronous_motor synchrotron synchroscope, synchronoscope, 
synchronizer, synchroniser synthesizer, synthesiser syringe system tabard Tabernacle tabi, tabis tab_key, tab table table tablefork table_knife table_lamp table_saw tablespoon tablet-armed_chair table-tennis_table, ping-pong_table, pingpong_table table-tennis_racquet, table-tennis_bat, pingpong_paddle tabletop tableware tabor, tabour taboret, tabouret tachistoscope, t-scope tachograph tachometer, tach tachymeter, tacheometer tack tack_hammer taffeta taffrail tailgate, tailboard taillight, tail_lamp, rear_light, rear_lamp tailor-made tailor's_chalk tailpipe tail_rotor, anti-torque_rotor tailstock take-up talaria talcum, talcum_powder tam, tam-o'-shanter, tammy tambour tambour, embroidery_frame, embroidery_hoop tambourine tammy tamp, tamper, tamping_bar Tampax tampion, tompion tampon tandoor tangram tank, storage_tank tank, army_tank, armored_combat_vehicle, armoured_combat_vehicle tankard tank_car, tank tank_destroyer tank_engine, tank_locomotive tanker_plane tank_shell tank_top tannoy tap, spigot tapa, tappa tape, tape_recording, taping tape, tapeline, tape_measure tape_deck tape_drive, tape_transport, transport tape_player tape_recorder, tape_machine taper_file tapestry, tapis tappet tap_wrench tare target, butt target_acquisition_system tarmacadam, tarmac, macadam tarpaulin, tarp tartan, plaid tasset, tasse tattoo tavern, tap_house tawse taximeter T-bar_lift, T-bar, Alpine_lift tea_bag tea_ball tea_cart, teacart, tea_trolley, tea_wagon tea_chest teaching_aid teacup tea_gown teakettle tea_maker teapot teashop, teahouse, tearoom, tea_parlor, tea_parlour teaspoon tea-strainer tea_table tea_tray tea_urn tee, golf_tee tee_hinge, T_hinge telecom_hotel, telco_building telecommunication_system, telecom_system, telecommunication_equipment, telecom_equipment telegraph, telegraphy telegraph_key telemeter telephone, phone, telephone_set telephone_bell telephone_booth, phone_booth, call_box, telephone_box, telephone_kiosk telephone_cord, phone_cord telephone_jack, phone_jack 
telephone_line, phone_line, telephone_circuit, subscriber_line, line telephone_plug, phone_plug telephone_pole, telegraph_pole, telegraph_post telephone_receiver, receiver telephone_system, phone_system telephone_wire, telephone_line, telegraph_wire, telegraph_line telephoto_lens, zoom_lens Teleprompter telescope, scope telescopic_sight, telescope_sight telethermometer teletypewriter, teleprinter, teletype_machine, telex, telex_machine television, television_system television_antenna, tv-antenna television_camera, tv_camera, camera television_equipment, video_equipment television_monitor, tv_monitor television_receiver, television, television_set, tv, tv_set, idiot_box, boob_tube, telly, goggle_box television_room, tv_room television_transmitter telpher, telfer telpherage, telferage tempera, poster_paint, poster_color, poster_colour temple temple temporary_hookup, patch tender, supply_ship tender, ship's_boat, pinnace, cutter tender tenement, tenement_house tennis_ball tennis_camp tennis_racket, tennis_racquet tenon tenor_drum, tom-tom tenoroon tenpenny_nail tenpin tensimeter tensiometer tensiometer tensiometer tent, collapsible_shelter tenter tenterhook tent-fly, rainfly, fly_sheet, fly, tent_flap tent_peg tepee, tipi, teepee terminal, pole terminal terraced_house terra_cotta terrarium terra_sigillata, Samian_ware terry, terry_cloth, terrycloth Tesla_coil tessera test_equipment test_rocket, research_rocket, test_instrument_vehicle test_room, testing_room testudo tetraskelion, tetraskele tetrode textile_machine textile_mill thatch, thatched_roof theater, theatre, house theater_curtain, theatre_curtain theater_light theodolite, transit theremin thermal_printer thermal_reactor thermocouple, thermocouple_junction thermoelectric_thermometer, thermel, electric_thermometer thermograph, thermometrograph thermograph thermohydrometer, thermogravimeter thermojunction thermometer thermonuclear_reactor, fusion_reactor thermopile thermos, thermos_bottle, thermos_flask 
thermostat, thermoregulator thigh_pad thill thimble thinning_shears third_base, third third_gear, third third_rail thong thong three-centered_arch, basket-handle_arch three-decker three-dimensional_radar, 3d_radar three-piece_suit three-quarter_binding three-way_switch, three-point_switch thresher, thrasher, threshing_machine threshing_floor thriftshop, second-hand_store throat_protector throne thrust_bearing thruster thumb thumbhole thumbscrew thumbstall thumbtack, drawing_pin, pushpin thunderer thwart, cross_thwart tiara ticking tickler_coil tie, tie_beam tie, railroad_tie, crosstie, sleeper tie_rack tie_rod tights, leotards tile tile_cutter tile_roof tiller tilter tilt-top_table, tip-top_table, tip_table timber timber timber_hitch timbrel time_bomb, infernal_machine time_capsule time_clock time-delay_measuring_instrument, time-delay_measuring_system time-fuse timepiece, timekeeper, horologe timer timer time-switch tin tinderbox tine tinfoil, tin_foil tippet tire_chain, snow_chain tire_iron, tire_tool titfer tithe_barn titrator toaster toaster_oven toasting_fork toastrack tobacco_pouch tobacco_shop, tobacconist_shop, tobacconist toboggan toby, toby_jug, toby_fillpot_jug tocsin, warning_bell toe toecap toehold toga toga_virilis toggle toggle_bolt toggle_joint toggle_switch, toggle, on-off_switch, on/off_switch togs, threads, duds toilet, lavatory, lav, can, john, privy, bathroom toilet_bag, sponge_bag toilet_bowl toilet_kit, travel_kit toilet_powder, bath_powder, dusting_powder toiletry, toilet_articles toilet_seat toilet_water, eau_de_toilette tokamak token tollbooth, tolbooth, tollhouse toll_bridge tollgate, tollbar toll_line tomahawk, hatchet Tommy_gun, Thompson_submachine_gun tomograph tone_arm, pickup, pickup_arm toner tongs, pair_of_tongs tongue tongue_and_groove_joint tongue_depressor tonometer tool tool_bag toolbox, tool_chest, tool_cabinet, tool_case toolshed, toolhouse tooth tooth toothbrush toothpick top top, cover topgallant, topgallant_mast 
topgallant, topgallant_sail topiary topknot topmast topper topsail toque torch torpedo torpedo torpedo torpedo_boat torpedo-boat_destroyer torpedo_tube torque_converter torque_wrench torture_chamber totem_pole touch_screen, touchscreen toupee, toupe touring_car, phaeton, tourer tourist_class, third_class towel toweling, towelling towel_rack, towel_horse towel_rail, towel_bar tower town_hall towpath, towing_path tow_truck, tow_car, wrecker toy toy_box, toy_chest toyshop trace_detector track, rail, rails, runway track trackball tracked_vehicle tract_house tract_housing traction_engine tractor tractor trail_bike, dirt_bike, scrambler trailer, house_trailer trailer trailer_camp, trailer_park trailer_truck, tractor_trailer, trucking_rig, rig, articulated_lorry, semi trailing_edge train, railroad_train tramline, tramway, streetcar_track trammel trampoline tramp_steamer, tramp tramway, tram, aerial_tramway, cable_tramway, ropeway transdermal_patch, skin_patch transept transformer transistor, junction_transistor, electronic_transistor transit_instrument transmission, transmission_system transmission_shaft transmitter, sender transom, traverse transom, transom_window, fanlight transponder transporter transporter, car_transporter transport_ship trap trap_door trapeze trave, traverse, crossbeam, crosspiece travel_iron trawl, dragnet, trawl_net trawl, trawl_line, spiller, setline, trotline trawler, dragger tray tray_cloth tread tread treadmill, treadwheel, tread-wheel treadmill treasure_chest treasure_ship treenail, trenail, trunnel trefoil_arch trellis, treillage trench trench_coat trench_knife trepan trepan, trephine trestle trestle trestle_bridge trestle_table trestlework trews trial_balloon triangle triangle triclinium triclinium tricorn, tricorne tricot tricycle, trike, velocipede trident trigger trimaran trimmer trimmer_arch triode tripod triptych trip_wire trireme triskelion, triskele triumphal_arch trivet trivet troika troll trolleybus, trolley_coach, trackless_trolley 
trombone troop_carrier, troop_transport troopship trophy_case trough trouser trouser_cuff trouser_press, pants_presser trouser, pant trousseau trowel truck, motortruck trumpet_arch truncheon, nightstick, baton, billy, billystick, billy_club trundle_bed, trundle, truckle_bed, truckle trunk trunk_hose trunk_lid trunk_line truss truss_bridge try_square T-square tub, vat tube, vacuum_tube, thermionic_vacuum_tube, thermionic_tube, electron_tube, thermionic_valve tuck_box tucker tucker-bag tuck_shop Tudor_arch, four-centered_arch tudung tugboat, tug, towboat, tower tulle tumble-dryer, tumble_drier tumbler tumbrel, tumbril tun tunic tuning_fork tupik, tupek, sealskin_tent turban turbine turbogenerator tureen Turkish_bath Turkish_towel, terry_towel Turk's_head turnbuckle turner, food_turner turnery turnpike turnspit turnstile turntable turntable, lazy_Susan turret turret_clock turtleneck, turtle, polo-neck tweed tweeter twenty-two, .22 twenty-two_pistol twenty-two_rifle twill twill, twill_weave twin_bed twinjet twist_bit, twist_drill two-by-four two-man_tent two-piece, two-piece_suit, lounge_suit typesetting_machine typewriter typewriter_carriage typewriter_keyboard tyrolean, tirolean uke, ukulele ulster ultracentrifuge ultramicroscope, dark-field_microscope Ultrasuede ultraviolet_lamp, ultraviolet_source umbrella umbrella_tent undercarriage undercoat, underseal undergarment, unmentionable underpants underwear, underclothes, underclothing undies uneven_parallel_bars, uneven_bars unicycle, monocycle uniform universal_joint, universal university upholstery upholstery_material upholstery_needle uplift upper_berth, upper upright, upright_piano upset, swage upstairs urceole urn urn used-car, secondhand_car utensil Uzi vacation_home vacuum, vacuum_cleaner vacuum_chamber vacuum_flask, vacuum_bottle vacuum_gauge, vacuum_gage Valenciennes, Valenciennes_lace valise valve valve valve-in-head_engine vambrace, lower_cannon van van, caravan vane vaporizer, vaporiser 
variable-pitch_propeller variometer varnish vase vault vault, bank_vault vaulting_horse, long_horse, buck vehicle Velcro velocipede velour, velours velvet velveteen vending_machine veneer, veneering Venetian_blind Venn_diagram, Venn's_diagram ventilation, ventilation_system, ventilating_system ventilation_shaft ventilator veranda, verandah, gallery verdigris vernier_caliper, vernier_micrometer vernier_scale, vernier vertical_file vertical_stabilizer, vertical_stabiliser, vertical_fin, tail_fin, tailfin vertical_tail Very_pistol, Verey_pistol vessel, watercraft vessel vest, waistcoat vestiture vestment vest_pocket vestry, sacristy viaduct vibraphone, vibraharp, vibes vibrator vibrator Victrola vicuna videocassette videocassette_recorder, VCR videodisk, videodisc, DVD video_recording, video videotape videotape vigil_light, vigil_candle villa villa villa viol viola viola_da_braccio viola_da_gamba, gamba, bass_viol viola_d'amore violin, fiddle virginal, pair_of_virginals viscometer, viscosimeter viscose_rayon, viscose vise, bench_vise visor, vizor visual_display_unit, VDU vivarium Viyella voile volleyball volleyball_net voltage_regulator voltaic_cell, galvanic_cell, primary_cell voltaic_pile, pile, galvanic_pile voltmeter vomitory von_Neumann_machine voting_booth voting_machine voussoir vox_angelica, voix_celeste vox_humana waders wading_pool waffle_iron wagon, waggon wagon, coaster_wagon wagon_tire wagon_wheel wain wainscot, wainscoting, wainscotting wainscoting, wainscotting waist_pack, belt_bag walker, baby-walker, go-cart walker, Zimmer, Zimmer_frame walker walkie-talkie, walky-talky walk-in walking_shoe walking_stick Walkman walk-up_apartment, walk-up wall wall wall_clock wallet, billfold, notecase, pocketbook wall_tent wall_unit wand Wankel_engine, Wankel_rotary_engine, epitrochoidal_engine ward, hospital_ward wardrobe, closet, press wardroom warehouse, storage_warehouse warming_pan war_paint warplane, military_plane war_room warship, war_vessel, combat_ship wash 
wash-and-wear washbasin, handbasin, washbowl, lavabo, wash-hand_basin washboard, splashboard washboard washer, automatic_washer, washing_machine washer washhouse washroom washstand, wash-hand_stand washtub wastepaper_basket, waste-paper_basket, wastebasket, waste_basket, circular_file watch, ticker watch_cap watch_case watch_glass watchtower water-base_paint water_bed water_bottle water_butt water_cart water_chute water_closet, closet, W.C., loo watercolor, water-color, watercolour, water-colour water-cooled_reactor water_cooler water_faucet, water_tap, tap, hydrant water_filter water_gauge, water_gage, water_glass water_glass water_hazard water_heater, hot-water_heater, hot-water_tank watering_can, watering_pot watering_cart water_jacket water_jug water_jump water_level water_meter water_mill waterproof waterproofing water_pump water_scooter, sea_scooter, scooter water_ski waterspout water_tower water_wagon, water_waggon waterwheel, water_wheel waterwheel, water_wheel water_wings waterworks wattmeter waxwork, wax_figure ways, shipway, slipway weapon, arm, weapon_system weaponry, arms, implements_of_war, weapons_system, munition weapons_carrier weathercock weatherglass weather_satellite, meteorological_satellite weather_ship weathervane, weather_vane, vane, wind_vane web, entanglement web webbing webcam wedge wedge wedgie Wedgwood weeder, weed-whacker weeds, widow's_weeds weekender weighbridge weight, free_weight, exercising_weight weir weir welcome_wagon weld welder's_mask weldment well wellhead welt Weston_cell, cadmium_cell wet_bar wet-bulb_thermometer wet_cell wet_fly wet_suit whaleboat whaler, whaling_ship whaling_gun wheel wheel wheel_and_axle wheelchair wheeled_vehicle wheelwork wherry wherry, Norfolk_wherry whetstone whiffletree, whippletree, swingletree whip whipcord whipping_post whipstitch, whipping, whipstitching whirler whisk, whisk_broom whisk whiskey_bottle whiskey_jug whispering_gallery, whispering_dome whistle whistle white white_goods whitewash 
whorehouse, brothel, bordello, bagnio, house_of_prostitution, house_of_ill_repute, bawdyhouse, cathouse, sporting_house wick, taper wicker, wickerwork, caning wicker_basket wicket, hoop wicket wickiup, wikiup wide-angle_lens, fisheye_lens widebody_aircraft, wide-body_aircraft, wide-body, twin-aisle_airplane wide_wale widow's_walk Wiffle, Wiffle_Ball wig wigwam Wilton, Wilton_carpet wimple wincey winceyette winch, windlass Winchester windbreak, shelterbelt winder, key wind_instrument, wind windjammer windmill, aerogenerator, wind_generator windmill window window window_blind window_box window_envelope window_frame window_screen window_seat window_shade windowsill windshield, windscreen windshield_wiper, windscreen_wiper, wiper, wiper_blade Windsor_chair Windsor_knot Windsor_tie wind_tee wind_tunnel wind_turbine wine_bar wine_bottle wine_bucket, wine_cooler wine_cask, wine_barrel wineglass winepress winery, wine_maker wineskin wing wing_chair wing_nut, wing-nut, wing_screw, butterfly_nut, thumbnut wing_tip wing_tip winker, blinker, blinder wiper, wiper_arm, contact_arm wiper_motor wire wire, conducting_wire wire_cloth wire_cutter wire_gauge, wire_gage wireless_local_area_network, WLAN, wireless_fidelity, WiFi wire_matrix_printer, wire_printer, stylus_printer wire_recorder wire_stripper wirework, grillwork wiring wishing_cap witness_box, witness_stand wok woman's_clothing wood woodcarving wood_chisel woodenware wooden_spoon woodscrew woodshed wood_vise, woodworking_vise, shoulder_vise woodwind, woodwind_instrument, wood woof, weft, filling, pick woofer wool, woolen, woollen workbasket, workbox, workbag workbench, work_bench, bench work-clothing, work-clothes workhouse workhouse workpiece workroom works, workings work-shirt workstation worktable, work_table workwear World_Wide_Web, WWW, web worm_fence, snake_fence, snake-rail_fence, Virginia_fence worm_gear worm_wheel worsted worsted, worsted_yarn wrap, wrapper wraparound wrapping, wrap, wrapper wreck wrench, spanner 
wrestling_mat wringer wrist_pad wrist_pin, gudgeon_pin wristwatch, wrist_watch writing_arm writing_desk writing_desk writing_implement xerographic_printer Xerox, xerographic_copier, Xerox_machine X-ray_film X-ray_machine X-ray_tube yacht, racing_yacht yacht_chair yagi, Yagi_aerial yard yard yardarm yard_marker yardstick, yard_measure yarmulke, yarmulka, yarmelke yashmak, yashmac yataghan yawl, dandy yawl yoke yoke yoke, coupling yurt Zamboni zero ziggurat, zikkurat, zikurat zill zip_gun zither, cither, zithern zoot_suit shading grain wood_grain, woodgrain, woodiness graining, woodgraining marbleization, marbleisation, marbleizing, marbleising light, lightness aura, aureole, halo, nimbus, glory, gloriole sunniness glint opalescence, iridescence polish, gloss, glossiness, burnish primary_color_for_pigments, primary_colour_for_pigments primary_color_for_light, primary_colour_for_light colorlessness, colourlessness, achromatism, achromaticity mottle achromia shade, tint, tincture, tone chromatic_color, chromatic_colour, spectral_color, spectral_colour black, blackness, inkiness coal_black, ebony, jet_black, pitch_black, sable, soot_black alabaster bone, ivory, pearl, off-white gray, grayness, grey, greyness ash_grey, ash_gray, silver, silver_grey, silver_gray charcoal, charcoal_grey, charcoal_gray, oxford_grey, oxford_gray sanguine Turkey_red, alizarine_red crimson, ruby, deep_red dark_red claret fuschia maroon orange, orangeness reddish_orange yellow, yellowness gamboge, lemon, lemon_yellow, maize pale_yellow, straw, wheat green, greenness, viridity greenishness sea_green sage_green bottle_green emerald olive_green, olive-green jade_green, jade blue, blueness azure, cerulean, sapphire, lazuline, sky-blue steel_blue greenish_blue, aqua, aquamarine, turquoise, cobalt_blue, peacock_blue purplish_blue, royal_blue purple, purpleness Tyrian_purple indigo lavender reddish_purple, royal_purple pink carnation rose, rosiness chestnut chocolate, coffee, deep_brown, umber, 
burnt_umber light_brown tan, topaz beige, ecru reddish_brown, sepia, burnt_sienna, Venetian_red, mahogany brick_red copper, copper_color Indian_red puce olive ultramarine complementary_color, complementary pigmentation complexion, skin_color, skin_colour ruddiness, rosiness nonsolid_color, nonsolid_colour, dithered_color, dithered_colour aposematic_coloration, warning_coloration cryptic_coloration ring center_of_curvature, centre_of_curvature cadaver, corpse, stiff, clay, remains mandibular_notch rib skin, tegument, cutis skin_graft epidermal_cell melanocyte prickle_cell columnar_cell, columnar_epithelial_cell spongioblast squamous_cell amyloid_plaque, amyloid_protein_plaque dental_plaque, bacterial_plaque macule, macula freckle, lentigo bouffant sausage_curl forelock spit_curl, kiss_curl pigtail pageboy pompadour thatch soup-strainer, toothbrush mustachio, moustachio, handle-bars walrus_mustache, walrus_moustache stubble vandyke_beard, vandyke soul_patch, Attilio esophageal_smear paraduodenal_smear, duodenal_smear specimen punctum glenoid_fossa, glenoid_cavity diastema marrow, bone_marrow mouth, oral_cavity, oral_fissure, rima_oris canthus milk mother's_milk colostrum, foremilk vein, vena, venous_blood_vessel ganglion_cell, gangliocyte X_chromosome embryonic_cell, formative_cell myeloblast sideroblast osteocyte megalocyte, macrocyte leukocyte, leucocyte, white_blood_cell, white_cell, white_blood_corpuscle, white_corpuscle, WBC histiocyte fixed_phagocyte lymphocyte, lymph_cell monoblast neutrophil, neutrophile microphage sickle_cell siderocyte spherocyte ootid oocyte spermatid Leydig_cell, Leydig's_cell striated_muscle_cell, striated_muscle_fiber smooth_muscle_cell Ranvier's_nodes, nodes_of_Ranvier neuroglia, glia astrocyte protoplasmic_astrocyte oligodendrocyte proprioceptor dendrite sensory_fiber, afferent_fiber subarachnoid_space cerebral_cortex, cerebral_mantle, pallium, cortex renal_cortex prepuce, foreskin head, caput scalp frontal_eminence suture, sutura, 
fibrous_joint foramen_magnum esophagogastric_junction, oesophagogastric_junction heel cuticle hangnail, agnail exoskeleton abdominal_wall lemon coordinate_axis landscape medium vehicle paper channel, transmission_channel film, cinema, celluloid silver_screen free_press press, public_press print_media storage_medium, data-storage_medium magnetic_storage_medium, magnetic_medium, magnetic_storage journalism, news_media Fleet_Street photojournalism news_photography rotogravure newspaper, paper daily gazette school_newspaper, school_paper tabloid, rag, sheet yellow_journalism, tabloid, tab telecommunication, telecom telephone, telephony voice_mail, voicemail call, phone_call, telephone_call call-back collect_call call_forwarding call-in call_waiting crank_call local_call long_distance, long-distance_call, trunk_call toll_call wake-up_call three-way_calling telegraphy cable, cablegram, overseas_telegram wireless radiotelegraph, radiotelegraphy, wireless_telegraphy radiotelephone, radiotelephony, wireless_telephone broadcasting Rediffusion multiplex radio, radiocommunication, wireless television, telecasting, TV, video cable_television, cable high-definition_television, HDTV reception signal_detection, detection Hakham web_site, website, internet_site, site chat_room, chatroom portal_site, portal jotter breviary wordbook desk_dictionary, collegiate_dictionary reckoner, ready_reckoner document, written_document, papers album, record_album concept_album rock_opera tribute_album, benefit_album magazine, mag colour_supplement comic_book news_magazine pulp, pulp_magazine slick, slick_magazine, glossy trade_magazine movie, film, picture, moving_picture, moving-picture_show, motion_picture, motion-picture_show, picture_show, pic, flick outtake shoot-'em-up spaghetti_Western encyclical, encyclical_letter crossword_puzzle, crossword sign street_sign traffic_light, traffic_signal, stoplight swastika, Hakenkreuz concert artwork, art, graphics, nontextual_matter lobe book_jacket, 
dust_cover, dust_jacket, dust_wrapper cairn three-day_event comfort_food comestible, edible, eatable, pabulum, victual, victuals tuck course dainty, delicacy, goody, kickshaw, treat dish fast_food finger_food ingesta kosher fare diet diet dietary balanced_diet bland_diet, ulcer_diet clear_liquid_diet diabetic_diet dietary_supplement carbohydrate_loading, carbo_loading fad_diet gluten-free_diet high-protein_diet high-vitamin_diet, vitamin-deficiency_diet light_diet liquid_diet low-calorie_diet low-fat_diet low-sodium_diet, low-salt_diet, salt-free_diet macrobiotic_diet reducing_diet, obesity_diet soft_diet, pap, spoon_food vegetarianism menu chow, chuck, eats, grub board, table mess ration field_ration K_ration C-ration foodstuff, food_product starches breadstuff coloring, colouring, food_coloring, food_colouring, food_color, food_colour concentrate tomato_concentrate meal kibble cornmeal, Indian_meal farina matzo_meal, matzoh_meal, matzah_meal oatmeal, rolled_oats pea_flour roughage, fiber bran flour plain_flour wheat_flour whole_wheat_flour, graham_flour, graham, whole_meal_flour soybean_meal, soybean_flour, soy_flour semolina corn_gluten_feed nutriment, nourishment, nutrition, sustenance, aliment, alimentation, victuals commissariat, provisions, provender, viands, victuals larder frozen_food, frozen_foods canned_food, canned_foods, canned_goods, tinned_goods canned_meat, tinned_meat Spam dehydrated_food, dehydrated_foods square_meal meal, repast potluck refection refreshment breakfast continental_breakfast, petit_dejeuner brunch lunch, luncheon, tiffin, dejeuner business_lunch high_tea tea, afternoon_tea, teatime dinner supper buffet picnic cookout barbecue, barbeque clambake fish_fry bite, collation, snack nosh nosh-up ploughman's_lunch coffee_break, tea_break banquet, feast, spread entree, main_course piece_de_resistance plate adobo side_dish, side_order, entremets special casserole chicken_casserole chicken_cacciatore, chicken_cacciatora, hunter's_chicken 
antipasto appetizer, appetiser, starter canape cocktail fruit_cocktail crab_cocktail shrimp_cocktail hors_d'oeuvre relish dip bean_dip cheese_dip clam_dip guacamole soup soup_du_jour alphabet_soup consomme madrilene bisque borsch, borsh, borscht, borsht, borshch, bortsch broth barley_water bouillon beef_broth, beef_stock chicken_broth, chicken_stock broth, stock stock_cube chicken_soup cock-a-leekie, cocky-leeky gazpacho gumbo julienne marmite mock_turtle_soup mulligatawny oxtail_soup pea_soup pepper_pot, Philadelphia_pepper_pot petite_marmite, minestrone, vegetable_soup potage, pottage pottage turtle_soup, green_turtle_soup eggdrop_soup chowder corn_chowder clam_chowder Manhattan_clam_chowder New_England_clam_chowder fish_chowder won_ton, wonton, wonton_soup split-pea_soup green_pea_soup, potage_St._Germain lentil_soup Scotch_broth vichyssoise stew bigos Brunswick_stew burgoo burgoo olla_podrida, Spanish_burgoo mulligan_stew, mulligan, Irish_burgoo purloo, chicken_purloo, poilu goulash, Hungarian_goulash, gulyas hotchpotch hot_pot, hotpot beef_goulash pork-and-veal_goulash porkholt Irish_stew oyster_stew lobster_stew lobscouse, lobscuse, scouse fish_stew bouillabaisse matelote paella fricassee chicken_stew turkey_stew beef_stew ragout ratatouille salmi pot-au-feu slumgullion smorgasbord viand ready-mix brownie_mix cake_mix lemonade_mix self-rising_flour, self-raising_flour choice_morsel, tidbit, titbit savory, savoury calf's-foot_jelly caramel, caramelized_sugar lump_sugar cane_sugar castor_sugar, caster_sugar powdered_sugar granulated_sugar icing_sugar corn_sugar brown_sugar demerara, demerara_sugar sweet, confection confectionery confiture sweetmeat candy, confect candy_bar carob_bar hardbake hard_candy barley-sugar, barley_candy brandyball jawbreaker lemon_drop sourball patty peppermint_patty bonbon brittle, toffee, toffy peanut_brittle chewing_gum, gum gum_ball bubble_gum butterscotch candied_fruit, succade, crystallized_fruit candied_apple, candy_apple, 
taffy_apple, caramel_apple, toffee_apple crystallized_ginger grapefruit_peel lemon_peel orange_peel candied_citrus_peel candy_cane candy_corn caramel center, centre comfit cotton_candy, spun_sugar, candyfloss dragee dragee fondant fudge chocolate_fudge divinity, divinity_fudge penuche, penoche, panoche, panocha gumdrop jujube honey_crisp mint, mint_candy horehound peppermint, peppermint_candy jelly_bean, jelly_egg kiss, candy_kiss molasses_kiss meringue_kiss chocolate_kiss licorice, liquorice Life_Saver lollipop, sucker, all-day_sucker lozenge cachou cough_drop, troche, pastille, pastil marshmallow marzipan, marchpane nougat nougat_bar nut_bar peanut_bar popcorn_ball praline rock_candy rock_candy, rock sugar_candy sugarplum taffy molasses_taffy truffle, chocolate_truffle Turkish_Delight dessert, sweet, afters ambrosia, nectar ambrosia baked_Alaska blancmange charlotte compote, fruit_compote dumpling flan frozen_dessert junket mousse mousse pavlova peach_melba whip prune_whip pudding pudding, pud syllabub, sillabub tiramisu trifle tipsy_cake jello, Jell-O apple_dumpling ice, frappe water_ice, sorbet ice_cream, icecream ice-cream_cone chocolate_ice_cream Neapolitan_ice_cream peach_ice_cream sherbert, sherbet strawberry_ice_cream tutti-frutti vanilla_ice_cream ice_lolly, lolly, lollipop, popsicle ice_milk frozen_yogurt snowball snowball parfait ice-cream_sundae, sundae split banana_split frozen_pudding frozen_custard, soft_ice_cream pudding flummery fish_mousse chicken_mousse chocolate_mousse plum_pudding, Christmas_pudding carrot_pudding corn_pudding steamed_pudding duff, plum_duff vanilla_pudding chocolate_pudding brown_Betty Nesselrode, Nesselrode_pudding pease_pudding custard creme_caramel creme_anglais creme_brulee fruit_custard tapioca tapioca_pudding roly-poly, roly-poly_pudding suet_pudding Bavarian_cream maraschino, maraschino_cherry nonpareil zabaglione, sabayon garnish pastry, pastry_dough turnover apple_turnover knish pirogi, piroshki, pirozhki samosa 
timbale puff_paste, pate_feuillete phyllo puff_batter, pouf_paste, pate_a_choux ice-cream_cake, icebox_cake doughnut, donut, sinker fish_cake, fish_ball fish_stick, fish_finger conserve, preserve, conserves, preserves apple_butter chowchow jam lemon_curd, lemon_cheese strawberry_jam, strawberry_preserves jelly apple_jelly crabapple_jelly grape_jelly marmalade orange_marmalade gelatin, jelly gelatin_dessert buffalo_wing barbecued_wing mess mince puree barbecue, barbeque biryani, biriani escalope_de_veau_Orloff saute patty, cake veal_parmesan, veal_parmigiana veal_cordon_bleu margarine, margarin, oleo, oleomargarine, marge mincemeat stuffing, dressing turkey_stuffing oyster_stuffing, oyster_dressing forcemeat, farce bread, breadstuff, staff_of_life anadama_bread bap barmbrack breadstick, bread-stick grissino brown_bread, Boston_brown_bread bun, roll tea_bread caraway_seed_bread challah, hallah cinnamon_bread cracked-wheat_bread cracker crouton dark_bread, whole_wheat_bread, whole_meal_bread, brown_bread English_muffin flatbread garlic_bread gluten_bread graham_bread Host flatbrod bannock chapatti, chapati pita, pocket_bread loaf_of_bread, loaf French_loaf matzo, matzoh, matzah, unleavened_bread nan, naan onion_bread raisin_bread quick_bread banana_bread date_bread date-nut_bread nut_bread oatcake Irish_soda_bread skillet_bread, fry_bread rye_bread black_bread, pumpernickel Jewish_rye_bread, Jewish_rye limpa Swedish_rye_bread, Swedish_rye salt-rising_bread simnel sour_bread, sourdough_bread toast wafer white_bread, light_bread baguet, baguette French_bread Italian_bread cornbread corn_cake skillet_corn_bread ashcake, ash_cake, corn_tash hoecake cornpone, pone corn_dab, corn_dodger, dodger hush_puppy, hushpuppy johnnycake, johnny_cake, journey_cake Shawnee_cake spoon_bread, batter_bread cinnamon_toast orange_toast Melba_toast zwieback, rusk, Brussels_biscuit, twice-baked_bread frankfurter_bun, hotdog_bun hamburger_bun, hamburger_roll muffin, gem bran_muffin corn_muffin 
Yorkshire_pudding popover scone drop_scone, griddlecake, Scotch_pancake cross_bun, hot_cross_bun brioche crescent_roll, croissant hard_roll, Vienna_roll soft_roll kaiser_roll Parker_House_roll clover-leaf_roll onion_roll bialy, bialystoker sweet_roll, coffee_roll bear_claw, bear_paw cinnamon_roll, cinnamon_bun, cinnamon_snail honey_bun, sticky_bun, caramel_bun, schnecken pinwheel_roll danish, danish_pastry bagel, beigel onion_bagel biscuit rolled_biscuit baking-powder_biscuit buttermilk_biscuit, soda_biscuit shortcake hardtack, pilot_biscuit, pilot_bread, sea_biscuit, ship_biscuit saltine soda_cracker oyster_cracker water_biscuit graham_cracker pretzel soft_pretzel sandwich sandwich_plate butty ham_sandwich chicken_sandwich club_sandwich, three-decker, triple-decker open-face_sandwich, open_sandwich hamburger, beefburger, burger cheeseburger tunaburger hotdog, hot_dog, red_hot Sloppy_Joe bomber, grinder, hero, hero_sandwich, hoagie, hoagy, Cuban_sandwich, Italian_sandwich, poor_boy, sub, submarine, submarine_sandwich, torpedo, wedge, zep gyro bacon-lettuce-tomato_sandwich, BLT Reuben western, western_sandwich wrap spaghetti hasty_pudding gruel congee, jook skilly edible_fruit vegetable, veggie, veg julienne, julienne_vegetable raw_vegetable, rabbit_food crudites celery_stick legume pulse potherb greens, green, leafy_vegetable chop-suey_greens bean_curd, tofu solanaceous_vegetable root_vegetable potato, white_potato, Irish_potato, murphy, spud, tater baked_potato french_fries, french-fried_potatoes, fries, chips home_fries, home-fried_potatoes jacket_potato mashed_potato potato_skin, potato_peel, potato_peelings Uruguay_potato yam sweet_potato yam snack_food chip, crisp, potato_chip, Saratoga_chip corn_chip tortilla_chip nacho eggplant, aubergine, mad_apple pieplant, rhubarb cruciferous_vegetable mustard, mustard_greens, leaf_mustard, Indian_mustard cabbage, chou kale, kail, cole collards, collard_greens Chinese_cabbage, celery_cabbage, Chinese_celery bok_choy, 
bok_choi head_cabbage red_cabbage savoy_cabbage, savoy broccoli cauliflower brussels_sprouts broccoli_rabe, broccoli_raab squash summer_squash yellow_squash crookneck, crookneck_squash, summer_crookneck zucchini, courgette marrow, vegetable_marrow cocozelle pattypan_squash spaghetti_squash winter_squash acorn_squash butternut_squash hubbard_squash turban_squash buttercup_squash cushaw winter_crookneck_squash cucumber, cuke gherkin artichoke, globe_artichoke artichoke_heart Jerusalem_artichoke, sunchoke asparagus bamboo_shoot sprout bean_sprout alfalfa_sprout beet, beetroot beet_green sugar_beet mangel-wurzel chard, Swiss_chard, spinach_beet, leaf_beet pepper sweet_pepper bell_pepper green_pepper globe_pepper pimento, pimiento hot_pepper chili, chili_pepper, chilli, chilly, chile jalapeno, jalapeno_pepper chipotle cayenne, cayenne_pepper tabasco, red_pepper onion Bermuda_onion green_onion, spring_onion, scallion Vidalia_onion Spanish_onion purple_onion, red_onion leek shallot salad_green, salad_greens lettuce butterhead_lettuce buttercrunch Bibb_lettuce Boston_lettuce crisphead_lettuce, iceberg_lettuce, iceberg cos, cos_lettuce, romaine, romaine_lettuce leaf_lettuce, loose-leaf_lettuce celtuce bean, edible_bean goa_bean lentil pea green_pea, garden_pea marrowfat_pea snow_pea, sugar_pea sugar_snap_pea split-pea chickpea, garbanzo cajan_pea, pigeon_pea, dahl field_pea mushy_peas black-eyed_pea, cowpea common_bean kidney_bean navy_bean, pea_bean, white_bean pinto_bean frijole black_bean, turtle_bean fresh_bean flageolet, haricot green_bean snap_bean, snap string_bean Kentucky_wonder, Kentucky_wonder_bean scarlet_runner, scarlet_runner_bean, runner_bean, English_runner_bean haricot_vert, haricots_verts, French_bean wax_bean, yellow_bean shell_bean lima_bean Fordhooks sieva_bean, butter_bean, butterbean, civet_bean fava_bean, broad_bean soy, soybean, soya, soya_bean green_soybean field_soybean cardoon carrot carrot_stick celery pascal_celery, Paschal_celery celeriac, 
celery_root chicory, curly_endive radicchio coffee_substitute chicory, chicory_root Postum chicory_escarole, endive, escarole Belgian_endive, French_endive, witloof corn, edible_corn sweet_corn, green_corn hominy lye_hominy pearl_hominy popcorn cress watercress garden_cress winter_cress dandelion_green gumbo, okra kohlrabi, turnip_cabbage lamb's-quarter, pigweed, wild_spinach wild_spinach tomato beefsteak_tomato cherry_tomato plum_tomato tomatillo, husk_tomato, Mexican_husk_tomato mushroom stuffed_mushroom salsify oyster_plant, vegetable_oyster scorzonera, black_salsify parsnip pumpkin radish turnip white_turnip rutabaga, swede, swedish_turnip, yellow_turnip turnip_greens sorrel, common_sorrel French_sorrel spinach taro, taro_root, cocoyam, dasheen, edda truffle, earthnut edible_nut bunya_bunya peanut, earthnut, goober, goober_pea, groundnut, monkey_nut freestone cling, clingstone windfall apple crab_apple, crabapple eating_apple, dessert_apple Baldwin Cortland Cox's_Orange_Pippin Delicious Golden_Delicious, Yellow_Delicious Red_Delicious Empire Grimes'_golden Jonathan McIntosh Macoun Northern_Spy Pearmain Pippin Prima Stayman Winesap Stayman_Winesap cooking_apple Bramley's_Seedling Granny_Smith Lane's_Prince_Albert Newtown_Wonder Rome_Beauty berry bilberry, whortleberry, European_blueberry huckleberry blueberry wintergreen, boxberry, checkerberry, teaberry, spiceberry cranberry lingonberry, mountain_cranberry, cowberry, lowbush_cranberry currant gooseberry black_currant red_currant blackberry boysenberry dewberry loganberry raspberry saskatoon, serviceberry, shadberry, juneberry strawberry sugarberry, hackberry persimmon acerola, barbados_cherry, surinam_cherry, West_Indian_cherry carambola, star_fruit ceriman, monstera carissa_plum, natal_plum citrus, citrus_fruit, citrous_fruit orange temple_orange mandarin, mandarin_orange clementine satsuma tangerine tangelo, ugli, ugli_fruit bitter_orange, Seville_orange, sour_orange sweet_orange Jaffa_orange navel_orange 
Valencia_orange kumquat lemon lime key_lime grapefruit pomelo, shaddock citrange citron almond Jordan_almond apricot peach nectarine pitahaya plum damson, damson_plum greengage, greengage_plum beach_plum sloe Victoria_plum dried_fruit dried_apricot prune raisin seedless_raisin, sultana seeded_raisin currant fig pineapple, ananas anchovy_pear, river_pear banana passion_fruit granadilla sweet_calabash bell_apple, sweet_cup, water_lemon, yellow_granadilla breadfruit jackfruit, jak, jack cacao_bean, cocoa_bean cocoa canistel, eggfruit melon melon_ball muskmelon, sweet_melon cantaloup, cantaloupe winter_melon honeydew, honeydew_melon Persian_melon net_melon, netted_melon, nutmeg_melon casaba, casaba_melon watermelon cherry sweet_cherry, black_cherry bing_cherry heart_cherry, oxheart, oxheart_cherry blackheart, blackheart_cherry capulin, Mexican_black_cherry sour_cherry amarelle morello cocoa_plum, coco_plum, icaco gherkin grape fox_grape Concord_grape Catawba muscadine, bullace_grape scuppernong slipskin_grape vinifera_grape emperor muscat, muscatel, muscat_grape ribier sultana Tokay flame_tokay Thompson_Seedless custard_apple cherimoya, cherimolla soursop, guanabana sweetsop, annon, sugar_apple ilama pond_apple papaw, pawpaw papaya kai_apple ketembilla, kitembilla, kitambilla ackee, akee durian feijoa, pineapple_guava genip, Spanish_lime genipap, genipap_fruit kiwi, kiwi_fruit, Chinese_gooseberry loquat, Japanese_plum mangosteen mango sapodilla, sapodilla_plum, sapota sapote, mammee, marmalade_plum tamarind, tamarindo avocado, alligator_pear, avocado_pear, aguacate date elderberry guava mombin hog_plum, yellow_mombin hog_plum, wild_plum jaboticaba jujube, Chinese_date, Chinese_jujube litchi, litchi_nut, litchee, lichi, leechee, lichee, lychee longanberry, dragon's_eye mamey, mammee, mammee_apple marang medlar medlar mulberry olive black_olive, ripe_olive green_olive pear bosc anjou bartlett, bartlett_pear seckel, seckel_pear plantain plumcot pomegranate prickly_pear 
Barbados_gooseberry, blade_apple quandong, quandang, quantong, native_peach quandong_nut quince rambutan, rambotan pulasan, pulassan rose_apple sorb, sorb_apple sour_gourd, monkey_bread edible_seed pumpkin_seed betel_nut, areca_nut beechnut walnut black_walnut English_walnut brazil_nut, brazil butternut souari_nut cashew, cashew_nut chestnut chincapin, chinkapin, chinquapin hazelnut, filbert, cobnut, cob coconut, cocoanut coconut_milk, coconut_water grugru_nut hickory_nut cola_extract macadamia_nut pecan pine_nut, pignolia, pinon_nut pistachio, pistachio_nut sunflower_seed anchovy_paste rollmops feed, provender cattle_cake creep_feed fodder feed_grain eatage, forage, pasture, pasturage, grass silage, ensilage oil_cake oil_meal alfalfa broad_bean, horse_bean hay timothy stover grain, food_grain, cereal grist groats millet barley, barleycorn pearl_barley buckwheat bulgur, bulghur, bulgur_wheat wheat, wheat_berry cracked_wheat stodge wheat_germ oat rice brown_rice white_rice, polished_rice wild_rice, Indian_rice paddy slop, slops, swill, pigswill, pigwash mash chicken_feed, scratch cud, rechewed_food bird_feed, bird_food, birdseed petfood, pet-food, pet_food dog_food cat_food canary_seed salad tossed_salad green_salad Caesar_salad salmagundi salad_nicoise combination_salad chef's_salad potato_salad pasta_salad macaroni_salad fruit_salad Waldorf_salad crab_Louis herring_salad tuna_fish_salad, tuna_salad chicken_salad coleslaw, slaw aspic molded_salad tabbouleh, tabooli ingredient, fixings flavorer, flavourer, flavoring, flavouring, seasoner, seasoning bouillon_cube condiment herb fines_herbes spice spearmint_oil lemon_oil wintergreen_oil, oil_of_wintergreen salt, table_salt, common_salt celery_salt onion_salt seasoned_salt sour_salt five_spice_powder allspice cinnamon stick_cinnamon clove cumin, cumin_seed fennel ginger, gingerroot ginger, powdered_ginger mace nutmeg pepper, peppercorn black_pepper white_pepper sassafras basil, sweet_basil bay_leaf borage hyssop 
caraway chervil chives comfrey, healing_herb coriander, Chinese_parsley, cilantro coriander, coriander_seed costmary fennel, common_fennel fennel, Florence_fennel, finocchio fennel_seed fenugreek, fenugreek_seed garlic, ail clove, garlic_clove garlic_chive lemon_balm lovage marjoram, oregano mint mustard_seed mustard, table_mustard Chinese_mustard nasturtium parsley salad_burnet rosemary rue sage clary_sage savory, savoury summer_savory, summer_savoury winter_savory, winter_savoury sweet_woodruff, waldmeister sweet_cicely tarragon, estragon thyme turmeric caper catsup, ketchup, cetchup, tomato_ketchup cardamom, cardamon, cardamum cayenne, cayenne_pepper, red_pepper chili_powder chili_sauce chutney, Indian_relish steak_sauce taco_sauce salsa mint_sauce cranberry_sauce curry_powder curry lamb_curry duck_sauce, hoisin_sauce horseradish marinade paprika Spanish_paprika pickle dill_pickle bread_and_butter_pickle pickle_relish piccalilli sweet_pickle applesauce, apple_sauce soy_sauce, soy Tabasco, Tabasco_sauce tomato_paste angelica angelica almond_extract anise, aniseed, anise_seed Chinese_anise, star_anise, star_aniseed juniper_berries saffron sesame_seed, benniseed caraway_seed poppy_seed dill, dill_weed dill_seed celery_seed lemon_extract monosodium_glutamate, MSG vanilla_bean vinegar, acetum cider_vinegar wine_vinegar sauce anchovy_sauce hot_sauce hard_sauce horseradish_sauce, sauce_Albert bolognese_pasta_sauce carbonara tomato_sauce tartare_sauce, tartar_sauce wine_sauce marchand_de_vin, mushroom_wine_sauce bread_sauce plum_sauce peach_sauce apricot_sauce pesto ravigote, ravigotte remoulade_sauce dressing, salad_dressing sauce_Louis bleu_cheese_dressing, blue_cheese_dressing blue_cheese_dressing, Roquefort_dressing French_dressing, vinaigrette, sauce_vinaigrette Lorenzo_dressing anchovy_dressing Italian_dressing half-and-half_dressing mayonnaise, mayo green_mayonnaise, sauce_verte aioli, aioli_sauce, garlic_sauce Russian_dressing, Russian_mayonnaise salad_cream 
Thousand_Island_dressing barbecue_sauce hollandaise bearnaise Bercy, Bercy_butter bordelaise bourguignon, bourguignon_sauce, Burgundy_sauce brown_sauce, sauce_Espagnole Espagnole, sauce_Espagnole Chinese_brown_sauce, brown_sauce blanc cheese_sauce chocolate_sauce, chocolate_syrup hot-fudge_sauce, fudge_sauce cocktail_sauce, seafood_sauce Colbert, Colbert_butter white_sauce, bechamel_sauce, bechamel cream_sauce Mornay_sauce demiglace, demi-glaze gravy, pan_gravy gravy spaghetti_sauce, pasta_sauce marinara mole hunter's_sauce, sauce_chausseur mushroom_sauce mustard_sauce Nantua, shrimp_sauce Hungarian_sauce, paprika_sauce pepper_sauce, Poivrade roux Smitane Soubise, white_onion_sauce Lyonnaise_sauce, brown_onion_sauce veloute allemande, allemande_sauce caper_sauce poulette curry_sauce Worcester_sauce, Worcestershire, Worcestershire_sauce coconut_milk, coconut_cream egg, eggs egg_white, white, albumen, ovalbumin egg_yolk, yolk boiled_egg, coddled_egg hard-boiled_egg, hard-cooked_egg Easter_egg Easter_egg chocolate_egg candy_egg poached_egg, dropped_egg scrambled_eggs deviled_egg, stuffed_egg shirred_egg, baked_egg, egg_en_cocotte omelet, omelette firm_omelet French_omelet fluffy_omelet western_omelet souffle fried_egg dairy_product milk milk sour_milk soya_milk, soybean_milk, soymilk formula pasteurized_milk cows'_milk yak's_milk goats'_milk acidophilus_milk raw_milk scalded_milk homogenized_milk certified_milk powdered_milk, dry_milk, dried_milk, milk_powder nonfat_dry_milk evaporated_milk condensed_milk skim_milk, skimmed_milk semi-skimmed_milk whole_milk low-fat_milk buttermilk cream clotted_cream, Devonshire_cream double_creme, heavy_whipping_cream half-and-half heavy_cream light_cream, coffee_cream, single_cream sour_cream, soured_cream whipping_cream, light_whipping_cream butter clarified_butter, drawn_butter ghee brown_butter, beurre_noisette Meuniere_butter, lemon_butter yogurt, yoghurt, yoghourt blueberry_yogurt raita whey curd curd clabber cheese paring 
cream_cheese double_cream mascarpone triple_cream, triple_creme cottage_cheese, pot_cheese, farm_cheese, farmer's_cheese process_cheese, processed_cheese bleu, blue_cheese Stilton Roquefort gorgonzola Danish_blue Bavarian_blue Brie brick_cheese Camembert cheddar, cheddar_cheese, Armerican_cheddar, American_cheese rat_cheese, store_cheese Cheshire_cheese double_Gloucester Edam goat_cheese, chevre Gouda, Gouda_cheese grated_cheese hand_cheese Liederkranz Limburger mozzarella Muenster Parmesan quark_cheese, quark ricotta string_cheese Swiss_cheese Emmenthal, Emmental, Emmenthaler, Emmentaler Gruyere sapsago Velveeta nut_butter peanut_butter marshmallow_fluff onion_butter pimento_butter shrimp_butter lobster_butter yak_butter spread, paste cheese_spread anchovy_butter fishpaste garlic_butter miso wasabi snail_butter hummus, humus, hommos, hoummos, humous pate duck_pate foie_gras, pate_de_foie_gras tapenade tahini sweetening, sweetener aspartame honey saccharin sugar, refined_sugar syrup, sirup sugar_syrup molasses sorghum, sorghum_molasses treacle, golden_syrup grenadine maple_syrup corn_syrup miraculous_food, manna, manna_from_heaven batter dough bread_dough pancake_batter fritter_batter coq_au_vin chicken_provencale chicken_and_rice moo_goo_gai_pan arroz_con_pollo bacon_and_eggs barbecued_spareribs, spareribs beef_Bourguignonne, boeuf_Bourguignonne beef_Wellington, filet_de_boeuf_en_croute bitok boiled_dinner, New_England_boiled_dinner Boston_baked_beans bubble_and_squeak pasta cannelloni carbonnade_flamande, Belgian_beef_stew cheese_souffle chicken_Marengo chicken_cordon_bleu Maryland_chicken chicken_paprika, chicken_paprikash chicken_Tetrazzini Tetrazzini chicken_Kiev chili, chili_con_carne chili_dog chop_suey chow_mein codfish_ball, codfish_cake coquille coquilles_Saint-Jacques croquette cottage_pie rissole dolmas, stuffed_grape_leaves egg_foo_yong, egg_fu_yung egg_roll, spring_roll eggs_Benedict enchilada falafel, felafel fish_and_chips fondue, fondu 
cheese_fondue chocolate_fondue fondue, fondu beef_fondue, boeuf_fondu_bourguignon French_toast fried_rice, Chinese_fried_rice frittata frog_legs galantine gefilte_fish, fish_ball haggis ham_and_eggs hash corned_beef_hash jambalaya kabob, kebab, shish_kebab kedgeree souvlaki, souvlakia lasagna, lasagne seafood_Newburg lobster_Newburg, lobster_a_la_Newburg shrimp_Newburg Newburg_sauce lobster_thermidor lutefisk, lutfisk macaroni_and_cheese macedoine meatball porcupine_ball, porcupines Swedish_meatball meat_loaf, meatloaf moussaka osso_buco marrow, bone_marrow pheasant_under_glass pigs_in_blankets pilaf, pilaff, pilau, pilaw bulgur_pilaf pizza, pizza_pie sausage_pizza pepperoni_pizza cheese_pizza anchovy_pizza Sicilian_pizza poi pork_and_beans porridge oatmeal, burgoo loblolly potpie rijsttaffel, rijstaffel, rijstafel risotto, Italian_rice roulade fish_loaf salmon_loaf Salisbury_steak sauerbraten sauerkraut scallopine, scallopini veal_scallopini scampi Scotch_egg Scotch_woodcock scrapple spaghetti_and_meatballs Spanish_rice steak_tartare, tartar_steak, cannibal_mound pepper_steak steak_au_poivre, peppered_steak, pepper_steak beef_Stroganoff stuffed_cabbage kishke, stuffed_derma stuffed_peppers stuffed_tomato, hot_stuffed_tomato stuffed_tomato, cold_stuffed_tomato succotash sukiyaki sashimi sushi Swiss_steak tamale tamale_pie tempura teriyaki terrine Welsh_rarebit, Welsh_rabbit, rarebit schnitzel, Wiener_schnitzel taco chicken_taco burrito beef_burrito quesadilla tostada bean_tostada refried_beans, frijoles_refritos beverage, drink, drinkable, potable wish-wash concoction, mixture, intermixture mix, premix filling lekvar potion elixir elixir_of_life philter, philtre, love-potion, love-philter, love-philtre alcohol, alcoholic_drink, alcoholic_beverage, intoxicant, inebriant proof_spirit home_brew, homebrew hooch, hootch kava, kavakava aperitif brew, brewage beer draft_beer, draught_beer suds Munich_beer, Munchener bock, bock_beer lager, lager_beer light_beer 
Oktoberfest, Octoberfest Pilsner, Pilsener shebeen Weissbier, white_beer, wheat_beer Weizenbock malt wort malt, malt_liquor ale bitter Burton pale_ale porter, porter's_beer stout Guinness kvass mead metheglin hydromel oenomel near_beer ginger_beer sake, saki, rice_beer wine, vino vintage red_wine white_wine blush_wine, pink_wine, rose, rose_wine altar_wine, sacramental_wine sparkling_wine champagne, bubbly cold_duck Burgundy, Burgundy_wine Beaujolais Medoc Canary_wine Chablis, white_Burgundy Montrachet Chardonnay, Pinot_Chardonnay Pinot_noir Pinot_blanc Bordeaux, Bordeaux_wine claret, red_Bordeaux Chianti Cabernet, Cabernet_Sauvignon Merlot Sauvignon_blanc California_wine Cotes_de_Provence dessert_wine Dubonnet jug_wine macon, maconnais Moselle Muscadet plonk retsina Rhine_wine, Rhenish, hock Riesling liebfraumilch Rhone_wine Rioja sack Saint_Emilion Soave zinfandel Sauterne, Sauternes straw_wine table_wine Tokay vin_ordinaire vermouth sweet_vermouth, Italian_vermouth dry_vermouth, French_vermouth Chenin_blanc Verdicchio Vouvray Yquem generic, generic_wine varietal, varietal_wine fortified_wine Madeira malmsey port, port_wine sherry Marsala muscat, muscatel, muscadel, muscadelle liquor, spirits, booze, hard_drink, hard_liquor, John_Barleycorn, strong_drink neutral_spirits, ethyl_alcohol aqua_vitae, ardent_spirits eau_de_vie moonshine, bootleg, corn_liquor bathtub_gin aquavit, akvavit arrack, arak bitters brandy applejack Calvados Armagnac Cognac grappa kirsch slivovitz gin sloe_gin geneva, Holland_gin, Hollands grog ouzo rum demerara, demerara_rum Jamaica_rum schnapps, schnaps pulque mescal tequila vodka whiskey, whisky blended_whiskey, blended_whisky bourbon corn_whiskey, corn_whisky, corn firewater Irish, Irish_whiskey, Irish_whisky poteen rye, rye_whiskey, rye_whisky Scotch, Scotch_whiskey, Scotch_whisky, malt_whiskey, malt_whisky, Scotch_malt_whiskey, Scotch_malt_whisky sour_mash, sour_mash_whiskey liqueur, cordial absinth, absinthe amaretto anisette, 
anisette_de_Bordeaux benedictine Chartreuse coffee_liqueur creme_de_cacao creme_de_menthe creme_de_fraise Drambuie Galliano orange_liqueur curacao, curacoa triple_sec Grand_Marnier kummel maraschino, maraschino_liqueur pastis Pernod pousse-cafe Kahlua ratafia, ratafee sambuca mixed_drink cocktail Dom_Pedro highball mixer bishop Bloody_Mary Virgin_Mary, bloody_shame bullshot cobbler collins, Tom_Collins cooler refresher smoothie daiquiri, rum_cocktail strawberry_daiquiri NADA_daiquiri spritzer flip gimlet gin_and_tonic grasshopper Harvey_Wallbanger julep, mint_julep manhattan Rob_Roy margarita martini gin_and_it vodka_martini old_fashioned pink_lady Sazerac screwdriver sidecar Scotch_and_soda sling brandy_sling gin_sling rum_sling sour whiskey_sour, whisky_sour stinger swizzle hot_toddy, toddy zombie, zombi fizz Irish_coffee cafe_au_lait cafe_noir, demitasse decaffeinated_coffee, decaf drip_coffee espresso caffe_latte, latte cappuccino, cappuccino_coffee, coffee_cappuccino iced_coffee, ice_coffee instant_coffee mocha, mocha_coffee mocha cassareep Turkish_coffee chocolate_milk cider, cyder hard_cider scrumpy sweet_cider mulled_cider perry rotgut slug cocoa, chocolate, hot_chocolate, drinking_chocolate criollo juice fruit_juice, fruit_crush nectar apple_juice cranberry_juice grape_juice must grapefruit_juice orange_juice frozen_orange_juice, orange-juice_concentrate pineapple_juice lemon_juice lime_juice papaya_juice tomato_juice carrot_juice V-8_juice koumiss, kumis fruit_drink, ade lemonade limeade orangeade malted_milk mate mulled_wine negus soft_drink pop, soda, soda_pop, soda_water, tonic birch_beer bitter_lemon cola, dope cream_soda egg_cream ginger_ale, ginger_pop orange_soda phosphate Coca_Cola, Coke Pepsi, Pepsi_Cola root_beer sarsaparilla tonic, tonic_water, quinine_water coffee_bean, coffee_berry, coffee coffee, java cafe_royale, coffee_royal fruit_punch milk_punch mimosa, buck's_fizz pina_colada punch cup champagne_cup claret_cup wassail planter's_punch 
White_Russian fish_house_punch May_wine eggnog cassiri spruce_beer rickey gin_rickey tea, tea_leaf tea_bag tea tea-like_drink cambric_tea cuppa, cupper herb_tea, herbal_tea, herbal tisane camomile_tea ice_tea, iced_tea sun_tea black_tea congou, congo, congou_tea, English_breakfast_tea Darjeeling orange_pekoe, pekoe souchong, soochong green_tea hyson oolong water bottled_water branch_water spring_water sugar_water drinking_water ice_water soda_water, carbonated_water, club_soda, seltzer, sparkling_water mineral_water seltzer Vichy_water perishable, spoilable couscous ramekin, ramequin multivitamin, multivitamin_pill vitamin_pill soul_food mold, mould people collection, aggregation, accumulation, assemblage book, rule_book library baseball_club, ball_club, club, nine crowd class, form, grade, course core, nucleus, core_group concert_band, military_band dance wedding, wedding_party chain, concatenation power_breakfast aerie, aery, eyrie, eyry agora amusement_park, funfair, pleasure_ground aphelion apron interplanetary_space interstellar_space intergalactic_space bush semidesert beam-ends bridgehead bus_stop campsite, campground, camping_site, camping_ground, bivouac, encampment, camping_area detention_basin cemetery, graveyard, burial_site, burial_ground, burying_ground, memorial_park, necropolis trichion, crinion city, metropolis, urban_center business_district, downtown outskirts borough cow_pasture crest eparchy, exarchate suburb, suburbia, suburban_area stockbroker_belt crawlspace, crawl_space sheikdom, sheikhdom residence, abode domicile, legal_residence dude_ranch farmland, farming_area midfield firebreak, fireguard flea_market battlefront, front, front_line garbage_heap, junk_heap, rubbish_heap, scrapheap, trash_heap, junk_pile, trash_pile, refuse_heap benthos, benthic_division, benthonic_zone goldfield grainfield, grain_field half-mast, half-staff hemline heronry hipline hipline hole-in-the-wall junkyard isoclinic_line, isoclinal littoral, litoral, 
littoral_zone, sands magnetic_pole grassland mecca observer's_meridian prime_meridian nombril no-parking_zone outdoors, out-of-doors, open_air, open fairground pasture, pastureland, grazing_land, lea, ley perihelion periselene, perilune locus_of_infection kasbah, casbah waterfront resort, resort_hotel, holiday_resort resort_area, playground, vacation_spot rough ashram harborage, harbourage scrubland weald wold schoolyard showplace bedside sideline, out_of_bounds ski_resort soil_horizon geological_horizon coal_seam coalface field oilfield Temperate_Zone terreplein three-mile_limit desktop top kampong, campong subtropics, semitropics barrio veld, veldt vertex, peak, apex, acme waterline, water_line, water_level high-water_mark low-water_mark continental_divide zodiac Aegean_island sultanate Swiss_canton abyssal_zone aerie, aery, eyrie, eyry air_bubble alluvial_flat, alluvial_plain alp Alpine_glacier, Alpine_type_of_glacier anthill, formicary aquifer archipelago arete arroyo ascent, acclivity, rise, raise, climb, upgrade asterism asthenosphere atoll bank bank bar barbecue_pit barrier_reef baryon, heavy_particle basin beach honeycomb belay ben berm bladder_stone, cystolith bluff borrow_pit brae bubble burrow, tunnel butte caldera canyon, canon canyonside cave cavern chasm cirque, corrie, cwm cliff, drop, drop-off cloud coast coastland col, gap collector comet continental_glacier coral_reef cove crag crater cultivated_land, farmland, plowland, ploughland, tilled_land, tillage, tilth dale defile, gorge delta descent, declivity, fall, decline, declination, declension, downslope diapir divot divot down downhill draw drey drumlin dune, sand_dune escarpment, scarp esker fireball flare_star floor fomite, vehicle foothill footwall foreland foreshore gauge_boson geological_formation, formation geyser glacier glen gopher_hole gorge grotto, grot growler gulch, flume gully hail highland, upland hill hillside hole, hollow hollow, holler hot_spring, thermal_spring iceberg, berg 
icecap, ice_cap ice_field ice_floe, floe ice_mass inclined_fault ion isthmus kidney_stone, urinary_calculus, nephrolith, renal_calculus knoll, mound, hillock, hummock, hammock kopje, koppie Kuiper_belt, Edgeworth-Kuiper_belt lake_bed, lake_bottom lakefront lakeside, lakeshore landfall landfill lather leak ledge, shelf lepton lithosphere, geosphere lowland lunar_crater maar massif meander mesa, table meteorite microfossil midstream molehill monocline mountain, mount mountainside, versant mouth mull natural_depression, depression natural_elevation, elevation nullah ocean ocean_floor, sea_floor, ocean_bottom, seabed, sea_bottom, Davy_Jones's_locker, Davy_Jones oceanfront outcrop, outcropping, rock_outcrop oxbow pallasite perforation photosphere piedmont Piedmont_glacier, Piedmont_type_of_glacier pinetum plage plain, field, champaign point polar_glacier pothole, chuckhole precipice promontory, headland, head, foreland ptyalith pulsar quicksand rabbit_burrow, rabbit_hole radiator rainbow range, mountain_range, range_of_mountains, chain, mountain_chain, chain_of_mountains rangeland ravine reef ridge ridge, ridgeline rift_valley riparian_forest ripple_mark riverbank, riverside riverbed, river_bottom rock, stone roof saltpan sandbank sandbar, sand_bar sandpit sanitary_landfill sawpit scablands seashore, coast, seacoast, sea-coast seaside, seaboard seif_dune shell shiner shoal shore shoreline sinkhole, sink, swallow_hole ski_slope sky slope, incline, side snowcap snowdrift snowfield soapsuds, suds, lather spit, tongue spoor spume star steep steppe strand streambed, creek_bed sun, Sun supernova swale swamp, swampland swell tableland, plateau talus, scree tangle tar_pit terrace, bench tidal_basin tideland tor tor Trapezium troposphere tundra twinkler uphill urolith valley, vale vehicle-borne_transmission vein, mineral_vein volcanic_crater, crater volcano wadi wall warren, rabbit_warren wasp's_nest, wasps'_nest, hornet's_nest, hornets'_nest watercourse waterside water_table, 
water_level, groundwater_level whinstone, whin wormcast xenolith Circe gryphon, griffin, griffon spiritual_leader messiah, christ Rhea_Silvia, Rea_Silvia number_one adventurer, venturer anomaly, unusual_person appointee, appointment argonaut Ashkenazi benefactor, helper color-blind_person commoner, common_man, common_person conservator contrarian contadino contestant cosigner, cosignatory discussant enologist, oenologist, fermentologist entertainer eulogist, panegyrist ex-gambler experimenter experimenter exponent ex-president face female, female_person finisher inhabitant, habitant, dweller, denizen, indweller native, indigen, indigene, aborigine, aboriginal native juvenile, juvenile_person lover male, male_person mediator, go-between, intermediator, intermediary, intercessor mediatrix national, subject peer, equal, match, compeer prize_winner, lottery_winner recipient, receiver religionist sensualist traveler, traveller unwelcome_person, persona_non_grata unskilled_person worker wrongdoer, offender Black_African Afrikaner, Afrikander, Boer Aryan Black, Black_person, blackamoor, Negro, Negroid Black_woman mulatto White, White_person, Caucasian Circassian Semite Chaldean, Chaldaean, Chaldee Elamite white_man WASP, white_Anglo-Saxon_Protestant gook, slant-eye Mongol, Mongolian Tatar, Tartar, Mongol_Tatar Nahuatl Aztec Olmec Biloxi Blackfoot Brule Caddo Cheyenne Chickasaw Cocopa, Cocopah Comanche Creek Delaware Diegueno Esselen Eyeish Havasupai Hunkpapa Iowa, Ioway Kalapooia, Kalapuya, Calapooya, Calapuya Kamia Kekchi Kichai Kickapoo Kiliwa, Kiliwi Malecite Maricopa Mohican, Mahican Muskhogean, Muskogean Navaho, Navajo Nootka Oglala, Ogalala Osage Oneida Paiute, Piute Passamaquody Penobscot Penutian Potawatomi Powhatan kachina Salish Shahaptian, Sahaptin, Sahaptino Shasta Shawnee Sihasapa Teton, Lakota, Teton_Sioux, Teton_Dakota Taracahitian Tarahumara Tuscarora Tutelo Yana Yavapai Yokuts Yuma Gadaba Kolam Kui Toda Tulu Gujarati, Gujerati Kashmiri Punjabi, Panjabi 
Slav Anabaptist Adventist, Second_Adventist gentile, non-Jew, goy gentile Catholic Old_Catholic Uniat, Uniate, Uniate_Christian Copt Jewess Jihadist Buddhist Zen_Buddhist Mahayanist swami Hare_Krishna Shintoist Eurafrican Eurasian Gael Frank Afghan, Afghanistani Albanian Algerian Altaic Andorran Angolan Anguillan Austrian Bahamian Bahraini, Bahreini Basotho Herero Luba, Chiluba Barbadian Bolivian Bornean Carioca Tupi Bruneian Bulgarian Byelorussian, Belorussian, White_Russian Cameroonian Canadian French_Canadian Central_American Chilean Congolese Cypriot, Cypriote, Cyprian Dane Djiboutian Britisher, Briton, Brit English_person Englishwoman Anglo-Saxon Angle West_Saxon Lombard, Langobard limey, John_Bull Cantabrigian Cornishman Cornishwoman Lancastrian Lancastrian Geordie Oxonian Ethiopian Amhara Eritrean Finn Komi Livonian Lithuanian Selkup, Ostyak-Samoyed Parisian Parisienne Creole Creole Gabonese Greek, Hellene Dorian Athenian Laconian Guyanese Haitian Malay, Malayan Moro Netherlander, Dutchman, Hollander Icelander Iraqi, Iraki Irishman Irishwoman Dubliner Italian Roman Sabine Japanese, Nipponese Jordanian Korean Kenyan Lao, Laotian Lapp, Lapplander, Sami, Saami, Same, Saame Latin_American, Latino Lebanese Levantine Liberian Luxemburger, Luxembourger Macedonian Sabahan Mexican Chicano Mexican-American, Mexicano Namibian Nauruan Gurkha New_Zealander, Kiwi Nicaraguan Nigerian Hausa, Haussa North_American Nova_Scotian, bluenose Omani Pakistani Brahui South_American_Indian Carib, Carib_Indian Filipino Polynesian Qatari, Katari Romanian, Rumanian Muscovite Georgian Sarawakian Scandinavian, Norse, Northman Senegalese Slovene South_African South_American Sudanese Syrian Tahitian Tanzanian Tibetan Togolese Tuareg Turki Chuvash Turkoman, Turkmen, Turcoman Uzbek, Uzbeg, Uzbak, Usbek, Usbeg Ugandan Ukranian Yakut Tungus, Evenk Igbo American Anglo-American Alaska_Native, Alaskan_Native, Native_Alaskan Arkansan, Arkansawyer Carolinian Coloradan Connecticuter Delawarean, 
Delawarian Floridian German_American Illinoisan Mainer, Down_Easter Marylander Minnesotan, Gopher Nebraskan, Cornhusker New_Hampshirite, Granite_Stater New_Jerseyan, New_Jerseyite, Garden_Stater New_Yorker North_Carolinian, Tarheel Oregonian, Beaver Pennsylvanian, Keystone_Stater Texan Utahan Uruguayan Vietnamese, Annamese Gambian East_German Berliner Prussian Ghanian Guinean Papuan Walloon Yemeni Yugoslav, Jugoslav, Yugoslavian, Jugoslavian Serbian, Serb Xhosa Zairese, Zairean Zimbabwean Zulu Gemini, Twin Sagittarius, Archer Pisces, Fish abbe abbess, mother_superior, prioress abnegator abridger, abbreviator abstractor, abstracter absconder absolver abecedarian aberrant abettor, abetter abhorrer abomination abseiler, rappeller abstainer, ascetic academic_administrator academician accessory_before_the_fact companion accompanist, accompanyist accomplice, confederate account_executive, account_representative, registered_representative, customer's_broker, customer's_man accused accuser acid_head acquaintance, friend acquirer aerialist action_officer active active_citizen actor, histrion, player, thespian, role_player actor, doer, worker addict, nut, freak, junkie, junky adducer adjuster, adjustor, claims_adjuster, claims_adjustor, claim_agent adjutant, aide, aide-de-camp adjutant_general admirer, adorer adoptee adulterer, fornicator adulteress, fornicatress, hussy, jade, loose_woman, slut, strumpet, trollop advertiser, advertizer, adman advisee advocate, advocator, proponent, exponent aeronautical_engineer affiliate affluent aficionado buck_sergeant agent-in-place aggravator, annoyance agitator, fomenter agnostic agnostic, doubter agonist agony_aunt agriculturist, agriculturalist, cultivator, grower, raiser air_attache air_force_officer, commander airhead air_traveler, air_traveller alarmist albino alcoholic, alky, dipsomaniac, boozer, lush, soaker, souse alderman alexic alienee, grantee alienor aliterate, aliterate_person algebraist allegorizer, allegoriser 
alliterator almoner, medical_social_worker alpinist altar_boy alto ambassador, embassador ambassador ambusher amicus_curiae, friend_of_the_court amoralist amputee analogist analphabet, analphabetic analyst industry_analyst market_strategist anarchist, nihilist, syndicalist anathema, bete_noire ancestor, ascendant, ascendent, antecedent, root anchor, anchorman, anchorperson ancient anecdotist, raconteur angler, troller animator animist annotator announcer announcer anti anti-American anti-Semite, Jew-baiter Anzac ape-man aphakic appellant, plaintiff_in_error appointee apprehender April_fool aspirant, aspirer, hopeful, wannabe, wannabee appreciator appropriator Arabist archaist archbishop archer, bowman architect, designer archivist archpriest, hierarch, high_priest, prelate, primate Aristotelian, Aristotelean, Peripatetic armiger army_attache army_engineer, military_engineer army_officer arranger, adapter, transcriber arrival, arriver, comer arthritic articulator artilleryman, cannoneer, gunner, machine_gunner artist's_model, sitter assayer assemblyman assemblywoman assenter asserter, declarer, affirmer, asseverator, avower assignee assistant, helper, help, supporter assistant_professor associate associate associate_professor astronaut, spaceman, cosmonaut cosmographer, cosmographist atheist athlete, jock attendant, attender, tender attorney_general auditor augur, auspex aunt, auntie, aunty au_pair_girl authoritarian, dictator authority authorizer, authoriser automobile_mechanic, auto-mechanic, car-mechanic, mechanic, grease_monkey aviator, aeronaut, airman, flier, flyer aviatrix, airwoman, aviatress ayah babu, baboo baby, babe, sister baby baby_boomer, boomer baby_farmer back backbencher backpacker, packer backroom_boy, brain_truster backscratcher bad_person baggage bag_lady bailee bailiff bailor bairn baker, bread_maker balancer balker, baulker, noncompliant ball-buster, ball-breaker ball_carrier, runner ballet_dancer ballet_master ballet_mistress balletomane 
ball_hawk balloonist ballplayer, baseball_player bullfighter, toreador banderillero matador picador bandsman banker bank_robber bankrupt, insolvent bantamweight barmaid baron, big_businessman, business_leader, king, magnate, mogul, power, top_executive, tycoon baron baron bartender, barman, barkeep, barkeeper, mixologist baseball_coach, baseball_manager base_runner, runner basketball_player, basketeer, cager basketweaver, basketmaker Basket_Maker bass, basso bastard, by-blow, love_child, illegitimate_child, illegitimate, whoreson bat_boy bather batman baton_twirler, twirler Bavarian beadsman, bedesman beard beatnik, beat beauty_consultant Bedouin, Beduin bedwetter, bed_wetter, wetter beekeeper, apiarist, apiculturist beer_drinker, ale_drinker beggarman beggarwoman beldam, beldame theist believer, truster bell_founder benedick, benedict berserker, berserk besieger best, topper betrothed Big_Brother bigot big_shot, big_gun, big_wheel, big_cheese, big_deal, big_enchilada, big_fish, head_honcho big_sister billiard_player biochemist biographer bird_fancier birth birth-control_campaigner, birth-control_reformer bisexual, bisexual_person black_belt blackmailer, extortioner, extortionist Black_Muslim blacksmith blade bleacher blind_date bluecoat bluestocking, bas_bleu boatbuilder boatman, boater, waterman boatswain, bos'n, bo's'n, bosun, bo'sun bobby bodyguard, escort boffin Bolshevik, Marxist, red, bolshie, bolshy Bolshevik, Bolshevist bombshell bondman, bondsman bondwoman, bondswoman, bondmaid bondwoman, bondswoman, bondmaid bond_servant book_agent bookbinder bookkeeper bookmaker bookworm booster, shoplifter, lifter bootblack, shoeblack bootlegger, moonshiner bootmaker, boot_maker borderer border_patrolman botanist, phytologist, plant_scientist bottom_feeder boulevardier bounty_hunter bounty_hunter Bourbon bowler slugger, slogger cub, lad, laddie, sonny, sonny_boy Boy_Scout boy_scout boy_wonder bragger, braggart, boaster, blowhard, line-shooter, vaunter brahman, brahmin 
brawler breadwinner breaststroker breeder, stock_breeder brick bride bridesmaid, maid_of_honor bridge_agent broadcast_journalist Brother brother-in-law browser Brummie, Brummy buddy, brother, chum, crony, pal, sidekick bull bully bunny, bunny_girl burglar bursar busboy, waiter's_assistant business_editor business_traveler buster busybody, nosy-parker, nosey-parker, quidnunc buttinsky cabinetmaker, furniture_maker caddie, golf_caddie cadet, plebe caller, caller-out call_girl calligrapher, calligraphist campaigner, candidate, nominee camper camp_follower candidate, prospect canonist capitalist captain, headwaiter, maitre_d'hotel, maitre_d' captain, senior_pilot captain captain, chieftain captive captive cardinal cardiologist, heart_specialist, heart_surgeon card_player cardsharp, card_sharp, cardsharper, card_sharper, sharper, sharpie, sharpy, card_shark careerist career_man caregiver caretaker caretaker caricaturist carillonneur caroler, caroller carpenter carper, niggler Cartesian cashier casualty, injured_party casualty casuist, sophist catechist catechumen, neophyte caterer Catholicos cat_fancier Cavalier, Royalist cavalryman, trooper caveman, cave_man, cave_dweller, troglodyte celebrant celebrant, celebrator, celebrater celebrity, famous_person cellist, violoncellist censor censor centenarian centrist, middle_of_the_roader, moderate, moderationist centurion certified_public_accountant, CPA chachka, tsatske, tshatshke, tchotchke, tchotchkeleh chambermaid, fille_de_chambre chameleon champion, champ, title-holder chandler prison_chaplain charcoal_burner charge_d'affaires charioteer charmer, beguiler chartered_accountant chartist, technical_analyst charwoman, char, cleaning_woman, cleaning_lady, woman male_chauvinist, sexist cheapskate, tightwad Chechen checker cheerer cheerleader cheerleader Cheops, Khufu chess_master chief_executive_officer, CEO, chief_operating_officer chief_of_staff chief_petty_officer Chief_Secretary child, kid, youngster, minor, shaver, 
nipper, small_fry, tiddler, tike, tyke, fry, nestling child, kid child, baby child_prodigy, infant_prodigy, wonder_child chimneysweeper, chimneysweep, sweep chiropractor chit choker choragus choreographer chorus_girl, showgirl, chorine chosen cicerone cigar_smoker cipher, cypher, nobody, nonentity circus_acrobat citizen city_editor city_father city_man city_slicker, city_boy civic_leader, civil_leader civil_rights_leader, civil_rights_worker, civil_rights_activist cleaner clergyman, reverend, man_of_the_cloth cleric, churchman, divine, ecclesiastic clerk clever_Dick, clever_clogs climatologist climber clinician closer, finisher closet_queen clown, buffoon, goof, goofball, merry_andrew clown, buffoon coach, private_instructor, tutor coach, manager, handler pitching_coach coachman coal_miner, collier, pitman coastguardsman cobber cobbler, shoemaker codger, old_codger co-beneficiary cog cognitive_neuroscientist coiffeur coiner collaborator, cooperator, partner, pardner colleen college_student, university_student collegian, college_man, college_boy colonial colonialist colonizer, coloniser coloratura, coloratura_soprano color_guard colossus, behemoth, giant, heavyweight, titan comedian comedienne comer commander commander_in_chief, generalissimo commanding_officer, commandant, commander commissar, political_commissar commissioned_officer commissioned_military_officer commissioner commissioner committee_member committeewoman commodore communicant communist, commie Communist commuter compere complexifier compulsive computational_linguist computer_scientist computer_user Comrade concert-goer, music_lover conciliator, make-peace, pacifier, peacemaker, reconciler conductor confectioner, candymaker Confederate confessor confidant, intimate Confucian, Confucianist rep conqueror, vanquisher Conservative Nonconformist, chapelgoer Anglican consignee consigner, consignor constable constructivist contractor contralto contributor control_freak convalescent convener convict, con, 
inmate, yard_bird, yardbird copilot, co-pilot copycat, imitator, emulator, ape, aper coreligionist cornerback corporatist correspondent, letter_writer cosmetician cosmopolitan, cosmopolite Cossack cost_accountant co-star costumier, costumer, costume_designer cotter, cottier cotter, cottar counselor, counsellor counterterrorist counterspy, mole countess compromiser countrywoman county_agent, agricultural_agent, extension_agent courtier cousin, first_cousin, cousin-german, full_cousin cover_girl, pin-up, lovely cow craftsman, artisan, journeyman, artificer craftsman, crafter crapshooter crazy, loony, looney, nutcase, weirdo creature, wight creditor creep, weirdo, weirdie, weirdy, spook criminologist critic Croesus cross-examiner, cross-questioner crossover_voter, crossover croupier crown_prince crown_princess cryptanalyst, cryptographer, cryptologist Cub_Scout cuckold cultist curandera curate, minister_of_religion, minister, parson, pastor, rector curator, conservator customer_agent cutter, carver cyberpunk cyborg, bionic_man, bionic_woman cymbalist Cynic cytogeneticist cytologist czar czar, tsar, tzar dad, dada, daddy, pa, papa, pappa, pop dairyman Dalai_Lama, Grand_Lama dallier, dillydallier, dilly-dallier, mope, lounger dancer, professional_dancer, terpsichorean dancer, social_dancer clog_dancer dancing-master, dance_master dark_horse darling, favorite, favourite, pet, dearie, deary, ducky date, escort daughter, girl dawdler, drone, laggard, lagger, trailer, poke day_boarder day_laborer, day_labourer deacon, Protestant_deacon deaconess deadeye deipnosophist dropout deadhead deaf_person debtor, debitor deckhand, roustabout defamer, maligner, slanderer, vilifier, libeler, backbiter, traducer defense_contractor deist, freethinker delegate deliveryman, delivery_boy, deliverer demagogue, demagog, rabble-rouser demigod, superman, Ubermensch demographer, demographist, population_scientist demonstrator, protester den_mother department_head depositor deputy dermatologist, 
skin_doctor descender designated_hitter designer, intriguer desk_clerk, hotel_desk_clerk, hotel_clerk desk_officer desk_sergeant, deskman, station_keeper detainee, political_detainee detective, investigator, tec, police_detective detective detractor, disparager, depreciator, knocker developer deviationist devisee devisor devourer dialectician diarist, diary_keeper, journalist dietician, dietitian, nutritionist diocesan director, theater_director, theatre_director director dirty_old_man disbeliever, nonbeliever, unbeliever disk_jockey, disc_jockey, dj dispatcher distortionist distributor, distributer district_attorney, DA district_manager diver, plunger divorcee, grass_widow ex-wife, ex divorce_lawyer docent doctor, doc, physician, MD, Dr., medico dodo, fogy, fogey, fossil doge dog_in_the_manger dogmatist, doctrinaire dolichocephalic domestic_partner, significant_other, spousal_equivalent, spouse_equivalent Dominican dominus, dominie, domine, dominee don, father Donatist donna dosser, street_person double, image, look-alike double-crosser, double-dealer, two-timer, betrayer, traitor down-and-out doyenne draftsman, drawer dramatist, playwright dreamer dressmaker, modiste, needlewoman, seamstress, sempstress dressmaker's_model dribbler, driveller, slobberer, drooler dribbler drinker, imbiber, toper, juicer drinker drug_addict, junkie, junky drug_user, substance_abuser, user Druid drum_majorette, majorette drummer drunk drunkard, drunk, rummy, sot, inebriate, wino Druze, Druse dry, prohibitionist dry_nurse duchess duke duffer dunker Dutch_uncle dyspeptic eager_beaver, busy_bee, live_wire, sharpie, sharpy earl earner, wage_earner eavesdropper eccentric, eccentric_person, flake, oddball, geek eclectic, eclecticist econometrician, econometrist economist, economic_expert ectomorph editor, editor_in_chief egocentric, egoist egotist, egoist, swellhead ejaculator elder elder_statesman elected_official electrician, lineman, linesman elegist elocutionist emancipator, manumitter 
embryologist emeritus emigrant, emigre, emigree, outgoer emissary, envoy empress employee employer enchantress, witch enchantress, temptress, siren, Delilah, femme_fatale encyclopedist, encyclopaedist endomorph enemy, foe, foeman, opposition energizer, energiser, vitalizer, vitaliser, animator end_man end_man, corner_man endorser, indorser enjoyer enlisted_woman enophile, oenophile entrant entrant entrepreneur, enterpriser envoy, envoy_extraordinary, minister_plenipotentiary enzymologist eparch epidemiologist epigone, epigon epileptic Episcopalian equerry equerry erotic escapee escapist, dreamer, wishful_thinker Eskimo, Esquimau, Inuit espionage_agent esthetician, aesthetician etcher ethnologist Etonian etymologist evangelist, revivalist, gospeler, gospeller Evangelist event_planner examiner, inspector examiner, tester, quizzer exarch executant executive_secretary executive_vice_president executrix exegete exhibitor, exhibitioner, shower exhibitionist, show-off exile, expatriate, expat existentialist, existentialist_philosopher, existential_philosopher exorcist, exorciser ex-spouse extern, medical_extern extremist extrovert, extravert eyewitness facilitator fairy_godmother falangist, phalangist falconer, hawker falsifier familiar fan, buff, devotee, lover fanatic, fiend fancier, enthusiast farm_boy farmer, husbandman, granger, sodbuster farmhand, fieldhand, field_hand, farm_worker fascist fascista fatalist, determinist, predestinarian, predestinationist father, male_parent, begetter Father, Padre father-figure father-in-law Fauntleroy, Little_Lord_Fauntleroy Fauve, fauvist favorite_son featherweight federalist fellow_traveler, fellow_traveller female_aristocrat female_offspring female_child, girl, little_girl fence fiance, groom-to-be fielder, fieldsman field_judge fighter_pilot filer film_director, director finder fire_chief, fire_marshal fire-eater, fire-swallower fire-eater, hothead fireman, firefighter, fire_fighter, fire-eater fire_marshall fire_walker 
first_baseman, first_sacker firstborn, eldest first_lady first_lieutenant, 1st_lieutenant first_offender first_sergeant, sergeant_first_class fishmonger, fishwife flagellant flag_officer flak_catcher, flak, flack_catcher, flack flanker_back, flanker flapper flatmate flatterer, adulator flibbertigibbet, foolish_woman flight_surgeon floorwalker, shopwalker flop, dud, washout Florentine flower_girl flower_girl flutist, flautist, flute_player fly-by-night flyweight flyweight foe, enemy folk_dancer folk_poet follower football_hero football_player, footballer footman forefather, father, sire foremother foreign_agent foreigner, outsider boss foreman forester, tree_farmer, arboriculturist forewoman forger, counterfeiter forward foster-brother, foster_brother foster-father, foster_father foster-mother, foster_mother foster-sister, foster_sister foster-son, foster_son founder, beginner, founding_father, father foundress four-minute_man framer Francophobe freak, monster, monstrosity, lusus_naturae free_agent, free_spirit, freewheeler free_agent freedom_rider free-liver freeloader free_trader Freudian friar, mendicant monk, monastic frontierswoman front_man, front, figurehead, nominal_head, straw_man, strawman frotteur fucker fucker fuddy-duddy fullback funambulist, tightrope_walker fundamentalist fundraiser futurist gadgeteer gagman, gagster, gagwriter gagman, standup_comedian gainer, weight_gainer gal galoot gambist gambler gamine garbage_man, garbageman, garbage_collector, garbage_carter, garbage_hauler, refuse_collector, dustman gardener garment_cutter garroter, garrotter, strangler, throttler, choker gasman gastroenterologist gatherer gawker gendarme general, full_general generator, source, author geneticist genitor gent geologist geophysicist ghostwriter, ghost Gibson_girl girl, miss, missy, young_lady, young_woman, fille girlfriend, girl, lady_friend girlfriend girl_wonder Girondist, Girondin gitano gladiator glassblower gleaner goat_herder, goatherd godchild godfather 
godparent godson gofer goffer, gopher goldsmith, goldworker, gold-worker golfer, golf_player, linksman gondolier, gondoliere good_guy good_old_boy, good_ole_boy, good_ol'_boy good_Samaritan gossip_columnist gouger governor_general grabber grader graduate_nurse, trained_nurse grammarian, syntactician granddaughter grande_dame grandfather, gramps, granddad, grandad, granddaddy, grandpa Grand_Inquisitor grandma, grandmother, granny, grannie, gran, nan, nanna grandmaster grandparent grantee granter grass_widower, divorced_man great-aunt, grandaunt great_grandchild great_granddaughter great_grandmother great_grandparent great_grandson great-nephew, grandnephew great-niece, grandniece Green_Beret grenadier, grenade_thrower greeter, saluter, welcomer gringo grinner grocer groom, bridegroom groom, bridegroom grouch, grump, crank, churl, crosspatch group_captain grunter prison_guard, jailer, jailor, gaoler, screw, turnkey guard guesser guest, invitee guest guest_of_honor guest_worker, guestworker guide guitarist, guitar_player gunnery_sergeant guru guru guvnor guy, cat, hombre, bozo gymnast gym_rat gynecologist, gynaecologist, woman's_doctor Gypsy, Gipsy, Romany, Rommany, Romani, Roma, Bohemian hack, drudge, hacker hacker, cyber-terrorist, cyberpunk haggler hairdresser, hairstylist, stylist, styler hakim, hakeem Hakka halberdier halfback half_blood hand animal_trainer, handler handyman, jack_of_all_trades, odd-job_man hang_glider hardliner harlequin harmonizer, harmoniser hash_head hatchet_man, iceman hater hatmaker, hatter, milliner, modiste headman, tribal_chief, chieftain, chief headmaster, schoolmaster, master head_nurse hearer, listener, auditor, attender heartbreaker heathen, pagan, gentile, infidel heavyweight heavy heckler, badgerer hedger hedger, equivocator, tergiversator hedonist, pagan, pleasure_seeker heir, inheritor, heritor heir_apparent heiress, inheritress, inheritrix heir_presumptive hellion, heller, devil helmsman, steersman, steerer hire hematologist, 
haematologist hemiplegic herald, trumpeter herbalist, herb_doctor herder, herdsman, drover hermaphrodite, intersex, gynandromorph, androgyne, epicene, epicene_person heroine heroin_addict hero_worshiper, hero_worshipper Herr highbinder highbrow high_commissioner highflier, highflyer Highlander, Scottish_Highlander, Highland_Scot high-muck-a-muck, pooh-bah high_priest highjacker, hijacker hireling, pensionary historian, historiographer hitchhiker hitter, striker hobbyist holdout holdover, hangover holdup_man, stickup_man homeboy homeboy home_buyer homegirl homeless, homeless_person homeopath, homoeopath honest_woman honor_guard, guard_of_honor hooker hoper hornist horseman, equestrian, horseback_rider horse_trader horsewoman horse_wrangler, wrangler horticulturist, plantsman hospital_chaplain host, innkeeper, boniface host hostess hotelier, hotelkeeper, hotel_manager, hotelman, hosteller housekeeper housemaster housemate house_physician, resident, resident_physician house_sitter housing_commissioner huckster, cheap-jack hugger humanist, humanitarian humanitarian, do-gooder, improver hunk huntress ex-husband, ex hydrologist hyperope hypertensive hypnotist, hypnotizer, hypnotiser, mesmerist, mesmerizer hypocrite, dissembler, dissimulator, phony, phoney, pretender iceman iconoclast ideologist, ideologue idol, matinee_idol idolizer, idoliser imam, imaum imperialist important_person, influential_person, personage inamorato incumbent, officeholder incurable inductee industrialist infanticide inferior infernal infielder infiltrator informer, betrayer, rat, squealer, blabber ingenue ingenue polymath in-law, relative-in-law inquiry_agent inspector inspector_general instigator, initiator insurance_broker, insurance_agent, general_agent, underwriter insurgent, insurrectionist, freedom_fighter, rebel intelligence_analyst interior_designer, designer, interior_decorator, house_decorator, room_decorator, decorator interlocutor, conversational_partner interlocutor, middleman 
International_Grandmaster internationalist internist interpreter, translator interpreter intervenor introvert invader, encroacher invalidator, voider, nullifier investigator investor invigilator irreligionist Ivy_Leaguer Jack_of_all_trades Jacksonian Jane_Doe janissary Jat Javanese, Javan Jekyll_and_Hyde jester, fool, motley_fool Jesuit jezebel jilt jobber, middleman, wholesaler job_candidate Job's_comforter jockey John_Doe journalist judge, justice, jurist judge_advocate juggler Jungian junior junior Junior, Jr, Jnr junior_lightweight junior_middleweight jurist, legal_expert juror, juryman, jurywoman justice_of_the_peace justiciar, justiciary kachina keyboardist Khedive kingmaker king, queen, world-beater King's_Counsel Counsel_to_the_Crown kin, kinsperson, family enate, matrikin, matrilineal_kin, matrisib, matrilineal_sib kink kinswoman kisser, osculator kitchen_help kitchen_police, KP Klansman, Ku_Kluxer, Kluxer kleptomaniac kneeler knight knocker knower, apprehender know-it-all, know-all kolkhoznik Kshatriya labor_coach, birthing_coach, doula, monitrice laborer, manual_laborer, labourer, jack Labourite lady lady-in-waiting lady's_maid lama lamb, dear lame_duck lamplighter land_agent landgrave landlubber, lubber, landsman landlubber, landsman, landman landowner, landholder, property_owner landscape_architect, landscape_gardener, landscaper, landscapist langlaufer languisher lapidary, lapidarist lass, lassie, young_girl, jeune_fille Latin Latin latitudinarian Jehovah's_Witness law_agent lawgiver, lawmaker lawman, law_officer, peace_officer law_student lawyer, attorney lay_reader lazybones leaker leaseholder, lessee lector, lecturer, reader lector, reader lecturer left-hander, lefty, southpaw legal_representative legate, official_emissary legatee legionnaire, legionary letterman liberator licenser licentiate lieutenant lieutenant_colonel, light_colonel lieutenant_commander lieutenant_junior_grade, lieutenant_JG life lifeguard, lifesaver life_tenant light_flyweight 
light_heavyweight, cruiserweight light_heavyweight light-o'-love, light-of-love lightweight lightweight lightweight lilliputian limnologist lineman line_officer lion-hunter lisper lister literary_critic literate, literate_person litigant, litigator litterer, litterbug, litter_lout little_brother little_sister lobbyist locksmith locum_tenens, locum Lord, noble, nobleman loser loser, also-ran failure, loser, nonstarter, unsuccessful_person Lothario loudmouth, blusterer lowerclassman, underclassman Lowlander, Scottish_Lowlander, Lowland_Scot loyalist, stalwart Luddite lumberman, lumberjack, logger, feller, faller lumper bedlamite pyromaniac lutist, lutanist, lutenist Lutheran lyricist, lyrist macebearer, mace, macer machinist, mechanic, shop_mechanic madame maenad maestro, master magdalen magician, prestidigitator, conjurer, conjuror, illusionist magus maharani, maharanee mahatma maid, maiden maid, maidservant, housemaid, amah major major major-domo, seneschal maker, shaper malahini malcontent malik malingerer, skulker, shammer Malthusian adonis man man manageress mandarin maneuverer, manoeuvrer maniac Manichaean, Manichean, Manichee manicurist manipulator man-at-arms man_of_action, man_of_deeds man_of_letters manufacturer, producer marcher, parader marchioness, marquise margrave margrave Marine, devil_dog, leatherneck, shipboard_soldier marquess marquis, marquess marshal, marshall martinet, disciplinarian, moralist mascot masochist mason, stonemason masquerader, masker, masquer masseur masseuse master master, captain, sea_captain, skipper master-at-arms master_of_ceremonies, emcee, host masturbator, onanist matchmaker, matcher, marriage_broker mate, first_mate mate mate mater material materialist matriarch, materfamilias matriarch matriculate matron mayor, city_manager mayoress mechanical_engineer medalist, medallist, medal_winner medical_officer, medic medical_practitioner, medical_man medical_scientist medium, spiritualist, sensitive megalomaniac melancholic, 
melancholiac Melkite, Melchite melter nonmember board_member clansman, clanswoman, clan_member memorizer, memoriser Mendelian mender, repairer, fixer Mesoamerican messmate mestiza meteorologist meter_maid Methodist Metis metropolitan mezzo-soprano, mezzo microeconomist, microeconomic_expert middle-aged_man middlebrow middleweight midwife, accoucheuse mikado, tenno Milanese miler miles_gloriosus military_attache military_chaplain, padre, Holy_Joe, sky_pilot military_leader military_officer, officer military_policeman, MP mill_agent mill-hand, factory_worker millionairess millwright minder mining_engineer minister, government_minister ministrant minor_leaguer, bush_leaguer Minuteman misanthrope, misanthropist misfit mistress mistress, kept_woman, fancy_woman mixed-blood model, poser class_act modeler, modeller modifier molecular_biologist Monegasque, Monacan monetarist moneygrubber moneymaker Mongoloid monolingual monologist moonlighter moralist morosoph morris_dancer mortal_enemy mortgagee, mortgage_holder mortician, undertaker, funeral_undertaker, funeral_director moss-trooper mother, female_parent mother mother mother_figure mother_hen mother-in-law mother's_boy, mamma's_boy, mama's_boy mother's_daughter motorcycle_cop, motorcycle_policeman, speed_cop motorcyclist Mound_Builder mountebank, charlatan mourner, griever, sorrower, lamenter mouthpiece, mouth mover moviegoer, motion-picture_fan muffin_man mugwump, independent, fencesitter Mullah, Mollah, Mulla muncher murderess murder_suspect musher musician, instrumentalist, player musicologist music_teacher musketeer Muslimah mutilator, maimer, mangler mutineer mute, deaf-mute, deaf-and-dumb_person mutterer, mumbler, murmurer muzzler Mycenaen mycologist myope myrmidon mystic, religious_mystic mythologist naif nailer namby-pamby name_dropper namer nan nanny, nursemaid, nurse narc, nark, narcotics_agent narcissist, narcist nark, copper's_nark nationalist nautch_girl naval_commander Navy_SEAL, SEAL obstructionist, 
obstructor, obstructer, resister, thwarter Nazarene Nazarene, Ebionite Nazi, German_Nazi nebbish, nebbech necker neonate, newborn, newborn_infant, newborn_baby nephew neurobiologist neurologist, brain_doctor neurosurgeon, brain_surgeon neutral neutralist newcomer, fledgling, fledgeling, starter, neophyte, freshman, newbie, entrant newcomer New_Dealer newspaper_editor newsreader, news_reader Newtonian niece niggard, skinflint, scrooge, churl night_porter night_rider, nightrider NIMBY niqaabi nitpicker Nobelist, Nobel_Laureate NOC noncandidate noncommissioned_officer, noncom, enlisted_officer nondescript nondriver nonparticipant nonperson, unperson nonresident nonsmoker Northern_Baptist noticer novelist novitiate, novice nuclear_chemist, radiochemist nudger nullipara number_theorist nurse nursling, nurseling, suckling nymph, houri nymphet nympholept nymphomaniac, nympho oarswoman oboist obscurantist observer, commentator obstetrician, accoucheur occupier occultist wine_lover offerer, offeror office-bearer office_boy officeholder, officer officiant Federal, Fed, federal_official oilman oil_tycoon old-age_pensioner old_boy old_lady old_man oldster, old_person, senior_citizen, golden_ager old-timer, oldtimer, gaffer, old_geezer, antique old_woman oligarch Olympian omnivore oncologist onlooker, looker-on onomancer operator opportunist, self-seeker optimist Orangeman orator, speechmaker, rhetorician, public_speaker, speechifier orderly, hospital_attendant orderly orderly_sergeant ordinand ordinary organ-grinder organist organization_man organizer, organiser, arranger organizer, organiser, labor_organizer originator, conceiver, mastermind ornithologist, bird_watcher orphan orphan osteopath, osteopathist out-and-outer outdoorswoman outfielder outfielder right_fielder right-handed_pitcher, right-hander outlier owner-occupier oyabun packrat padrone padrone page, pageboy painter Paleo-American, Paleo-Amerind, Paleo-Indian paleontologist, palaeontologist, fossilist pallbearer, 
bearer palmist, palmister, chiromancer pamperer, spoiler, coddler, mollycoddler Panchen_Lama panelist, panellist panhandler paparazzo paperboy paperhanger, paperer paperhanger papoose, pappoose pardoner paretic parishioner park_commissioner Parliamentarian, Member_of_Parliament parliamentary_agent parodist, lampooner parricide parrot partaker, sharer part-timer party party_man, party_liner passenger, rider passer paster pater patient patriarch patriarch patriarch, paterfamilias patriot, nationalist patron, sponsor, supporter patternmaker pawnbroker payer, remunerator peacekeeper peasant pedant, bookworm, scholastic peddler, pedlar, packman, hawker, pitchman pederast, paederast, child_molester penologist pentathlete Pentecostal, Pentecostalist percussionist periodontist peshmerga personality personal_representative personage persona_grata persona_non_grata personification perspirer, sweater pervert, deviant, deviate, degenerate pessimist pest, blighter, cuss, pesterer, gadfly Peter_Pan petitioner, suppliant, supplicant, requester petit_juror, petty_juror pet_sitter, critter_sitter petter, fondler Pharaoh, Pharaoh_of_Egypt pharmacist, druggist, chemist, apothecary, pill_pusher, pill_roller philanthropist, altruist philatelist, stamp_collector philosopher phonetician phonologist photojournalist photometrist, photometrician physical_therapist, physiotherapist physicist piano_maker picker, chooser, selector picnicker, picknicker pilgrim pill pillar, mainstay pill_head pilot Piltdown_man, Piltdown_hoax pimp, procurer, panderer, pander, pandar, fancy_man, ponce pipe_smoker pip-squeak, squirt, small_fry pisser, urinator pitcher, hurler, twirler pitchman placeman, placeseeker placer_miner plagiarist, plagiarizer, plagiariser, literary_pirate, pirate plainsman planner, contriver, deviser planter, plantation_owner plasterer platinum_blond, platinum_blonde platitudinarian playboy, man-about-town, Corinthian player, participant playmate, playfellow pleaser pledger 
plenipotentiary plier, plyer plodder, slowpoke, stick-in-the-mud, slowcoach plodder, slogger plotter, mapper plumber, pipe_fitter pluralist pluralist poet pointsman point_woman policyholder political_prisoner political_scientist politician, politico, pol, political_leader politician pollster, poll_taker, headcounter, canvasser polluter, defiler pool_player portraitist, portrait_painter, portrayer, limner poseuse positivist, rationalist postdoc, post_doc poster_girl postulator private_citizen problem_solver, solver, convergent_thinker pro-lifer prosthetist postulant potboy, potman poultryman, poulterer power_user power_worker, power-station_worker practitioner, practician prayer, supplicant preceptor, don predecessor preemptor, pre-emptor preemptor, pre-emptor premature_baby, preterm_baby, premature_infant, preterm_infant, preemie, premie presbyter presenter, sponsor presentist preserver president President_of_the_United_States, United_States_President, President, Chief_Executive president, prexy press_agent, publicity_man, public_relations_man, PR_man press_photographer priest prima_ballerina prima_donna, diva prima_donna primigravida, gravida_I primordial_dwarf, hypoplastic_dwarf, true_dwarf, normal_dwarf prince_charming prince_consort princeling Prince_of_Wales princess princess_royal principal, dealer principal, school_principal, head_teacher, head print_seller prior private, buck_private, common_soldier probationer, student_nurse processor process-server proconsul proconsul proctologist proctor, monitor procurator procurer, securer profit_taker programmer, computer_programmer, coder, software_engineer promiser, promisor promoter, booster, plugger promulgator propagandist propagator, disseminator property_man, propman, property_master prophetess prophet prosecutor, public_prosecutor, prosecuting_officer, prosecuting_attorney prospector protectionist protegee protozoologist provost_marshal pruner, trimmer psalmist psephologist psychiatrist, head-shrinker, shrink 
psychic psycholinguist psychophysicist publican, tavern_keeper pudge puerpera punching_bag punter punter puppeteer puppy, pup purchasing_agent puritan Puritan pursuer pusher, shover pusher, drug_peddler, peddler, drug_dealer, drug_trafficker pusher, thruster putz Pygmy, Pigmy qadi quadriplegic quadruplet, quad quaker, trembler quarter quarterback, signal_caller, field_general quartermaster quartermaster_general Quebecois queen, queen_regnant, female_monarch Queen_of_England queen queen queen_consort queen_mother Queen's_Counsel question_master, quizmaster quick_study, sponge quietist quitter rabbi racist, racialist radiobiologist radiologic_technologist radiologist, radiotherapist rainmaker raiser raja, rajah rake, rakehell, profligate, rip, blood, roue ramrod ranch_hand ranker ranter, raver rape_suspect rapper rapporteur rare_bird, rara_avis ratepayer raw_recruit reader reading_teacher realist real_estate_broker, real_estate_agent, estate_agent, land_agent, house_agent rear_admiral receiver reciter recruit, enlistee recruit, military_recruit recruiter recruiting-sergeant redcap redhead, redheader, red-header, carrottop redneck, cracker reeler reenactor referral referee, ref refiner Reform_Jew registered_nurse, RN registrar Regius_professor reliever, allayer, comforter anchorite, hermit religious_leader remover Renaissance_man, generalist renegade rentier repairman, maintenance_man, service_man reporter, newsman, newsperson newswoman representative reprobate, miscreant rescuer, recoverer, saver reservist resident_commissioner respecter restaurateur, restauranter restrainer, controller retailer, retail_merchant retiree, retired_person returning_officer revenant revisionist revolutionist, revolutionary, subversive, subverter rheumatologist Rhodesian_man, Homo_rhodesiensis rhymer, rhymester, versifier, poetizer, poetiser rich_person, wealthy_person, have rider riding_master rifleman right-hander, right_hander, righthander right-hand_man, chief_assistant, man_Friday 
ringer ringleader roadman, road_mender roarer, bawler, bellower, screamer, screecher, shouter, yeller rocket_engineer, rocket_scientist rocket_scientist rock_star Romanov, Romanoff romanticist, romantic ropemaker, rope-maker, roper roper roper ropewalker, ropedancer rosebud Rosicrucian Mountie Rough_Rider roundhead civil_authority, civil_officer runner runner runner running_back rusher rustic saboteur, wrecker, diversionist sadist sailing_master, navigator sailor, crewman salesgirl, saleswoman, saleslady salesman salesperson, sales_representative, sales_rep salvager, salvor sandwichman sangoma sannup sapper Sassenach satrap saunterer, stroller, ambler Savoyard sawyer scalper scandalmonger scapegrace, black_sheep scene_painter schemer, plotter schizophrenic schlemiel, shlemiel schlockmeister, shlockmeister scholar, scholarly_person, bookman, student scholiast schoolchild, school-age_child, pupil schoolfriend Schoolman, medieval_Schoolman schoolmaster schoolmate, classmate, schoolfellow, class_fellow scientist scion scoffer, flouter, mocker, jeerer scofflaw scorekeeper, scorer scorer scourer scout, talent_scout scoutmaster scrambler scratcher screen_actor, movie_actor scrutineer, canvasser scuba_diver sculptor, sculpturer, carver, statue_maker Sea_Scout seasonal_worker, seasonal seasoner second_baseman, second_sacker second_cousin seconder second_fiddle, second_banana second-in-command second_lieutenant, 2nd_lieutenant second-rater, mediocrity secretary Secretary_of_Agriculture, Agriculture_Secretary Secretary_of_Health_and_Human_Services Secretary_of_State Secretary_of_the_Interior, Interior_Secretary sectarian, sectary, sectarist section_hand secularist security_consultant seeded_player, seed seeder, cloud_seeder seeker, searcher, quester segregate segregator, segregationist selectman selectwoman selfish_person self-starter seller, marketer, vender, vendor, trafficker selling_agent semanticist, semiotician semifinalist seminarian, seminarist senator sendee senior 
senior_vice_president separatist, separationist septuagenarian serf, helot, villein spree_killer serjeant-at-law, serjeant, sergeant-at-law, sergeant server serviceman, military_man, man, military_personnel settler, colonist settler sex_symbol sexton, sacristan shaheed Shakespearian, Shakespearean shanghaier, seizer sharecropper, cropper, sharecrop_farmer shaver Shavian sheep sheik, tribal_sheik, sheikh, tribal_sheikh, Arab_chief shelver shepherd ship-breaker shipmate shipowner shipping_agent shirtmaker shogun shopaholic shop_girl shop_steward, steward shot_putter shrew, termagant shuffler shyster, pettifogger sibling, sib sick_person, diseased_person, sufferer sightreader signaler, signaller signer signor, signior signora signore signorina silent_partner, sleeping_partner addle-head, addlehead, loon, birdbrain simperer singer, vocalist, vocalizer, vocaliser Sinologist sipper sirrah Sister sister, sis waverer, vacillator, hesitator, hesitater sitar_player sixth-former skateboarder skeptic, sceptic, doubter sketcher skidder skier skinny-dipper skin-diver, aquanaut skinhead slasher slattern, slut, slovenly_woman, trollop sleeper, slumberer sleeper sleeping_beauty sleuth, sleuthhound slob, sloven, pig, slovenly_person sloganeer slopseller, slop-seller smasher, stunner, knockout, beauty, ravisher, sweetheart, peach, lulu, looker, mantrap, dish smirker smith, metalworker smoothie, smoothy, sweet_talker, charmer smuggler, runner, contrabandist, moon_curser, moon-curser sneezer snob, prig, snot, snoot snoop, snooper snorer sob_sister soccer_player social_anthropologist, cultural_anthropologist social_climber, climber socialist socializer, socialiser social_scientist social_secretary Socinian sociolinguist sociologist soda_jerk, soda_jerker sodalist sodomite, sodomist, sod, bugger soldier son, boy songster songstress songwriter, songster, ballad_maker sorcerer, magician, wizard, necromancer, thaumaturge, thaumaturgist sorehead soul_mate Southern_Baptist sovereign, 
crowned_head, monarch spacewalker Spanish_American, Hispanic_American, Hispanic sparring_partner, sparring_mate spastic speaker, talker, utterer, verbalizer, verbaliser native_speaker Speaker speechwriter specialist, medical_specialist specifier spectator, witness, viewer, watcher, looker speech_therapist speedskater, speed_skater spellbinder sphinx spinster, old_maid split_end sport, sportsman, sportswoman sport, summercater sporting_man, outdoor_man sports_announcer, sportscaster, sports_commentator sports_editor sprog square_dancer square_shooter, straight_shooter, straight_arrow squatter squire squire staff_member, staffer staff_sergeant stage_director stainer stakeholder stalker stalking-horse stammerer, stutterer stamper, stomper, tramper, trampler standee stand-in, substitute, relief, reliever, backup, backup_man, fill-in star, principal, lead starlet starter, dispatcher statesman, solon, national_leader state_treasurer stationer, stationery_seller stenographer, amanuensis, shorthand_typist stentor stepbrother, half-brother, half_brother stepmother stepparent stevedore, loader, longshoreman, docker, dockhand, dock_worker, dockworker, dock-walloper, lumper steward steward, flight_attendant steward stickler stiff stifler, smotherer stipendiary, stipendiary_magistrate stitcher stockjobber stock_trader stockist stoker, fireman stooper store_detective strafer straight_man, second_banana stranger, alien, unknown stranger strategist, strategian straw_boss, assistant_foreman streetwalker, street_girl, hooker, hustler, floozy, floozie, slattern stretcher-bearer, litter-bearer struggler stud, he-man, macho-man student, pupil, educatee stumblebum, palooka stylist subaltern subcontractor subduer, surmounter, overcomer subject, case, guinea_pig subordinate, subsidiary, underling, foot_soldier substitute, reserve, second-stringer successor, heir successor, replacement succorer, succourer Sufi suffragan, suffragan_bishop suffragette sugar_daddy suicide_bomber suitor, suer, 
wooer sumo_wrestler sunbather sundowner super_heavyweight superior, higher-up, superordinate supermom supernumerary, spear_carrier, extra supremo surgeon, operating_surgeon, sawbones Surgeon_General Surgeon_General surpriser surveyor surveyor survivor, subsister sutler, victualer, victualler, provisioner sweeper sweetheart, sweetie, steady, truelove swinger, tramp switcher, whipper swot, grind, nerd, wonk, dweeb sycophant, toady, crawler, lackey, ass-kisser sylph sympathizer, sympathiser, well-wisher symphonist syncopator syndic tactician tagger tailback tallyman, tally_clerk tallyman tanker, tank_driver tapper, wiretapper, phone_tapper Tartuffe, Tartufe Tarzan taster, taste_tester, taste-tester, sampler tax_assessor, assessor taxer taxi_dancer taxonomist, taxonomer, systematist teacher, instructor teaching_fellow tearaway technical_sergeant technician Ted, Teddy_boy teetotaler, teetotaller, teetotalist television_reporter, television_newscaster, TV_reporter, TV_newsman temporizer, temporiser tempter term_infant toiler tenant, renter tenant tenderfoot tennis_player tennis_pro, professional_tennis_player tenor_saxophonist, tenorist termer terror, scourge, threat tertigravida, gravida_III testator, testate testatrix testee, examinee test-tube_baby Texas_Ranger, Ranger thane theatrical_producer theologian, theologist, theologizer, theologiser theorist, theoretician, theorizer, theoriser, idealogue theosophist therapist, healer Thessalonian thinker, creative_thinker, mind thinker thrower thurifer ticket_collector, ticket_taker tight_end tiler timekeeper, timer Timorese tinkerer, fiddler tinsmith, tinner tinter tippler, social_drinker tipster, tout T-man toastmaster, symposiarch toast_mistress tobogganist tomboy, romp, hoyden toolmaker torchbearer Tory Tory tosser tosser, jerk-off, wanker totalitarian tourist, tourer, holidaymaker tout, touter tout, ticket_tout tovarich, tovarisch towhead town_clerk town_crier, crier townsman, towner toxicologist track_star trader, 
bargainer, dealer, monger trade_unionist, unionist, union_member traditionalist, diehard traffic_cop tragedian tragedian tragedienne trail_boss trainer traitor, treasonist traitress transactor transcriber transfer, transferee transferee translator, transcriber transvestite, cross-dresser traveling_salesman, travelling_salesman, commercial_traveler, commercial_traveller, roadman, bagman traverser trawler Treasury, First_Lord_of_the_Treasury trencher trend-setter, taste-maker, fashion_arbiter tribesman trier, attempter, essayer trifler trooper trooper, state_trooper Trotskyite, Trotskyist, Trot truant, hooky_player trumpeter, cornetist trusty Tudor tumbler tutee twin two-timer Tyke tympanist, timpanist typist tyrant, autocrat, despot umpire, ump understudy, standby undesirable unicyclist unilateralist Unitarian Arminian universal_donor UNIX_guru Unknown_Soldier upsetter upstager upstart, parvenu, nouveau-riche, arriviste upstart urchin urologist usherette usher, doorkeeper usurper, supplanter utility_man utilizer, utiliser Utopian uxoricide vacationer, vacationist valedictorian, valedictory_speaker valley_girl vaulter, pole_vaulter, pole_jumper vegetarian vegan venerator venture_capitalist venturer, merchant-venturer vermin, varmint very_important_person, VIP, high-up, dignitary, panjandrum, high_muckamuck vibist, vibraphonist vicar vicar vicar-general vice_chancellor vicegerent vice_president, V.P. 
vice-regent victim, dupe Victorian victualer, victualler vigilante, vigilance_man villager vintager vintner, wine_merchant violator, debaucher, ravisher violator, lawbreaker, law_offender violist virago virologist Visayan, Bisayan viscountess viscount Visigoth visionary visiting_fireman visiting_professor visualizer, visualiser vixen, harpy, hellcat vizier voicer volunteer, unpaid_worker volunteer, military_volunteer, voluntary votary votary vouchee vower voyager voyeur, Peeping_Tom, peeper vulcanizer, vulcaniser waffler Wagnerian waif, street_child wailer waiter, server waitress walking_delegate walk-on wallah wally waltzer wanderer, roamer, rover, bird_of_passage Wandering_Jew wanton warrantee warrantee washer washerman, laundryman washwoman, washerwoman, laundrywoman, laundress wassailer, carouser wastrel, waster Wave weatherman, weather_forecaster weekend_warrior weeder welder welfare_case, charity_case westerner West-sider wetter whaler Whig whiner, complainer, moaner, sniveller, crybaby, bellyacher, grumbler, squawker whipper-in whisperer whiteface Carmelite, White_Friar Augustinian white_hope, great_white_hope white_supremacist whoremaster, whoremonger whoremaster, whoremonger, john, trick widow, widow_woman wife, married_woman wiggler, wriggler, squirmer wimp, chicken, crybaby wing_commander winger winner winner, victor window_dresser, window_trimmer winker wiper wireman, wirer wise_guy, smart_aleck, wiseacre, wisenheimer, weisenheimer witch_doctor withdrawer withdrawer woman, adult_female woman wonder_boy, golden_boy wonderer working_girl workman, workingman, working_man, working_person workmate worldling worshiper, worshipper worthy wrecker wright write-in_candidate, write-in writer, author Wykehamist yakuza yard_bird, yardbird yardie yardman yardmaster, trainmaster, train_dispatcher yenta yogi young_buck, young_man young_Turk Young_Turk Zionist zoo_keeper Genet, Edmund_Charles_Edouard_Genet, Citizen_Genet Kennan, George_F._Kennan, George_Frost_Kennan 
Munro, H._H._Munro, Hector_Hugh_Munro, Saki Popper, Karl_Popper, Sir_Karl_Raimund_Popper Stoker, Bram_Stoker, Abraham_Stoker Townes, Charles_Townes, Charles_Hard_Townes dust_storm, duster, sandstorm, sirocco parhelion, mock_sun, sundog snow, snowfall facula wave microflora wilding semi-climber volva basidiocarp domatium apomict aquatic bryophyte, nonvascular_plant acrocarp, acrocarpous_moss sphagnum, sphagnum_moss, peat_moss, bog_moss liverwort, hepatic hepatica, Marchantia_polymorpha pecopteris pteridophyte, nonflowering_plant fern fern_ally spore carpospore chlamydospore conidium, conidiospore oospore tetraspore zoospore cryptogam spermatophyte, phanerogam, seed_plant seedling annual biennial perennial hygrophyte gymnosperm gnetum, Gnetum_gnemon Catha_edulis ephedra, joint_fir mahuang, Ephedra_sinica welwitschia, Welwitschia_mirabilis cycad sago_palm, Cycas_revoluta false_sago, fern_palm, Cycas_circinalis zamia coontie, Florida_arrowroot, Seminole_bread, Zamia_pumila ceratozamia dioon encephalartos kaffir_bread, Encephalartos_caffer macrozamia burrawong, Macrozamia_communis, Macrozamia_spiralis pine, pine_tree, true_pine pinon, pinyon nut_pine pinon_pine, Mexican_nut_pine, Pinus_cembroides Rocky_mountain_pinon, Pinus_edulis single-leaf, single-leaf_pine, single-leaf_pinyon, Pinus_monophylla bishop_pine, bishop's_pine, Pinus_muricata California_single-leaf_pinyon, Pinus_californiarum Parry's_pinyon, Pinus_quadrifolia, Pinus_parryana spruce_pine, Pinus_glabra black_pine, Pinus_nigra pitch_pine, northern_pitch_pine, Pinus_rigida pond_pine, Pinus_serotina stone_pine, umbrella_pine, European_nut_pine, Pinus_pinea Swiss_pine, Swiss_stone_pine, arolla_pine, cembra_nut_tree, Pinus_cembra cembra_nut, cedar_nut Swiss_mountain_pine, mountain_pine, dwarf_mountain_pine, mugho_pine, mugo_pine, Pinus_mugo ancient_pine, Pinus_longaeva white_pine American_white_pine, eastern_white_pine, weymouth_pine, Pinus_strobus western_white_pine, silver_pine, mountain_pine, Pinus_monticola 
southwestern_white_pine, Pinus_strobiformis limber_pine, Pinus_flexilis whitebark_pine, whitebarked_pine, Pinus_albicaulis yellow_pine ponderosa, ponderosa_pine, western_yellow_pine, bull_pine, Pinus_ponderosa Jeffrey_pine, Jeffrey's_pine, black_pine, Pinus_jeffreyi shore_pine, lodgepole, lodgepole_pine, spruce_pine, Pinus_contorta Sierra_lodgepole_pine, Pinus_contorta_murrayana loblolly_pine, frankincense_pine, Pinus_taeda jack_pine, Pinus_banksiana swamp_pine longleaf_pine, pitch_pine, southern_yellow_pine, Georgia_pine, Pinus_palustris shortleaf_pine, short-leaf_pine, shortleaf_yellow_pine, Pinus_echinata red_pine, Canadian_red_pine, Pinus_resinosa Scotch_pine, Scots_pine, Scotch_fir, Pinus_sylvestris scrub_pine, Virginia_pine, Jersey_pine, Pinus_virginiana Monterey_pine, Pinus_radiata bristlecone_pine, Rocky_Mountain_bristlecone_pine, Pinus_aristata table-mountain_pine, prickly_pine, hickory_pine, Pinus_pungens knobcone_pine, Pinus_attenuata Japanese_red_pine, Japanese_table_pine, Pinus_densiflora Japanese_black_pine, black_pine, Pinus_thunbergii Torrey_pine, Torrey's_pine, soledad_pine, grey-leaf_pine, sabine_pine, Pinus_torreyana larch, larch_tree American_larch, tamarack, black_larch, Larix_laricina western_larch, western_tamarack, Oregon_larch, Larix_occidentalis subalpine_larch, Larix_lyallii European_larch, Larix_decidua Siberian_larch, Larix_siberica, Larix_russica golden_larch, Pseudolarix_amabilis fir, fir_tree, true_fir silver_fir amabilis_fir, white_fir, Pacific_silver_fir, red_silver_fir, Christmas_tree, Abies_amabilis European_silver_fir, Christmas_tree, Abies_alba white_fir, Colorado_fir, California_white_fir, Abies_concolor, Abies_lowiana balsam_fir, balm_of_Gilead, Canada_balsam, Abies_balsamea Fraser_fir, Abies_fraseri lowland_fir, lowland_white_fir, giant_fir, grand_fir, Abies_grandis Alpine_fir, subalpine_fir, Abies_lasiocarpa Santa_Lucia_fir, bristlecone_fir, Abies_bracteata, Abies_venusta cedar, cedar_tree, true_cedar cedar_of_Lebanon, 
Cedrus_libani deodar, deodar_cedar, Himalayan_cedar, Cedrus_deodara Atlas_cedar, Cedrus_atlantica spruce Norway_spruce, Picea_abies weeping_spruce, Brewer's_spruce, Picea_breweriana Engelmann_spruce, Engelmann's_spruce, Picea_engelmannii white_spruce, Picea_glauca black_spruce, Picea_mariana, spruce_pine Siberian_spruce, Picea_obovata Sitka_spruce, Picea_sitchensis oriental_spruce, Picea_orientalis Colorado_spruce, Colorado_blue_spruce, silver_spruce, Picea_pungens red_spruce, eastern_spruce, yellow_spruce, Picea_rubens hemlock, hemlock_tree eastern_hemlock, Canadian_hemlock, spruce_pine, Tsuga_canadensis Carolina_hemlock, Tsuga_caroliniana mountain_hemlock, black_hemlock, Tsuga_mertensiana western_hemlock, Pacific_hemlock, west_coast_hemlock, Tsuga_heterophylla douglas_fir green_douglas_fir, douglas_spruce, douglas_pine, douglas_hemlock, Oregon_fir, Oregon_pine, Pseudotsuga_menziesii big-cone_spruce, big-cone_douglas_fir, Pseudotsuga_macrocarpa Cathaya cedar, cedar_tree cypress, cypress_tree gowen_cypress, Cupressus_goveniana pygmy_cypress, Cupressus_pigmaea, Cupressus_goveniana_pigmaea Santa_Cruz_cypress, Cupressus_abramsiana, Cupressus_goveniana_abramsiana Arizona_cypress, Cupressus_arizonica Guadalupe_cypress, Cupressus_guadalupensis Monterey_cypress, Cupressus_macrocarpa Mexican_cypress, cedar_of_Goa, Portuguese_cypress, Cupressus_lusitanica Italian_cypress, Mediterranean_cypress, Cupressus_sempervirens King_William_pine, Athrotaxis_selaginoides Chilean_cedar, Austrocedrus_chilensis incense_cedar, red_cedar, Calocedrus_decurrens, Libocedrus_decurrens southern_white_cedar, coast_white_cedar, Atlantic_white_cedar, white_cypress, white_cedar, Chamaecyparis_thyoides Oregon_cedar, Port_Orford_cedar, Lawson's_cypress, Lawson's_cedar, Chamaecyparis_lawsoniana yellow_cypress, yellow_cedar, Nootka_cypress, Alaska_cedar, Chamaecyparis_nootkatensis Japanese_cedar, Japan_cedar, sugi, Cryptomeria_japonica juniper_berry incense_cedar kawaka, Libocedrus_plumosa pahautea, 
Libocedrus_bidwillii, mountain_pine metasequoia, dawn_redwood, Metasequoia_glyptostrodoides arborvitae western_red_cedar, red_cedar, canoe_cedar, Thuja_plicata American_arborvitae, northern_white_cedar, white_cedar, Thuja_occidentalis Oriental_arborvitae, Thuja_orientalis, Platycladus_orientalis hiba_arborvitae, Thujopsis_dolobrata keteleeria Wollemi_pine araucaria monkey_puzzle, chile_pine, Araucaria_araucana norfolk_island_pine, Araucaria_heterophylla, Araucaria_excelsa new_caledonian_pine, Araucaria_columnaris bunya_bunya, bunya_bunya_tree, Araucaria_bidwillii hoop_pine, Moreton_Bay_pine, Araucaria_cunninghamii kauri_pine, dammar_pine kauri, kaury, Agathis_australis amboina_pine, amboyna_pine, Agathis_dammara, Agathis_alba dundathu_pine, queensland_kauri, smooth_bark_kauri, Agathis_robusta red_kauri, Agathis_lanceolata plum-yew California_nutmeg, nutmeg-yew, Torreya_californica stinking_cedar, stinking_yew, Torrey_tree, Torreya_taxifolia celery_pine celery_top_pine, celery-topped_pine, Phyllocladus_asplenifolius tanekaha, Phyllocladus_trichomanoides Alpine_celery_pine, Phyllocladus_alpinus yellowwood, yellowwood_tree gymnospermous_yellowwood podocarp yacca, yacca_podocarp, Podocarpus_coriaceus brown_pine, Rockingham_podocarp, Podocarpus_elatus cape_yellowwood, African_yellowwood, Podocarpus_elongatus South-African_yellowwood, Podocarpus_latifolius alpine_totara, Podocarpus_nivalis totara, Podocarpus_totara common_yellowwood, bastard_yellowwood, Afrocarpus_falcata kahikatea, New_Zealand_Dacryberry, New_Zealand_white_pine, Dacrycarpus_dacrydioides, Podocarpus_dacrydioides rimu, imou_pine, red_pine, Dacrydium_cupressinum tarwood, tar-wood, Dacrydium_colensoi common_sickle_pine, Falcatifolium_falciforme yellow-leaf_sickle_pine, Falcatifolium_taxoides tarwood, tar-wood, New_Zealand_mountain_pine, Halocarpus_bidwilli, Dacrydium_bidwilli westland_pine, silver_pine, Lagarostrobus_colensoi huon_pine, Lagarostrobus_franklinii, Dacrydium_franklinii Chilean_rimu, 
Lepidothamnus_fonkii mountain_rimu, Lepidothamnus_laxifolius, Dacridium_laxifolius nagi, Nageia_nagi miro, black_pine, Prumnopitys_ferruginea, Podocarpus_ferruginea matai, black_pine, Prumnopitys_taxifolia, Podocarpus_spicata plum-fruited_yew, Prumnopitys_andina, Prumnopitys_elegans Prince_Albert_yew, Prince_Albert's_yew, Saxe-gothea_conspicua Sundacarpus_amara, Prumnopitys_amara, Podocarpus_amara Japanese_umbrella_pine, Sciadopitys_verticillata yew Old_World_yew, English_yew, Taxus_baccata Pacific_yew, California_yew, western_yew, Taxus_brevifolia Japanese_yew, Taxus_cuspidata Florida_yew, Taxus_floridana New_Caledonian_yew, Austrotaxus_spicata white-berry_yew, Pseudotaxus_chienii ginkgo, gingko, maidenhair_tree, Ginkgo_biloba angiosperm, flowering_plant dicot, dicotyledon, magnoliopsid, exogen monocot, monocotyledon, liliopsid, endogen floret, floweret flower bloomer wildflower, wild_flower apetalous_flower inflorescence rosebud gynostegium pollinium pistil gynobase gynophore stylopodium carpophore cornstalk, corn_stalk petiolule mericarp micropyle germ_tube pollen_tube gemma galbulus nectary, honey_gland pericarp, seed_vessel epicarp, exocarp mesocarp pip silique, siliqua cataphyll perisperm monocarp, monocarpic_plant, monocarpous_plant sporophyte gametophyte megasporangium, macrosporangium microspore microsporangium microsporophyll archespore, archesporium bonduc_nut, nicker_nut, nicker_seed Job's_tears oilseed, oil-rich_seed castor_bean cottonseed candlenut peach_pit hypanthium, floral_cup, calyx_tube petal, flower_petal corolla lip perianth, chlamys, floral_envelope, perigone, perigonium thistledown custard_apple, custard_apple_tree cherimoya, cherimoya_tree, Annona_cherimola ilama, ilama_tree, Annona_diversifolia soursop, prickly_custard_apple, soursop_tree, Annona_muricata bullock's_heart, bullock's_heart_tree, bullock_heart, Annona_reticulata sweetsop, sweetsop_tree, Annona_squamosa pond_apple, pond-apple_tree, Annona_glabra pawpaw, papaw, papaw_tree, 
Asimina_triloba ilang-ilang, ylang-ylang, Cananga_odorata lancewood, lancewood_tree, Oxandra_lanceolata Guinea_pepper, negro_pepper, Xylopia_aethiopica barberry American_barberry, Berberis_canadensis common_barberry, European_barberry, Berberis_vulgaris Japanese_barberry, Berberis_thunbergii Oregon_grape, Oregon_holly_grape, hollygrape, mountain_grape, holly-leaves_barberry, Mahonia_aquifolium Oregon_grape, Mahonia_nervosa mayapple, May_apple, wild_mandrake, Podophyllum_peltatum May_apple allspice Carolina_allspice, strawberry_shrub, strawberry_bush, sweet_shrub, Calycanthus_floridus spicebush, California_allspice, Calycanthus_occidentalis katsura_tree, Cercidiphyllum_japonicum laurel true_laurel, bay, bay_laurel, bay_tree, Laurus_nobilis camphor_tree, Cinnamomum_camphora cinnamon, Ceylon_cinnamon, Ceylon_cinnamon_tree, Cinnamomum_zeylanicum cassia, cassia-bark_tree, Cinnamomum_cassia cassia_bark, Chinese_cinnamon Saigon_cinnamon, Cinnamomum_loureirii cinnamon_bark spicebush, spice_bush, American_spicebush, Benjamin_bush, Lindera_benzoin, Benzoin_odoriferum avocado, avocado_tree, Persea_Americana laurel-tree, red_bay, Persea_borbonia sassafras, sassafras_tree, Sassafras_albidum California_laurel, California_bay_tree, Oregon_myrtle, pepperwood, spice_tree, sassafras_laurel, California_olive, mountain_laurel, Umbellularia_californica anise_tree purple_anise, Illicium_floridanum star_anise, Illicium_anisatum star_anise, Chinese_anise, Illicium_verum magnolia southern_magnolia, evergreen_magnolia, large-flowering_magnolia, bull_bay, Magnolia_grandiflora umbrella_tree, umbrella_magnolia, elkwood, elk-wood, Magnolia_tripetala earleaved_umbrella_tree, Magnolia_fraseri cucumber_tree, Magnolia_acuminata large-leaved_magnolia, large-leaved_cucumber_tree, great-leaved_macrophylla, Magnolia_macrophylla saucer_magnolia, Chinese_magnolia, Magnolia_soulangiana star_magnolia, Magnolia_stellata sweet_bay, swamp_bay, swamp_laurel, Magnolia_virginiana manglietia, genus_Manglietia 
tulip_tree, tulip_poplar, yellow_poplar, canary_whitewood, Liriodendron_tulipifera moonseed common_moonseed, Canada_moonseed, yellow_parilla, Menispermum_canadense Carolina_moonseed, Cocculus_carolinus nutmeg, nutmeg_tree, Myristica_fragrans water_nymph, fragrant_water_lily, pond_lily, Nymphaea_odorata European_white_lily, Nymphaea_alba southern_spatterdock, Nuphar_sagittifolium lotus, Indian_lotus, sacred_lotus, Nelumbo_nucifera water_chinquapin, American_lotus, yanquapin, Nelumbo_lutea water-shield, fanwort, Cabomba_caroliniana water-shield, Brasenia_schreberi, water-target peony, paeony buttercup, butterflower, butter-flower, crowfoot, goldcup, kingcup meadow_buttercup, tall_buttercup, tall_crowfoot, tall_field_buttercup, Ranunculus_acris water_crowfoot, water_buttercup, Ranunculus_aquatilis lesser_celandine, pilewort, Ranunculus_ficaria lesser_spearwort, Ranunculus_flammula greater_spearwort, Ranunculus_lingua western_buttercup, Ranunculus_occidentalis creeping_buttercup, creeping_crowfoot, Ranunculus_repens cursed_crowfoot, celery-leaved_buttercup, Ranunculus_sceleratus aconite monkshood, helmetflower, helmet_flower, Aconitum_napellus wolfsbane, wolfbane, wolf's_bane, Aconitum_lycoctonum baneberry, cohosh, herb_Christopher baneberry red_baneberry, redberry, red-berry, snakeberry, Actaea_rubra pheasant's-eye, Adonis_annua anemone, windflower Alpine_anemone, mountain_anemone, Anemone_tetonensis Canada_anemone, Anemone_Canadensis thimbleweed, Anemone_cylindrica wood_anemone, Anemone_nemorosa wood_anemone, snowdrop, Anemone_quinquefolia longheaded_thimbleweed, Anemone_riparia snowdrop_anemone, snowdrop_windflower, Anemone_sylvestris Virginia_thimbleweed, Anemone_virginiana rue_anemone, Anemonella_thalictroides columbine, aquilegia, aquilege meeting_house, honeysuckle, Aquilegia_canadensis blue_columbine, Aquilegia_caerulea, Aquilegia_scopulorum_calcarea granny's_bonnets, Aquilegia_vulgaris marsh_marigold, kingcup, meadow_bright, May_blob, cowslip, water_dragon, 
Caltha_palustris American_bugbane, summer_cohosh, Cimicifuga_americana black_cohosh, black_snakeroot, rattle-top, Cimicifuga_racemosa fetid_bugbane, foetid_bugbane, Cimicifuga_foetida clematis pine_hyacinth, Clematis_baldwinii, Viorna_baldwinii blue_jasmine, blue_jessamine, curly_clematis, marsh_clematis, Clematis_crispa golden_clematis, Clematis_tangutica scarlet_clematis, Clematis_texensis leather_flower, Clematis_versicolor leather_flower, vase-fine, vase_vine, Clematis_viorna virgin's_bower, old_man's_beard, devil's_darning_needle, Clematis_virginiana purple_clematis, purple_virgin's_bower, mountain_clematis, Clematis_verticillaris goldthread, golden_thread, Coptis_groenlandica, Coptis_trifolia_groenlandica rocket_larkspur, Consolida_ambigua, Delphinium_ajacis delphinium larkspur winter_aconite, Eranthis_hyemalis lenten_rose, black_hellebore, Helleborus_orientalis green_hellebore, Helleborus_viridis hepatica, liverleaf goldenseal, golden_seal, yellow_root, turmeric_root, Hydrastis_Canadensis false_rue_anemone, false_rue, Isopyrum_biternatum giant_buttercup, Laccopetalum_giganteum nigella love-in-a-mist, Nigella_damascena fennel_flower, Nigella_hispanica black_caraway, nutmeg_flower, Roman_coriander, Nigella_sativa pasqueflower, pasque_flower meadow_rue false_bugbane, Trautvetteria_carolinensis globeflower, globe_flower winter's_bark, winter's_bark_tree, Drimys_winteri pepper_shrub, Pseudowintera_colorata, Wintera_colorata sweet_gale, Scotch_gale, Myrica_gale wax_myrtle bay_myrtle, puckerbush, Myrica_cerifera bayberry, candleberry, swamp_candleberry, waxberry, Myrica_pensylvanica sweet_fern, Comptonia_peregrina, Comptonia_asplenifolia corkwood, corkwood_tree, Leitneria_floridana jointed_rush, Juncus_articulatus toad_rush, Juncus_bufonius slender_rush, Juncus_tenuis zebrawood, zebrawood_tree Connarus_guianensis legume, leguminous_plant legume peanut granadilla_tree, granadillo, Brya_ebenus arariba, Centrolobium_robustum tonka_bean, coumara_nut courbaril, 
Hymenaea_courbaril melilotus, melilot, sweet_clover darling_pea, poison_bush smooth_darling_pea, Swainsona_galegifolia clover, trefoil alpine_clover, Trifolium_alpinum hop_clover, shamrock, lesser_yellow_trefoil, Trifolium_dubium crimson_clover, Italian_clover, Trifolium_incarnatum red_clover, purple_clover, Trifolium_pratense buffalo_clover, Trifolium_reflexum, Trifolium_stoloniferum white_clover, dutch_clover, shamrock, Trifolium_repens mimosa acacia shittah, shittah_tree wattle black_wattle, Acacia_auriculiformis gidgee, stinking_wattle, Acacia_cambegei catechu, Jerusalem_thorn, Acacia_catechu silver_wattle, mimosa, Acacia_dealbata huisache, cassie, mimosa_bush, sweet_wattle, sweet_acacia, scented_wattle, flame_tree, Acacia_farnesiana lightwood, Acacia_melanoxylon golden_wattle, Acacia_pycnantha fever_tree, Acacia_xanthophloea coralwood, coral-wood, red_sandalwood, Barbados_pride, peacock_flower_fence, Adenanthera_pavonina albizzia, albizia silk_tree, Albizia_julibrissin, Albizzia_julibrissin siris, siris_tree, Albizia_lebbeck, Albizzia_lebbeck rain_tree, saman, monkeypod, monkey_pod, zaman, zamang, Albizia_saman calliandra conacaste, elephant's_ear, Enterolobium_cyclocarpa inga ice-cream_bean, Inga_edulis guama, Inga_laurina lead_tree, white_popinac, Leucaena_glauca, Leucaena_leucocephala wild_tamarind, Lysiloma_latisiliqua, Lysiloma_bahamensis sabicu, Lysiloma_sabicu nitta_tree Parkia_javanica manila_tamarind, camachile, huamachil, wild_tamarind, Pithecellobium_dulce cat's-claw, catclaw, black_bead, Pithecellodium_unguis-cati honey_mesquite, Western_honey_mesquite, Prosopis_glandulosa algarroba, algarrobilla, algarobilla screw_bean, screwbean, tornillo, screwbean_mesquite, Prosopis_pubescens screw_bean dogbane Indian_hemp, rheumatism_weed, Apocynum_cannabinum bushman's_poison, ordeal_tree, Acocanthera_oppositifolia, Acocanthera_venenata impala_lily, mock_azalia, desert_rose, kudu_lily, Adenium_obesum, Adenium_multiflorum allamanda common_allamanda, 
golden_trumpet, Allamanda_cathartica dita, dita_bark, devil_tree, Alstonia_scholaris Nepal_trumpet_flower, Easter_lily_vine, Beaumontia_grandiflora carissa hedge_thorn, natal_plum, Carissa_bispinosa natal_plum, amatungulu, Carissa_macrocarpa, Carissa_grandiflora periwinkle, rose_periwinkle, Madagascar_periwinkle, old_maid, Cape_periwinkle, red_periwinkle, cayenne_jasmine, Catharanthus_roseus, Vinca_rosea ivory_tree, conessi, kurchi, kurchee, Holarrhena_pubescens, Holarrhena_antidysenterica white_dipladenia, Mandevilla_boliviensis, Dipladenia_boliviensis Chilean_jasmine, Mandevilla_laxa oleander, rose_bay, Nerium_oleander frangipani, frangipanni West_Indian_jasmine, pagoda_tree, Plumeria_alba rauwolfia, rauvolfia snakewood, Rauwolfia_serpentina Strophanthus_kombe yellow_oleander, Thevetia_peruviana, Thevetia_neriifolia myrtle, Vinca_minor large_periwinkle, Vinca_major arum, aroid cuckoopint, lords-and-ladies, jack-in-the-pulpit, Arum_maculatum black_calla, Arum_palaestinum calamus alocasia, elephant's_ear, elephant_ear giant_taro, Alocasia_macrorrhiza amorphophallus pungapung, telingo_potato, elephant_yam, Amorphophallus_paeonifolius, Amorphophallus_campanulatus devil's_tongue, snake_palm, umbrella_arum, Amorphophallus_rivieri anthurium, tailflower, tail-flower flamingo_flower, flamingo_plant, Anthurium_andraeanum, Anthurium_scherzerianum jack-in-the-pulpit, Indian_turnip, wake-robin, Arisaema_triphyllum, Arisaema_atrorubens friar's-cowl, Arisarum_vulgare caladium Caladium_bicolor wild_calla, water_arum, Calla_palustris taro, taro_plant, dalo, dasheen, Colocasia_esculenta taro, cocoyam, dasheen, eddo cryptocoryne, water_trumpet dracontium golden_pothos, pothos, ivy_arum, Epipremnum_aureum, Scindapsus_aureus skunk_cabbage, Lysichiton_americanum monstera ceriman, Monstera_deliciosa nephthytis Nephthytis_afzelii arrow_arum green_arrow_arum, tuckahoe, Peltandra_virginica philodendron pistia, water_lettuce, water_cabbage, Pistia_stratiotes, Pistia_stratoites pothos 
spathiphyllum, peace_lily, spathe_flower skunk_cabbage, polecat_weed, foetid_pothos, Symplocarpus_foetidus yautia, tannia, spoonflower, malanga, Xanthosoma_sagittifolium, Xanthosoma_atrovirens calla_lily, calla, arum_lily, Zantedeschia_aethiopica pink_calla, Zantedeschia_rehmanii golden_calla duckweed common_duckweed, lesser_duckweed, Lemna_minor star-duckweed, Lemna_trisulca great_duckweed, water_flaxseed, Spirodela_polyrrhiza watermeal common_wolffia, Wolffia_columbiana aralia American_angelica_tree, devil's_walking_stick, Hercules'-club, Aralia_spinosa American_spikenard, petty_morel, life-of-man, Aralia_racemosa bristly_sarsaparilla, bristly_sarsparilla, dwarf_elder, Aralia_hispida Japanese_angelica_tree, Aralia_elata Chinese_angelica, Chinese_angelica_tree, Aralia_stipulata ivy, common_ivy, English_ivy, Hedera_helix puka, Meryta_sinclairii ginseng, nin-sin, Panax_ginseng, Panax_schinseng, Panax_pseudoginseng ginseng umbrella_tree, Schefflera_actinophylla, Brassaia_actinophylla birthwort, Aristolochia_clematitis Dutchman's-pipe, pipe_vine, Aristolochia_macrophylla, Aristolochia_durior Virginia_snakeroot, Virginia_serpentaria, Virginia_serpentary, Aristolochia_serpentaria Canada_ginger, black_snakeroot, Asarum_canadense heartleaf, heart-leaf, Asarum_virginicum heartleaf, heart-leaf, Asarum_shuttleworthii asarabacca, Asarum_europaeum caryophyllaceous_plant corn_cockle, corn_campion, crown-of-the-field, Agrostemma_githago sandwort mountain_sandwort, mountain_starwort, mountain_daisy, Arenaria_groenlandica pine-barren_sandwort, longroot, Arenaria_caroliniana seabeach_sandwort, Arenaria_peploides rock_sandwort, Arenaria_stricta thyme-leaved_sandwort, Arenaria_serpyllifolia mouse-ear_chickweed, mouse_eared_chickweed, mouse_ear, clammy_chickweed, chickweed snow-in-summer, love-in-a-mist, Cerastium_tomentosum Alpine_mouse-ear, Arctic_mouse-ear, Cerastium_alpinum pink, garden_pink sweet_William, Dianthus_barbatus carnation, clove_pink, gillyflower, Dianthus_caryophyllus 
china_pink, rainbow_pink, Dianthus_chinensis Japanese_pink, Dianthus_chinensis_heddewigii maiden_pink, Dianthus_deltoides cheddar_pink, Diangus_gratianopolitanus button_pink, Dianthus_latifolius cottage_pink, grass_pink, Dianthus_plumarius fringed_pink, Dianthus_supurbus drypis baby's_breath, babies'-breath, Gypsophila_paniculata coral_necklace, Illecebrum_verticullatum lychnis, catchfly ragged_robin, cuckoo_flower, Lychnis_flos-cuculi, Lychins_floscuculi scarlet_lychnis, maltese_cross, Lychins_chalcedonica mullein_pink, rose_campion, gardener's_delight, dusty_miller, Lychnis_coronaria sandwort, Moehringia_lateriflora sandwort, Moehringia_mucosa soapwort, hedge_pink, bouncing_Bet, bouncing_Bess, Saponaria_officinalis knawel, knawe, Scleranthus_annuus silene, campion, catchfly moss_campion, Silene_acaulis wild_pink, Silene_caroliniana red_campion, red_bird's_eye, Silene_dioica, Lychnis_dioica white_campion, evening_lychnis, white_cockle, bladder_campion, Silene_latifolia, Lychnis_alba fire_pink, Silene_virginica bladder_campion, Silene_uniflora, Silene_vulgaris corn_spurry, corn_spurrey, Spergula_arvensis sand_spurry, sea_spurry, Spergularia_rubra chickweed common_chickweed, Stellaria_media cowherb, cow_cockle, Vaccaria_hispanica, Vaccaria_pyramidata, Saponaria_vaccaria Hottentot_fig, Hottentot's_fig, sour_fig, Carpobrotus_edulis, Mesembryanthemum_edule livingstone_daisy, Dorotheanthus_bellidiformis fig_marigold, pebble_plant ice_plant, icicle_plant, Mesembryanthemum_crystallinum New_Zealand_spinach, Tetragonia_tetragonioides, Tetragonia_expansa amaranth amaranth tumbleweed, Amaranthus_albus, Amaranthus_graecizans prince's-feather, gentleman's-cane, prince's-plume, red_amaranth, purple_amaranth, Amaranthus_cruentus, Amaranthus_hybridus_hypochondriacus, Amaranthus_hybridus_erythrostachys pigweed, Amaranthus_hypochondriacus thorny_amaranth, Amaranthus_spinosus alligator_weed, alligator_grass, Alternanthera_philoxeroides cockscomb, common_cockscomb, Celosia_cristata, 
Celosia_argentea_cristata cottonweed globe_amaranth, bachelor's_button, Gomphrena_globosa bloodleaf saltwort, Batis_maritima lamb's-quarters, pigweed, wild_spinach, Chenopodium_album good-king-henry, allgood, fat_hen, wild_spinach, Chenopodium_bonus-henricus Jerusalem_oak, feather_geranium, Mexican_tea, Chenopodium_botrys, Atriplex_mexicana oak-leaved_goosefoot, oakleaf_goosefoot, Chenopodium_glaucum sowbane, red_goosefoot, Chenopodium_hybridum nettle-leaved_goosefoot, nettleleaf_goosefoot, Chenopodium_murale red_goosefoot, French_spinach, Chenopodium_rubrum stinking_goosefoot, Chenopodium_vulvaria orach, orache saltbush garden_orache, mountain_spinach, Atriplex_hortensis desert_holly, Atriplex_hymenelytra quail_bush, quail_brush, white_thistle, Atriplex_lentiformis beet, common_beet, Beta_vulgaris beetroot, Beta_vulgaris_rubra chard, Swiss_chard, spinach_beet, leaf_beet, chard_plant, Beta_vulgaris_cicla mangel-wurzel, mangold-wurzel, mangold, Beta_vulgaris_vulgaris winged_pigweed, tumbleweed, Cycloloma_atriplicifolium halogeton, Halogeton_glomeratus glasswort, samphire, Salicornia_europaea saltwort, barilla, glasswort, kali, kelpwort, Salsola_kali, Salsola_soda Russian_thistle, Russian_tumbleweed, Russian_cactus, tumbleweed, Salsola_kali_tenuifolia greasewood, black_greasewood, Sarcobatus_vermiculatus scarlet_musk_flower, Nyctaginia_capitata sand_verbena sweet_sand_verbena, Abronia_fragrans yellow_sand_verbena, Abronia_latifolia beach_pancake, Abronia_maritima beach_sand_verbena, pink_sand_verbena, Abronia_umbellata desert_sand_verbena, Abronia_villosa trailing_four_o'clock, trailing_windmills, Allionia_incarnata bougainvillea umbrellawort four_o'clock common_four-o'clock, marvel-of-Peru, Mirabilis_jalapa, Mirabilis_uniflora California_four_o'clock, Mirabilis_laevis, Mirabilis_californica sweet_four_o'clock, maravilla, Mirabilis_longiflora desert_four_o'clock, Colorado_four_o'clock, maravilla, Mirabilis_multiflora mountain_four_o'clock, Mirabilis_oblongifolia 
cockspur, Pisonia_aculeata rattail_cactus, rat's-tail_cactus, Aporocactus_flagelliformis saguaro, sahuaro, Carnegiea_gigantea night-blooming_cereus echinocactus, barrel_cactus hedgehog_cactus golden_barrel_cactus, Echinocactus_grusonii hedgehog_cereus rainbow_cactus epiphyllum, orchid_cactus barrel_cactus night-blooming_cereus chichipe, Lemaireocereus_chichipe mescal, mezcal, peyote, Lophophora_williamsii mescal_button, sacred_mushroom, magic_mushroom mammillaria feather_ball, Mammillaria_plumosa garambulla, garambulla_cactus, Myrtillocactus_geometrizans Knowlton's_cactus, Pediocactus_knowltonii nopal prickly_pear, prickly_pear_cactus cholla, Opuntia_cholla nopal, Opuntia_lindheimeri tuna, Opuntia_tuna Barbados_gooseberry, Barbados-gooseberry_vine, Pereskia_aculeata mistletoe_cactus Christmas_cactus, Schlumbergera_buckleyi, Schlumbergera_baridgesii night-blooming_cereus crab_cactus, Thanksgiving_cactus, Zygocactus_truncatus, Schlumbergera_truncatus pokeweed Indian_poke, Phytolacca_acinosa poke, pigeon_berry, garget, scoke, Phytolacca_americana ombu, bella_sombra, Phytolacca_dioica bloodberry, blood_berry, rougeberry, rouge_plant, Rivina_humilis portulaca rose_moss, sun_plant, Portulaca_grandiflora common_purslane, pussley, pussly, verdolagas, Portulaca_oleracea rock_purslane red_maids, redmaids, Calandrinia_ciliata Carolina_spring_beauty, Claytonia_caroliniana spring_beauty, Clatonia_lanceolata Virginia_spring_beauty, Claytonia_virginica siskiyou_lewisia, Lewisia_cotyledon bitterroot, Lewisia_rediviva broad-leaved_montia, Montia_cordifolia blinks, blinking_chickweed, water_chickweed, Montia_lamprosperma toad_lily, Montia_chamissoi winter_purslane, miner's_lettuce, Cuban_spinach, Montia_perfoliata flame_flower, flame-flower, flameflower, Talinum_aurantiacum pigmy_talinum, Talinum_brevifolium jewels-of-opar, Talinum_paniculatum caper native_pomegranate, Capparis_arborea caper_tree, Jamaica_caper_tree, Capparis_cynophallophora caper_tree, bay-leaved_caper, 
Capparis_flexuosa common_caper, Capparis_spinosa spiderflower, cleome Rocky_Mountain_bee_plant, stinking_clover, Cleome_serrulata clammyweed, Polanisia_graveolens, Polanisia_dodecandra crucifer, cruciferous_plant cress, cress_plant watercress stonecress, stone_cress garlic_mustard, hedge_garlic, sauce-alone, jack-by-the-hedge, Alliaria_officinalis alyssum, madwort rose_of_Jericho, resurrection_plant, Anastatica_hierochuntica Arabidopsis_thaliana, mouse-ear_cress Arabidopsis_lyrata rock_cress, rockcress sicklepod, Arabis_Canadensis tower_mustard, tower_cress, Turritis_glabra, Arabis_glabra horseradish, horseradish_root winter_cress, St._Barbara's_herb, scurvy_grass yellow_rocket, rockcress, rocket_cress, Barbarea_vulgaris, Sisymbrium_barbarea hoary_alison, hoary_alyssum, Berteroa_incana buckler_mustard, Biscutalla_laevigata wild_cabbage, Brassica_oleracea cabbage, cultivated_cabbage, Brassica_oleracea head_cabbage, head_cabbage_plant, Brassica_oleracea_capitata savoy_cabbage brussels_sprout, Brassica_oleracea_gemmifera cauliflower, Brassica_oleracea_botrytis broccoli, Brassica_oleracea_italica collard kohlrabi, Brassica_oleracea_gongylodes turnip_plant turnip, white_turnip, Brassica_rapa rutabaga, turnip_cabbage, swede, Swedish_turnip, rutabaga_plant, Brassica_napus_napobrassica broccoli_raab, broccoli_rabe, Brassica_rapa_ruvo mustard chinese_mustard, indian_mustard, leaf_mustard, gai_choi, Brassica_juncea bok_choy, bok_choi, pakchoi, pak_choi, Chinese_white_cabbage, Brassica_rapa_chinensis rape, colza, Brassica_napus rapeseed shepherd's_purse, shepherd's_pouch, Capsella_bursa-pastoris lady's_smock, cuckooflower, cuckoo_flower, meadow_cress, Cardamine_pratensis coral-root_bittercress, coralroot, coralwort, Cardamine_bulbifera, Dentaria_bulbifera crinkleroot, crinkle-root, crinkle_root, pepper_root, toothwort, Cardamine_diphylla, Dentaria_diphylla American_watercress, mountain_watercress, Cardamine_rotundifolia spring_cress, Cardamine_bulbosa purple_cress, 
Cardamine_douglasii wallflower, Cheiranthus_cheiri, Erysimum_cheiri prairie_rocket scurvy_grass, common_scurvy_grass, Cochlearia_officinalis sea_kale, sea_cole, Crambe_maritima tansy_mustard, Descurainia_pinnata draba wallflower prairie_rocket Siberian_wall_flower, Erysimum_allionii, Cheiranthus_allionii western_wall_flower, Erysimum_asperum, Cheiranthus_asperus, Erysimum_arkansanum wormseed_mustard, Erysimum_cheiranthoides heliophila damask_violet, Dame's_violet, sweet_rocket, Hesperis_matronalis tansy-leaved_rocket, Hugueninia_tanacetifolia, Sisymbrium_tanacetifolia candytuft woad dyer's_woad, Isatis_tinctoria bladderpod sweet_alyssum, sweet_alison, Lobularia_maritima Malcolm_stock, stock Virginian_stock, Virginia_stock, Malcolmia_maritima stock, gillyflower brompton_stock, Matthiola_incana bladderpod chamois_cress, Pritzelago_alpina, Lepidium_alpina radish_plant, radish jointed_charlock, wild_radish, wild_rape, runch, Raphanus_raphanistrum radish, Raphanus_sativus radish, daikon, Japanese_radish, Raphanus_sativus_longipinnatus marsh_cress, yellow_watercress, Rorippa_islandica great_yellowcress, Rorippa_amphibia, Nasturtium_amphibium schizopetalon, Schizopetalon_walkeri field_mustard, wild_mustard, charlock, chadlock, Brassica_kaber, Sinapis_arvensis hedge_mustard, Sisymbrium_officinale desert_plume, prince's-plume, Stanleya_pinnata, Cleome_pinnata pennycress field_pennycress, French_weed, fanweed, penny_grass, stinkweed, mithridate_mustard, Thlaspi_arvense fringepod, lacepod bladderpod wasabi poppy Iceland_poppy, Papaver_alpinum western_poppy, Papaver_californicum prickly_poppy, Papaver_argemone Iceland_poppy, arctic_poppy, Papaver_nudicaule oriental_poppy, Papaver_orientale corn_poppy, field_poppy, Flanders_poppy, Papaver_rhoeas opium_poppy, Papaver_somniferum prickly_poppy, argemone, white_thistle, devil's_fig Mexican_poppy, Argemone_mexicana bocconia, tree_celandine, Bocconia_frutescens celandine, greater_celandine, swallowwort, swallow_wort, 
Chelidonium_majus corydalis climbing_corydalis, Corydalis_claviculata, Fumaria_claviculata California_poppy, Eschscholtzia_californica horn_poppy, horned_poppy, yellow_horned_poppy, sea_poppy, Glaucium_flavum golden_cup, Mexican_tulip_poppy, Hunnemania_fumariifolia plume_poppy, bocconia, Macleaya_cordata blue_poppy, Meconopsis_betonicifolia Welsh_poppy, Meconopsis_cambrica creamcups, Platystemon_californicus matilija_poppy, California_tree_poppy, Romneya_coulteri wind_poppy, flaming_poppy, Stylomecon_heterophyllum, Papaver_heterophyllum celandine_poppy, wood_poppy, Stylophorum_diphyllum climbing_fumitory, Allegheny_vine, Adlumia_fungosa, Fumaria_fungosa bleeding_heart, lyreflower, lyre-flower, Dicentra_spectabilis Dutchman's_breeches, Dicentra_cucullaria squirrel_corn, Dicentra_canadensis composite, composite_plant compass_plant, compass_flower everlasting, everlasting_flower achillea yarrow, milfoil, Achillea_millefolium pink-and-white_everlasting, pink_paper_daisy, Acroclinium_roseum white_snakeroot, white_sanicle, Ageratina_altissima, Eupatorium_rugosum ageratum common_ageratum, Ageratum_houstonianum sweet_sultan, Amberboa_moschata, Centaurea_moschata ragweed, ambrosia, bitterweed common_ragweed, Ambrosia_artemisiifolia great_ragweed, Ambrosia_trifida western_ragweed, perennial_ragweed, Ambrosia_psilostachya ammobium winged_everlasting, Ammobium_alatum pellitory, pellitory-of-Spain, Anacyclus_pyrethrum pearly_everlasting, cottonweed, Anaphalis_margaritacea andryala plantain-leaved_pussytoes field_pussytoes solitary_pussytoes mountain_everlasting mayweed, dog_fennel, stinking_mayweed, stinking_chamomile, Anthemis_cotula yellow_chamomile, golden_marguerite, dyers'_chamomile, Anthemis_tinctoria corn_chamomile, field_chamomile, corn_mayweed, Anthemis_arvensis woolly_daisy, dwarf_daisy, Antheropeas_wallacei, Eriophyllum_wallacei burdock, clotbur great_burdock, greater_burdock, cocklebur, Arctium_lappa African_daisy blue-eyed_African_daisy, Arctotis_stoechadifolia, 
Arctotis_venusta marguerite, marguerite_daisy, Paris_daisy, Chrysanthemum_frutescens, Argyranthemum_frutescens silversword, Argyroxiphium_sandwicense arnica heartleaf_arnica, Arnica_cordifolia Arnica_montana lamb_succory, dwarf_nipplewort, Arnoseris_minima artemisia mugwort sweet_wormwood, Artemisia_annua field_wormwood, Artemisia_campestris tarragon, estragon, Artemisia_dracunculus sand_sage, silvery_wormwood, Artemisia_filifolia wormwood_sage, prairie_sagewort, Artemisia_frigida western_mugwort, white_sage, cudweed, prairie_sage, Artemisia_ludoviciana, Artemisia_gnaphalodes Roman_wormwood, Artemis_pontica bud_brush, bud_sagebrush, Artemis_spinescens common_mugwort, Artemisia_vulgaris aster wood_aster whorled_aster, Aster_acuminatus heath_aster, Aster_arenosus heart-leaved_aster, Aster_cordifolius white_wood_aster, Aster_divaricatus bushy_aster, Aster_dumosus heath_aster, Aster_ericoides white_prairie_aster, Aster_falcatus stiff_aster, Aster_linarifolius goldilocks, goldilocks_aster, Aster_linosyris, Linosyris_vulgaris large-leaved_aster, Aster_macrophyllus New_England_aster, Aster_novae-angliae Michaelmas_daisy, New_York_aster, Aster_novi-belgii upland_white_aster, Aster_ptarmicoides Short's_aster, Aster_shortii sea_aster, sea_starwort, Aster_tripolium prairie_aster, Aster_turbinellis annual_salt-marsh_aster aromatic_aster arrow_leaved_aster azure_aster bog_aster crooked-stemmed_aster Eastern_silvery_aster flat-topped_white_aster late_purple_aster panicled_aster perennial_salt_marsh_aster purple-stemmed_aster rough-leaved_aster rush_aster Schreiber's_aster small_white_aster smooth_aster southern_aster starved_aster, calico_aster tradescant's_aster wavy-leaved_aster Western_silvery_aster willow_aster ayapana, Ayapana_triplinervis, Eupatorium_aya-pana mule_fat, Baccharis_viminea balsamroot daisy common_daisy, English_daisy, Bellis_perennis bur_marigold, burr_marigold, beggar-ticks, beggar's-ticks, sticktight Spanish_needles, Bidens_bipinnata tickseed_sunflower, 
Bidens_coronata, Bidens_trichosperma European_beggar-ticks, trifid_beggar-ticks, trifid_bur_marigold, Bidens_tripartita slender_knapweed false_chamomile Swan_River_daisy, Brachycome_Iberidifolia woodland_oxeye, Buphthalmum_salicifolium Indian_plantain calendula common_marigold, pot_marigold, ruddles, Scotch_marigold, Calendula_officinalis China_aster, Callistephus_chinensis thistle welted_thistle, Carduus_crispus musk_thistle, nodding_thistle, Carduus_nutans carline_thistle stemless_carline_thistle, Carlina_acaulis common_carline_thistle, Carlina_vulgaris safflower, false_saffron, Carthamus_tinctorius safflower_seed catananche blue_succory, cupid's_dart, Catananche_caerulea centaury dusty_miller, Centaurea_cineraria, Centaurea_gymnocarpa cornflower, bachelor's_button, bluebottle, Centaurea_cyanus star-thistle, caltrop, Centauria_calcitrapa knapweed sweet_sultan, Centaurea_imperialis great_knapweed, greater_knapweed, Centaurea_scabiosa Barnaby's_thistle, yellow_star-thistle, Centaurea_solstitialis chamomile, camomile, Chamaemelum_nobilis, Anthemis_nobilis chaenactis chrysanthemum corn_marigold, field_marigold, Chrysanthemum_segetum crown_daisy, Chrysanthemum_coronarium chop-suey_greens, tong_ho, shun_giku, Chrysanthemum_coronarium_spatiosum golden_aster Maryland_golden_aster, Chrysopsis_mariana goldenbush rabbit_brush, rabbit_bush, Chrysothamnus_nauseosus chicory, succory, chicory_plant, Cichorium_intybus endive, witloof, Cichorium_endivia chicory, chicory_root plume_thistle, plumed_thistle Canada_thistle, creeping_thistle, Cirsium_arvense field_thistle, Cirsium_discolor woolly_thistle, Cirsium_flodmanii European_woolly_thistle, Cirsium_eriophorum melancholy_thistle, Cirsium_heterophylum, Cirsium_helenioides brook_thistle, Cirsium_rivulare bull_thistle, boar_thistle, spear_thistle, Cirsium_vulgare, Cirsium_lanceolatum blessed_thistle, sweet_sultan, Cnicus_benedictus mistflower, mist-flower, ageratum, Conoclinium_coelestinum, Eupatorium_coelestinum horseweed, 
Canadian_fleabane, fleabane, Conyza_canadensis, Erigeron_canadensis coreopsis, tickseed, tickweed, tick-weed giant_coreopsis, Coreopsis_gigantea sea_dahlia, Coreopsis_maritima calliopsis, Coreopsis_tinctoria cosmos, cosmea brass_buttons, Cotula_coronopifolia billy_buttons hawk's-beard, hawk's-beards artichoke, globe_artichoke, artichoke_plant, Cynara_scolymus cardoon, Cynara_cardunculus dahlia, Dahlia_pinnata German_ivy, Delairea_odorata, Senecio_milkanioides florist's_chrysanthemum, florists'_chrysanthemum, mum, Dendranthema_grandifloruom, Chrysanthemum_morifolium cape_marigold, sun_marigold, star_of_the_veldt leopard's-bane, leopardbane coneflower globe_thistle elephant's-foot tassel_flower, Emilia_sagitta brittlebush, brittle_bush, incienso, Encelia_farinosa sunray, Enceliopsis_nudicaulis engelmannia fireweed, Erechtites_hieracifolia fleabane blue_fleabane, Erigeron_acer daisy_fleabane, Erigeron_annuus orange_daisy, orange_fleabane, Erigeron_aurantiacus spreading_fleabane, Erigeron_divergens seaside_daisy, beach_aster, Erigeron_glaucous Philadelphia_fleabane, Erigeron_philadelphicus robin's_plantain, Erigeron_pulchellus showy_daisy, Erigeron_speciosus woolly_sunflower golden_yarrow, Eriophyllum_lanatum dog_fennel, Eupatorium_capillifolium Joe-Pye_weed, spotted_Joe-Pye_weed, Eupatorium_maculatum boneset, agueweed, thoroughwort, Eupatorium_perfoliatum Joe-Pye_weed, purple_boneset, trumpet_weed, marsh_milkweed, Eupatorium_purpureum blue_daisy, blue_marguerite, Felicia_amelloides kingfisher_daisy, Felicia_bergeriana cotton_rose, cudweed, filago herba_impia, Filago_germanica gaillardia gazania treasure_flower, Gazania_rigens African_daisy Barberton_daisy, Transvaal_daisy, Gerbera_jamesonii desert_sunflower, Gerea_canescens cudweed chafeweed, wood_cudweed, Gnaphalium_sylvaticum gumweed, gum_plant, tarweed, rosinweed Grindelia_robusta curlycup_gumweed, Grindelia_squarrosa little-head_snakeweed, Gutierrezia_microcephala rabbitweed, rabbit-weed, snakeweed, 
broom_snakeweed, broom_snakeroot, turpentine_weed, Gutierrezia_sarothrae broomweed, broom-weed, Gutierrezia_texana velvet_plant, purple_velvet_plant, royal_velvet_plant, Gynura_aurantiaca goldenbush camphor_daisy, Haplopappus_phyllocephalus yellow_spiny_daisy, Haplopappus_spinulosus hoary_golden_bush, Hazardia_cana sneezeweed orange_sneezeweed, owlclaws, Helenium_hoopesii rosilla, Helenium_puberulum sunflower, helianthus swamp_sunflower, Helianthus_angustifolius common_sunflower, mirasol, Helianthus_annuus giant_sunflower, tall_sunflower, Indian_potato, Helianthus_giganteus showy_sunflower, Helianthus_laetiflorus Maximilian's_sunflower, Helianthus_maximilianii prairie_sunflower, Helianthus_petiolaris Jerusalem_artichoke, girasol, Jerusalem_artichoke_sunflower, Helianthus_tuberosus Jerusalem_artichoke strawflower, golden_everlasting, yellow_paper_daisy, Helichrysum_bracteatum heliopsis, oxeye strawflower hairy_golden_aster, prairie_golden_aster, Heterotheca_villosa, Chrysopsis_villosa hawkweed rattlesnake_weed, Hieracium_venosum alpine_coltsfoot, Homogyne_alpina, Tussilago_alpina alpine_gold, alpine_hulsea, Hulsea_algida dwarf_hulsea, Hulsea_nana cat's-ear, California_dandelion, capeweed, gosmore, Hypochaeris_radicata inula marsh_elder, iva burweed_marsh_elder, false_ragweed, Iva_xanthifolia krigia dwarf_dandelion, Krigia_dandelion, Krigia_bulbosa garden_lettuce, common_lettuce, Lactuca_sativa cos_lettuce, romaine_lettuce, Lactuca_sativa_longifolia leaf_lettuce, Lactuca_sativa_crispa celtuce, stem_lettuce, Lactuca_sativa_asparagina prickly_lettuce, horse_thistle, Lactuca_serriola, Lactuca_scariola goldfields, Lasthenia_chrysostoma tidytips, tidy_tips, Layia_platyglossa hawkbit fall_dandelion, arnica_bud, Leontodon_autumnalis edelweiss, Leontopodium_alpinum oxeye_daisy, ox-eyed_daisy, marguerite, moon_daisy, white_daisy, Leucanthemum_vulgare, Chrysanthemum_leucanthemum oxeye_daisy, Leucanthemum_maximum, Chrysanthemum_maximum shasta_daisy, Leucanthemum_superbum, 
Chrysanthemum_maximum_maximum Pyrenees_daisy, Leucanthemum_lacustre, Chrysanthemum_lacustre north_island_edelweiss, Leucogenes_leontopodium blazing_star, button_snakeroot, gayfeather, gay-feather, snakeroot dotted_gayfeather, Liatris_punctata dense_blazing_star, Liatris_pycnostachya Texas_star, Lindheimera_texana African_daisy, yellow_ageratum, Lonas_inodora, Lonas_annua tahoka_daisy, tansy_leaf_aster, Machaeranthera_tanacetifolia sticky_aster, Machaeranthera_bigelovii Mojave_aster, Machaeranthera_tortifoloia tarweed sweet_false_chamomile, wild_chamomile, German_chamomile, Matricaria_recutita, Matricaria_chamomilla pineapple_weed, rayless_chamomile, Matricaria_matricarioides climbing_hempweed, climbing_boneset, wild_climbing_hempweed, climbing_hemp-vine, Mikania_scandens mutisia rattlesnake_root white_lettuce, cankerweed, Nabalus_alba, Prenanthes_alba daisybush, daisy-bush, daisy_bush New_Zealand_daisybush, Olearia_haastii cotton_thistle, woolly_thistle, Scotch_thistle, Onopordum_acanthium, Onopordon_acanthium othonna cascade_everlasting, Ozothamnus_secundiflorus, Helichrysum_secundiflorum butterweed American_feverfew, wild_quinine, prairie_dock, Parthenium_integrifolium cineraria, Pericallis_cruenta, Senecio_cruentus florest's_cineraria, Pericallis_hybrida butterbur, bog_rhubarb, Petasites_hybridus, Petasites_vulgaris winter_heliotrope, sweet_coltsfoot, Petasites_fragrans sweet_coltsfoot, Petasites_sagitattus oxtongue, bristly_oxtongue, bitterweed, bugloss, Picris_echioides hawkweed mouse-ear_hawkweed, Pilosella_officinarum, Hieracium_pilocella stevia rattlesnake_root, Prenanthes_purpurea fleabane, feabane_mullet, Pulicaria_dysenterica sheep_plant, vegetable_sheep, Raoulia_lutescens, Raoulia_australis coneflower Mexican_hat, Ratibida_columnaris long-head_coneflower, prairie_coneflower, Ratibida_columnifera prairie_coneflower, Ratibida_tagetes Swan_River_everlasting, rhodanthe, Rhodanthe_manglesii, Helipterum_manglesii coneflower black-eyed_Susan, Rudbeckia_hirta, 
Rudbeckia_serotina cutleaved_coneflower, Rudbeckia_laciniata golden_glow, double_gold, hortensia, Rudbeckia_laciniata_hortensia lavender_cotton, Santolina_chamaecyparissus creeping_zinnia, Sanvitalia_procumbens golden_thistle Spanish_oyster_plant, Scolymus_hispanicus nodding_groundsel, Senecio_bigelovii dusty_miller, Senecio_cineraria, Cineraria_maritima butterweed, ragwort, Senecio_glabellus ragwort, tansy_ragwort, ragweed, benweed, Senecio_jacobaea arrowleaf_groundsel, Senecio_triangularis black_salsify, viper's_grass, scorzonera, Scorzonera_hispanica white-topped_aster narrow-leaved_white-topped_aster silver_sage, silver_sagebrush, grey_sage, gray_sage, Seriphidium_canum, Artemisia_cana sea_wormwood, Seriphidium_maritimum, Artemisia_maritima sawwort, Serratula_tinctoria rosinweed, Silphium_laciniatum milk_thistle, lady's_thistle, Our_Lady's_mild_thistle, holy_thistle, blessed_thistle, Silybum_marianum goldenrod silverrod, Solidago_bicolor meadow_goldenrod, Canadian_goldenrod, Solidago_canadensis Missouri_goldenrod, Solidago_missouriensis alpine_goldenrod, Solidago_multiradiata grey_goldenrod, gray_goldenrod, Solidago_nemoralis Blue_Mountain_tea, sweet_goldenrod, Solidago_odora dyer's_weed, Solidago_rugosa seaside_goldenrod, beach_goldenrod, Solidago_sempervirens narrow_goldenrod, Solidago_spathulata Boott's_goldenrod Elliott's_goldenrod Ohio_goldenrod rough-stemmed_goldenrod showy_goldenrod tall_goldenrod zigzag_goldenrod, broad_leaved_goldenrod sow_thistle, milk_thistle milkweed, Sonchus_oleraceus stevia stokes'_aster, cornflower_aster, Stokesia_laevis marigold African_marigold, big_marigold, Aztec_marigold, Tagetes_erecta French_marigold, Tagetes_patula painted_daisy, pyrethrum, Tanacetum_coccineum, Chrysanthemum_coccineum pyrethrum, Dalmatian_pyrethrum, Dalmatia_pyrethrum, Tanacetum_cinerariifolium, Chrysanthemum_cinerariifolium northern_dune_tansy, Tanacetum_douglasii feverfew, Tanacetum_parthenium, Chrysanthemum_parthenium dusty_miller, silver-lace, 
silver_lace, Tanacetum_ptarmiciflorum, Chrysanthemum_ptarmiciflorum tansy, golden_buttons, scented_fern, Tanacetum_vulgare dandelion, blowball common_dandelion, Taraxacum_ruderalia, Taraxacum_officinale dandelion_green Russian_dandelion, kok-saghyz, kok-sagyz, Taraxacum_kok-saghyz stemless_hymenoxys, Tetraneuris_acaulis, Hymenoxys_acaulis Mexican_sunflower, tithonia Easter_daisy, stemless_daisy, Townsendia_Exscapa yellow_salsify, Tragopogon_dubius salsify, oyster_plant, vegetable_oyster, Tragopogon_porrifolius meadow_salsify, goatsbeard, shepherd's_clock, Tragopogon_pratensis scentless_camomile, scentless_false_camomile, scentless_mayweed, scentless_hayweed, corn_mayweed, Tripleurospermum_inodorum, Matricaria_inodorum turfing_daisy, Tripleurospermum_tchihatchewii, Matricaria_tchihatchewii coltsfoot, Tussilago_farfara ursinia crownbeard, crown-beard, crown_beard wingstem, golden_ironweed, yellow_ironweed, golden_honey_plant, Verbesina_alternifolia, Actinomeris_alternifolia cowpen_daisy, golden_crownbeard, golden_crown_beard, butter_daisy, Verbesina_encelioides, Ximenesia_encelioides gravelweed, Verbesina_helianthoides Virginia_crownbeard, frostweed, frost-weed, Verbesina_virginica ironweed, vernonia mule's_ears, Wyethia_amplexicaulis white-rayed_mule's_ears, Wyethia_helianthoides cocklebur, cockle-bur, cockleburr, cockle-burr xeranthemum immortelle, Xeranthemum_annuum zinnia, old_maid, old_maid_flower white_zinnia, Zinnia_acerosa little_golden_zinnia, Zinnia_grandiflora blazing_star, Mentzelia_livicaulis, Mentzelia_laevicaulis bartonia, Mentzelia_lindleyi achene samara, key_fruit, key campanula, bellflower creeping_bellflower, Campanula_rapunculoides Canterbury_bell, cup_and_saucer, Campanula_medium tall_bellflower, Campanula_americana marsh_bellflower, Campanula_aparinoides clustered_bellflower, Campanula_glomerata peach_bells, peach_bell, willow_bell, Campanula_persicifolia chimney_plant, chimney_bellflower, Campanula_pyramidalis rampion, rampion_bellflower, 
Campanula_rapunculus tussock_bellflower, spreading_bellflower, Campanula_carpatica orchid, orchidaceous_plant orchis male_orchis, early_purple_orchid, Orchis_mascula butterfly_orchid, butterfly_orchis, Orchis_papilionaceae showy_orchis, purple_orchis, purple-hooded_orchis, Orchis_spectabilis aerides angrecum jewel_orchid puttyroot, adam-and-eve, Aplectrum_hyemale arethusa bog_rose, wild_pink, dragon's_mouth, Arethusa_bulbosa bletia Bletilla_striata, Bletia_striata brassavola spider_orchid, Brassia_lawrenceana spider_orchid, Brassia_verrucosa caladenia calanthe grass_pink, Calopogon_pulchellum, Calopogon_tuberosum calypso, fairy-slipper, Calypso_bulbosa cattleya helleborine red_helleborine, Cephalanthera_rubra spreading_pogonia, funnel-crest_rosebud_orchid, Cleistes_divaricata, Pogonia_divaricata rosebud_orchid, Cleistes_rosea, Pogonia_rosea satyr_orchid, Coeloglossum_bracteatum frog_orchid, Coeloglossum_viride coelogyne coral_root spotted_coral_root, Corallorhiza_maculata striped_coral_root, Corallorhiza_striata early_coral_root, pale_coral_root, Corallorhiza_trifida swan_orchid, swanflower, swan-flower, swanneck, swan-neck cymbid, cymbidium cypripedia lady's_slipper, lady-slipper, ladies'_slipper, slipper_orchid moccasin_flower, nerveroot, Cypripedium_acaule common_lady's-slipper, showy_lady's-slipper, showy_lady_slipper, Cypripedium_reginae, Cypripedium_album ram's-head, ram's-head_lady's_slipper, Cypripedium_arietinum yellow_lady's_slipper, yellow_lady-slipper, Cypripedium_calceolus, Cypripedium_parviflorum large_yellow_lady's_slipper, Cypripedium_calceolus_pubescens California_lady's_slipper, Cypripedium_californicum clustered_lady's_slipper, Cypripedium_fasciculatum mountain_lady's_slipper, Cypripedium_montanum marsh_orchid common_spotted_orchid, Dactylorhiza_fuchsii, Dactylorhiza_maculata_fuchsii dendrobium disa phantom_orchid, snow_orchid, Eburophyton_austinae tulip_orchid, Encyclia_citrina, Cattleya_citrina butterfly_orchid, Encyclia_tampensis, 
Epidendrum_tampense butterfly_orchid, butterfly_orchis, Epidendrum_venosum, Encyclia_venosa epidendron helleborine Epipactis_helleborine stream_orchid, chatterbox, giant_helleborine, Epipactis_gigantea tongueflower, tongue-flower rattlesnake_plantain, helleborine fragrant_orchid, Gymnadenia_conopsea short-spurred_fragrant_orchid, Gymnadenia_odoratissima fringed_orchis, fringed_orchid frog_orchid rein_orchid, rein_orchis bog_rein_orchid, bog_candles, Habenaria_dilatata white_fringed_orchis, white_fringed_orchid, Habenaria_albiflora elegant_Habenaria, Habenaria_elegans purple-fringed_orchid, purple-fringed_orchis, Habenaria_fimbriata coastal_rein_orchid, Habenaria_greenei Hooker's_orchid, Habenaria_hookeri ragged_orchid, ragged_orchis, ragged-fringed_orchid, green_fringed_orchis, Habenaria_lacera prairie_orchid, prairie_white-fringed_orchis, Habenaria_leucophaea snowy_orchid, Habenaria_nivea round-leaved_rein_orchid, Habenaria_orbiculata purple_fringeless_orchid, purple_fringeless_orchis, Habenaria_peramoena purple-fringed_orchid, purple-fringed_orchis, Habenaria_psycodes Alaska_rein_orchid, Habenaria_unalascensis crested_coral_root, Hexalectris_spicata Texas_purple_spike, Hexalectris_warnockii lizard_orchid, Himantoglossum_hircinum laelia liparis twayblade fen_orchid, fen_orchis, Liparis_loeselii broad-leaved_twayblade, Listera_convallarioides lesser_twayblade, Listera_cordata twayblade, Listera_ovata green_adder's_mouth, Malaxis-unifolia, Malaxis_ophioglossoides masdevallia maxillaria pansy_orchid odontoglossum oncidium, dancing_lady_orchid, butterfly_plant, butterfly_orchid bee_orchid, Ophrys_apifera fly_orchid, Ophrys_insectifera, Ophrys_muscifera spider_orchid early_spider_orchid, Ophrys_sphegodes Venus'_slipper, Venus's_slipper, Venus's_shoe phaius moth_orchid, moth_plant butterfly_plant, Phalaenopsis_amabilis rattlesnake_orchid lesser_butterfly_orchid, Platanthera_bifolia, Habenaria_bifolia greater_butterfly_orchid, Platanthera_chlorantha, Habenaria_chlorantha 
prairie_white-fringed_orchid, Platanthera_leucophea tangle_orchid Indian_crocus pleurothallis pogonia butterfly_orchid Psychopsis_krameriana, Oncidium_papilio_kramerianum Psychopsis_papilio, Oncidium_papilio helmet_orchid, greenhood foxtail_orchid orange-blossom_orchid, Sarcochilus_falcatus sobralia ladies'_tresses, lady's_tresses screw_augur, Spiranthes_cernua hooded_ladies'_tresses, Spiranthes_romanzoffiana western_ladies'_tresses, Spiranthes_porrifolia European_ladies'_tresses, Spiranthes_spiralis stanhopea stelis fly_orchid vanda blue_orchid, Vanda_caerulea vanilla vanilla_orchid, Vanilla_planifolia yam, yam_plant yam white_yam, water_yam, Dioscorea_alata cinnamon_vine, Chinese_yam, Dioscorea_batata elephant's-foot, tortoise_plant, Hottentot_bread_vine, Hottentot's_bread_vine, Dioscorea_elephantipes wild_yam, Dioscorea_paniculata cush-cush, Dioscorea_trifida black_bryony, black_bindweed, Tamus_communis primrose, primula English_primrose, Primula_vulgaris cowslip, paigle, Primula_veris oxlip, paigle, Primula_elatior Chinese_primrose, Primula_sinensis polyanthus, Primula_polyantha pimpernel scarlet_pimpernel, red_pimpernel, poor_man's_weatherglass, Anagallis_arvensis bog_pimpernel, Anagallis_tenella chaffweed, bastard_pimpernel, false_pimpernel cyclamen, Cyclamen_purpurascens sowbread, Cyclamen_hederifolium, Cyclamen_neopolitanum sea_milkwort, sea_trifoly, black_saltwort, Glaux_maritima featherfoil, feather-foil water_gillyflower, American_featherfoil, Hottonia_inflata water_violet, Hottonia_palustris loosestrife gooseneck_loosestrife, Lysimachia_clethroides_Duby yellow_pimpernel, Lysimachia_nemorum fringed_loosestrife, Lysimachia_ciliatum moneywort, creeping_Jenny, creeping_Charlie, Lysimachia_nummularia swamp_candles, Lysimachia_terrestris whorled_loosestrife, Lysimachia_quadrifolia water_pimpernel brookweed, Samolus_valerandii brookweed, Samolus_parviflorus, Samolus_floribundus coralberry, spiceberry, Ardisia_crenata marlberry, Ardisia_escallonoides, 
Ardisia_paniculata plumbago leadwort, Plumbago_europaea thrift sea_lavender, marsh_rosemary, statice barbasco, joewood, Jacquinia_keyensis gramineous_plant, graminaceous_plant grass midgrass shortgrass, short-grass sword_grass tallgrass, tall-grass herbage, pasturage goat_grass, Aegilops_triuncalis wheatgrass, wheat-grass crested_wheatgrass, crested_wheat_grass, fairway_crested_wheat_grass, Agropyron_cristatum bearded_wheatgrass, Agropyron_subsecundum western_wheatgrass, bluestem_wheatgrass, Agropyron_smithii intermediate_wheatgrass, Agropyron_intermedium, Elymus_hispidus slender_wheatgrass, Agropyron_trachycaulum, Agropyron_pauciflorum, Elymus_trachycaulos velvet_bent, velvet_bent_grass, brown_bent, Rhode_Island_bent, dog_bent, Agrostis_canina cloud_grass, Agrostis_nebulosa meadow_foxtail, Alopecurus_pratensis foxtail, foxtail_grass broom_grass broom_sedge, Andropogon_virginicus tall_oat_grass, tall_meadow_grass, evergreen_grass, false_oat, French_rye, Arrhenatherum_elatius toetoe, toitoi, Arundo_conspicua, Chionochloa_conspicua oat cereal_oat, Avena_sativa wild_oat, wild_oat_grass, Avena_fatua slender_wild_oat, Avena_barbata wild_red_oat, animated_oat, Avene_sterilis brome, bromegrass chess, cheat, Bromus_secalinus field_brome, Bromus_arvensis grama, grama_grass, gramma, gramma_grass black_grama, Bouteloua_eriopoda buffalo_grass, Buchloe_dactyloides reed_grass feather_reed_grass, feathertop, Calamagrostis_acutiflora Australian_reed_grass, Calamagrostic_quadriseta burgrass, bur_grass buffel_grass, Cenchrus_ciliaris, Pennisetum_cenchroides Rhodes_grass, Chloris_gayana pampas_grass, Cortaderia_selloana giant_star_grass, Cynodon_plectostachyum orchard_grass, cocksfoot, cockspur, Dactylis_glomerata Egyptian_grass, crowfoot_grass, Dactyloctenium_aegypticum crabgrass, crab_grass, finger_grass smooth_crabgrass, Digitaria_ischaemum large_crabgrass, hairy_finger_grass, Digitaria_sanguinalis barnyard_grass, barn_grass, barn_millet, Echinochloa_crusgalli Japanese_millet, 
billion-dollar_grass, Japanese_barnyard_millet, sanwa_millet, Echinochloa_frumentacea yardgrass, yard_grass, wire_grass, goose_grass, Eleusine_indica finger_millet, ragi, ragee, African_millet, coracan, corakan, kurakkan, Eleusine_coracana lyme_grass wild_rye giant_ryegrass, Elymus_condensatus, Leymus_condensatus sea_lyme_grass, European_dune_grass, Elymus_arenarius, Leymus_arenaria Canada_wild_rye, Elymus_canadensis teff, teff_grass, Eragrostis_tef, Eragrostic_abyssinica weeping_love_grass, African_love_grass, Eragrostis_curvula plume_grass Ravenna_grass, wool_grass, Erianthus_ravennae fescue, fescue_grass, meadow_fescue, Festuca_elatior reed_meadow_grass, Glyceria_grandis velvet_grass, Yorkshire_fog, Holcus_lanatus creeping_soft_grass, Holcus_mollis barleycorn barley_grass, wall_barley, Hordeum_murinum little_barley, Hordeum_pusillum rye_grass, ryegrass perennial_ryegrass, English_ryegrass, Lolium_perenne Italian_ryegrass, Italian_rye, Lolium_multiflorum darnel, tare, bearded_darnel, cheat, Lolium_temulentum nimblewill, nimble_Will, Muhlenbergia_schreberi cultivated_rice, Oryza_sativa ricegrass, rice_grass smilo, smilo_grass, Oryzopsis_miliacea switch_grass, Panicum_virgatum broomcorn_millet, hog_millet, Panicum_miliaceum goose_grass, Texas_millet, Panicum_Texanum dallisgrass, dallis_grass, paspalum, Paspalum_dilatatum Bahia_grass, Paspalum_notatum knotgrass, Paspalum_distichum fountain_grass, Pennisetum_ruppelii, Pennisetum_setaceum reed_canary_grass, gardener's_garters, lady's_laces, ribbon_grass, Phalaris_arundinacea canary_grass, birdseed_grass, Phalaris_canariensis timothy, herd's_grass, Phleum_pratense bluegrass, blue_grass meadowgrass, meadow_grass wood_meadowgrass, Poa_nemoralis, Agrostis_alba noble_cane munj, munja, Saccharum_bengalense, Saccharum_munja broom_beard_grass, prairie_grass, wire_grass, Andropogon_scoparius, Schizachyrium_scoparium bluestem, blue_stem, Andropogon_furcatus, Andropogon_gerardii rye, Secale_cereale bristlegrass, bristle_grass 
giant_foxtail yellow_bristlegrass, yellow_bristle_grass, yellow_foxtail, glaucous_bristlegrass, Setaria_glauca green_bristlegrass, green_foxtail, rough_bristlegrass, bottle-grass, bottle_grass, Setaria_viridis Siberian_millet, Setaria_italica_rubrofructa German_millet, golden_wonder_millet, Setaria_italica_stramineofructa millet rattan, rattan_cane malacca reed sorghum grain_sorghum durra, doura, dourah, Egyptian_corn, Indian_millet, Guinea_corn feterita, federita, Sorghum_vulgare_caudatum hegari kaoliang milo, milo_maize shallu, Sorghum_vulgare_rosburghii broomcorn, Sorghum_vulgare_technicum cordgrass, cord_grass salt_reed_grass, Spartina_cynosuroides prairie_cordgrass, freshwater_cordgrass, slough_grass, Spartina_pectinmata smut_grass, blackseed, carpet_grass, Sporobolus_poiretii sand_dropseed, Sporobolus_cryptandrus rush_grass, rush-grass St._Augustine_grass, Stenotaphrum_secundatum, buffalo_grass grain cereal, cereal_grass wheat wheat_berry durum, durum_wheat, hard_wheat, Triticum_durum, Triticum_turgidum, macaroni_wheat spelt, Triticum_spelta, Triticum_aestivum_spelta emmer, starch_wheat, two-grain_spelt, Triticum_dicoccum wild_wheat, wild_emmer, Triticum_dicoccum_dicoccoides corn, maize, Indian_corn, Zea_mays mealie corn dent_corn, Zea_mays_indentata flint_corn, flint_maize, Yankee_corn, Zea_mays_indurata popcorn, Zea_mays_everta zoysia Manila_grass, Japanese_carpet_grass, Zoysia_matrella Korean_lawn_grass, Japanese_lawn_grass, Zoysia_japonica bamboo common_bamboo, Bambusa_vulgaris giant_bamboo, kyo-chiku, Dendrocalamus_giganteus umbrella_plant, umbrella_sedge, Cyperus_alternifolius chufa, yellow_nutgrass, earth_almond, ground_almond, rush_nut, Cyperus_esculentus galingale, galangal, Cyperus_longus nutgrass, nut_grass, nutsedge, nut_sedge, Cyperus_rotundus sand_sedge, sand_reed, Carex_arenaria cypress_sedge, Carex_pseudocyperus cotton_grass, cotton_rush common_cotton_grass, Eriophorum_angustifolium hardstem_bulrush, hardstemmed_bulrush, Scirpus_acutus 
wool_grass, Scirpus_cyperinus spike_rush water_chestnut, Chinese_water_chestnut, Eleocharis_dulcis needle_spike_rush, needle_rush, slender_spike_rush, hair_grass, Eleocharis_acicularis creeping_spike_rush, Eleocharis_palustris pandanus, screw_pine textile_screw_pine, lauhala, Pandanus_tectorius cattail cat's-tail, bullrush, bulrush, nailrod, reed_mace, reedmace, Typha_latifolia bur_reed grain, caryopsis kernel rye gourd, gourd_vine gourd pumpkin, pumpkin_vine, autumn_pumpkin, Cucurbita_pepo squash, squash_vine summer_squash, summer_squash_vine, Cucurbita_pepo_melopepo yellow_squash marrow, marrow_squash, vegetable_marrow zucchini, courgette cocozelle, Italian_vegetable_marrow cymling, pattypan_squash spaghetti_squash winter_squash, winter_squash_plant acorn_squash hubbard_squash, Cucurbita_maxima turban_squash, Cucurbita_maxima_turbaniformis buttercup_squash butternut_squash, Cucurbita_maxima winter_crookneck, winter_crookneck_squash, Cucurbita_moschata cushaw, Cucurbita_mixta, Cucurbita_argyrosperma prairie_gourd, prairie_gourd_vine, Missouri_gourd, wild_pumpkin, buffalo_gourd, calabazilla, Cucurbita_foetidissima prairie_gourd bryony, briony white_bryony, devil's_turnip, Bryonia_alba sweet_melon, muskmelon, sweet_melon_vine, Cucumis_melo cantaloupe, cantaloup, cantaloupe_vine, cantaloup_vine, Cucumis_melo_cantalupensis winter_melon, Persian_melon, honeydew_melon, winter_melon_vine, Cucumis_melo_inodorus net_melon, netted_melon, nutmeg_melon, Cucumis_melo_reticulatus cucumber, cucumber_vine, Cucumis_sativus squirting_cucumber, exploding_cucumber, touch-me-not, Ecballium_elaterium bottle_gourd, calabash, Lagenaria_siceraria luffa, dishcloth_gourd, sponge_gourd, rag_gourd, strainer_vine loofah, vegetable_sponge, Luffa_cylindrica angled_loofah, sing-kwa, Luffa_acutangula loofa, loofah, luffa, loufah_sponge balsam_apple, Momordica_balsamina balsam_pear, Momordica_charantia lobelia water_lobelia, Lobelia_dortmanna mallow musk_mallow, mus_rose, Malva_moschata 
common_mallow, Malva_neglecta okra, gumbo, okra_plant, lady's-finger, Abelmoschus_esculentus, Hibiscus_esculentus okra abelmosk, musk_mallow, Abelmoschus_moschatus, Hibiscus_moschatus flowering_maple velvetleaf, velvet-leaf, velvetweed, Indian_mallow, butter-print, China_jute, Abutilon_theophrasti hollyhock rose_mallow, Alcea_rosea, Althea_rosea althea, althaea, hollyhock marsh_mallow, white_mallow, Althea_officinalis poppy_mallow fringed_poppy_mallow, Callirhoe_digitata purple_poppy_mallow, Callirhoe_involucrata clustered_poppy_mallow, Callirhoe_triangulata sea_island_cotton, tree_cotton, Gossypium_barbadense Levant_cotton, Gossypium_herbaceum upland_cotton, Gossypium_hirsutum Peruvian_cotton, Gossypium_peruvianum wild_cotton, Arizona_wild_cotton, Gossypium_thurberi kenaf, kanaf, deccan_hemp, bimli, bimli_hemp, Indian_hemp, Bombay_hemp, Hibiscus_cannabinus sorrel_tree, Hibiscus_heterophyllus rose_mallow, swamp_mallow, common_rose_mallow, swamp_rose_mallow, Hibiscus_moscheutos cotton_rose, Confederate_rose, Confederate_rose_mallow, Hibiscus_mutabilis roselle, rozelle, sorrel, red_sorrel, Jamaica_sorrel, Hibiscus_sabdariffa mahoe, majagua, mahagua, balibago, purau, Hibiscus_tiliaceus flower-of-an-hour, flowers-of-an-hour, bladder_ketmia, black-eyed_Susan, Hibiscus_trionum lacebark, ribbonwood, houhere, Hoheria_populnea wild_hollyhock, Iliamna_remota, Sphaeralcea_remota mountain_hollyhock, Iliamna_ruvularis, Iliamna_acerifolia seashore_mallow salt_marsh_mallow, Kosteletzya_virginica chaparral_mallow, Malacothamnus_fasciculatus, Sphaeralcea_fasciculata malope, Malope_trifida false_mallow waxmallow, wax_mallow, sleeping_hibiscus glade_mallow, Napaea_dioica pavonia ribbon_tree, ribbonwood, Plagianthus_regius, Plagianthus_betulinus bush_hibiscus, Radyera_farragei, Hibiscus_farragei Virginia_mallow, Sida_hermaphrodita Queensland_hemp, jellyleaf, Sida_rhombifolia Indian_mallow, Sida_spinosa checkerbloom, wild_hollyhock, Sidalcea_malviflora globe_mallow, false_mallow 
prairie_mallow, red_false_mallow, Sphaeralcea_coccinea, Malvastrum_coccineum tulipwood_tree portia_tree, bendy_tree, seaside_mahoe, Thespesia_populnea red_silk-cotton_tree, simal, Bombax_ceiba, Bombax_malabarica cream-of-tartar_tree, sour_gourd, Adansonia_gregorii baobab, monkey-bread_tree, Adansonia_digitata kapok, ceiba_tree, silk-cotton_tree, white_silk-cotton_tree, Bombay_ceiba, God_tree, Ceiba_pentandra durian, durion, durian_tree, Durio_zibethinus Montezuma shaving-brush_tree, Pseudobombax_ellipticum quandong, quandong_tree, Brisbane_quandong, silver_quandong_tree, blue_fig, Elaeocarpus_grandis quandong, blue_fig makomako, New_Zealand_wine_berry, wineberry, Aristotelia_serrata, Aristotelia_racemosa Jamaican_cherry, calabur_tree, calabura, silk_wood, silkwood, Muntingia_calabura breakax, breakaxe, break-axe, Sloanea_jamaicensis sterculia Panama_tree, Sterculia_apetala kalumpang, Java_olives, Sterculia_foetida bottle-tree, bottle_tree flame_tree, flame_durrajong, Brachychiton_acerifolius, Sterculia_acerifolia flame_tree, broad-leaved_bottletree, Brachychiton_australis kurrajong, currajong, Brachychiton_populneus Queensland_bottletree, narrow-leaved_bottletree, Brachychiton_rupestris, Sterculia_rupestris kola, kola_nut, kola_nut_tree, goora_nut, Cola_acuminata kola_nut, cola_nut Chinese_parasol_tree, Chinese_parasol, Japanese_varnish_tree, phoenix_tree, Firmiana_simplex flannelbush, flannel_bush, California_beauty screw_tree nut-leaved_screw_tree, Helicteres_isora red_beech, brown_oak, booyong, crow's_foot, stave_wood, silky_elm, Heritiera_trifoliolata, Terrietia_trifoliolata looking_glass_tree, Heritiera_macrophylla looking-glass_plant, Heritiera_littoralis honey_bell, honeybells, Hermannia_verticillata, Mahernia_verticillata mayeng, maple-leaved_bayur, Pterospermum_acerifolium silver_tree, Tarrietia_argyrodendron cacao, cacao_tree, chocolate_tree, Theobroma_cacao obeche, obechi, arere, samba, Triplochiton_scleroxcylon linden, linden_tree, basswood, lime, 
lime_tree American_basswood, American_lime, Tilia_americana small-leaved_linden, small-leaved_lime, Tilia_cordata white_basswood, cottonwood, Tilia_heterophylla Japanese_linden, Japanese_lime, Tilia_japonica silver_lime, silver_linden, Tilia_tomentosa corchorus African_hemp, Sparmannia_africana herb, herbaceous_plant protea honeypot, king_protea, Protea_cynaroides honeyflower, honey-flower, Protea_mellifera banksia honeysuckle, Australian_honeysuckle, coast_banksia, Banksia_integrifolia smoke_bush Chilean_firebush, Chilean_flameflower, Embothrium_coccineum Chilean_nut, Chile_nut, Chile_hazel, Chilean_hazelnut, Guevina_heterophylla, Guevina_avellana grevillea red-flowered_silky_oak, Grevillea_banksii silky_oak, Grevillea_robusta beefwood, Grevillea_striata cushion_flower, pincushion_hakea, Hakea_laurina rewa-rewa, New_Zealand_honeysuckle honeyflower, honey-flower, mountain_devil, Lambertia_formosa silver_tree, Leucadendron_argenteum lomatia macadamia, macadamia_tree Macadamia_integrifolia macadamia_nut, macadamia_nut_tree, Macadamia_ternifolia Queensland_nut, Macadamia_tetraphylla prickly_ash, Orites_excelsa geebung wheel_tree, firewheel_tree, Stenocarpus_sinuatus scrub_beefwood, beefwood, Stenocarpus_salignus waratah, Telopea_Oreades waratah, Telopea_speciosissima casuarina she-oak beefwood Australian_pine, Casuarina_equisetfolia heath tree_heath, briar, brier, Erica_arborea briarroot winter_heath, spring_heath, Erica_carnea bell_heather, heather_bell, fine-leaved_heath, Erica_cinerea Cornish_heath, Erica_vagans Spanish_heath, Portuguese_heath, Erica_lusitanica Prince-of-Wales'-heath, Prince_of_Wales_heath, Erica_perspicua bog_rosemary, moorwort, Andromeda_glaucophylla marsh_andromeda, common_bog_rosemary, Andromeda_polifolia madrona, madrono, manzanita, Arbutus_menziesii strawberry_tree, Irish_strawberry, Arbutus_unedo bearberry alpine_bearberry, black_bearberry, Arctostaphylos_alpina heartleaf_manzanita, Arctostaphylos_andersonii Parry_manzanita, 
Arctostaphylos_manzanita spike_heath, Bruckenthalia_spiculifolia bryanthus leatherleaf, Chamaedaphne_calyculata Connemara_heath, St._Dabeoc's_heath, Daboecia_cantabrica trailing_arbutus, mayflower, Epigaea_repens creeping_snowberry, moxie_plum, maidenhair_berry, Gaultheria_hispidula salal, shallon, Gaultheria_shallon huckleberry black_huckleberry, Gaylussacia_baccata dangleberry, dangle-berry, Gaylussacia_frondosa box_huckleberry, Gaylussacia_brachycera kalmia mountain_laurel, wood_laurel, American_laurel, calico_bush, Kalmia_latifolia swamp_laurel, bog_laurel, bog_kalmia, Kalmia_polifolia trapper's_tea, glandular_Labrador_tea wild_rosemary, marsh_tea, Ledum_palustre sand_myrtle, Leiophyllum_buxifolium leucothoe dog_laurel, dog_hobble, switch-ivy, Leucothoe_fontanesiana, Leucothoe_editorum sweet_bells, Leucothoe_racemosa alpine_azalea, mountain_azalea, Loiseleuria_procumbens staggerbush, stagger_bush, Lyonia_mariana maleberry, male_berry, privet_andromeda, he-huckleberry, Lyonia_ligustrina fetterbush, fetter_bush, shiny_lyonia, Lyonia_lucida false_azalea, fool's_huckleberry, Menziesia_ferruginea minniebush, minnie_bush, Menziesia_pilosa sorrel_tree, sourwood, titi, Oxydendrum_arboreum mountain_heath, Phyllodoce_caerulea, Bryanthus_taxifolius purple_heather, Brewer's_mountain_heather, Phyllodoce_breweri fetterbush, mountain_fetterbush, mountain_andromeda, Pieris_floribunda rhododendron coast_rhododendron, Rhododendron_californicum rosebay, Rhododendron_maxima swamp_azalea, swamp_honeysuckle, white_honeysuckle, Rhododendron_viscosum azalea cranberry American_cranberry, large_cranberry, Vaccinium_macrocarpon European_cranberry, small_cranberry, Vaccinium_oxycoccus blueberry, blueberry_bush farkleberry, sparkleberry, Vaccinium_arboreum low-bush_blueberry, low_blueberry, Vaccinium_angustifolium, Vaccinium_pennsylvanicum rabbiteye_blueberry, rabbit-eye_blueberry, rabbiteye, Vaccinium_ashei dwarf_bilberry, dwarf_blueberry, Vaccinium_caespitosum evergreen_blueberry, 
Vaccinium_myrsinites evergreen_huckleberry, Vaccinium_ovatum bilberry, thin-leaved_bilberry, mountain_blue_berry, Viccinium_membranaceum bilberry, whortleberry, whinberry, blaeberry, Viccinium_myrtillus bog_bilberry, bog_whortleberry, moor_berry, Vaccinium_uliginosum_alpinum dryland_blueberry, dryland_berry, Vaccinium_pallidum grouseberry, grouse-berry, grouse_whortleberry, Vaccinium_scoparium deerberry, squaw_huckleberry, Vaccinium_stamineum cowberry, mountain_cranberry, lingonberry, lingenberry, lingberry, foxberry, Vaccinium_vitis-idaea diapensia galax, galaxy, wandflower, beetleweed, coltsfoot, Galax_urceolata pyxie, pixie, pixy, Pyxidanthera_barbulata shortia oconee_bells, Shortia_galacifolia Australian_heath epacris common_heath, Epacris_impressa common_heath, blunt-leaf_heath, Epacris_obtusifolia Port_Jackson_heath, Epacris_purpurascens native_cranberry, groundberry, ground-berry, cranberry_heath, Astroloma_humifusum, Styphelia_humifusum pink_fivecorner, Styphelia_triflora wintergreen, pyrola false_wintergreen, Pyrola_americana, Pyrola_rotundifolia_americana lesser_wintergreen, Pyrola_minor wild_lily_of_the_valley, shinleaf, Pyrola_elliptica wild_lily_of_the_valley, Pyrola_rotundifolia pipsissewa, prince's_pine love-in-winter, western_prince's_pine, Chimaphila_umbellata, Chimaphila_corymbosa one-flowered_wintergreen, one-flowered_pyrola, Moneses_uniflora, Pyrola_uniflora Indian_pipe, waxflower, Monotropa_uniflora pinesap, false_beachdrops, Monotropa_hypopithys beech, beech_tree common_beech, European_beech, Fagus_sylvatica copper_beech, purple_beech, Fagus_sylvatica_atropunicea, Fagus_purpurea, Fagus_sylvatica_purpurea American_beech, white_beech, red_beech, Fagus_grandifolia, Fagus_americana weeping_beech, Fagus_pendula, Fagus_sylvatica_pendula Japanese_beech chestnut, chestnut_tree American_chestnut, American_sweet_chestnut, Castanea_dentata European_chestnut, sweet_chestnut, Spanish_chestnut, Castanea_sativa Chinese_chestnut, Castanea_mollissima 
Japanese_chestnut, Castanea_crenata Allegheny_chinkapin, eastern_chinquapin, chinquapin, dwarf_chestnut, Castanea_pumila Ozark_chinkapin, Ozark_chinquapin, chinquapin, Castanea_ozarkensis oak_chestnut giant_chinkapin, golden_chinkapin, Chrysolepis_chrysophylla, Castanea_chrysophylla, Castanopsis_chrysophylla dwarf_golden_chinkapin, Chrysolepis_sempervirens tanbark_oak, Lithocarpus_densiflorus Japanese_oak, Lithocarpus_glabra, Lithocarpus_glaber southern_beech, evergreen_beech myrtle_beech, Nothofagus_cuninghamii Coigue, Nothofagus_dombeyi New_Zealand_beech silver_beech, Nothofagus_menziesii roble_beech, Nothofagus_obliqua rauli_beech, Nothofagus_procera black_beech, Nothofagus_solanderi hard_beech, Nothofagus_truncata acorn cupule, acorn_cup oak, oak_tree live_oak coast_live_oak, California_live_oak, Quercus_agrifolia white_oak American_white_oak, Quercus_alba Arizona_white_oak, Quercus_arizonica swamp_white_oak, swamp_oak, Quercus_bicolor European_turkey_oak, turkey_oak, Quercus_cerris canyon_oak, canyon_live_oak, maul_oak, iron_oak, Quercus_chrysolepis scarlet_oak, Quercus_coccinea jack_oak, northern_pin_oak, Quercus_ellipsoidalis red_oak southern_red_oak, swamp_red_oak, turkey_oak, Quercus_falcata Oregon_white_oak, Oregon_oak, Garry_oak, Quercus_garryana holm_oak, holm_tree, holly-leaved_oak, evergreen_oak, Quercus_ilex bear_oak, Quercus_ilicifolia shingle_oak, laurel_oak, Quercus_imbricaria bluejack_oak, turkey_oak, Quercus_incana California_black_oak, Quercus_kelloggii American_turkey_oak, turkey_oak, Quercus_laevis laurel_oak, pin_oak, Quercus_laurifolia California_white_oak, valley_oak, valley_white_oak, roble, Quercus_lobata overcup_oak, Quercus_lyrata bur_oak, burr_oak, mossy-cup_oak, mossycup_oak, Quercus_macrocarpa scrub_oak blackjack_oak, blackjack, jack_oak, Quercus_marilandica swamp_chestnut_oak, Quercus_michauxii Japanese_oak, Quercus_mongolica, Quercus_grosseserrata chestnut_oak chinquapin_oak, chinkapin_oak, yellow_chestnut_oak, 
Quercus_muehlenbergii myrtle_oak, seaside_scrub_oak, Quercus_myrtifolia water_oak, possum_oak, Quercus_nigra Nuttall_oak, Nuttall's_oak, Quercus_nuttalli durmast, Quercus_petraea, Quercus_sessiliflora basket_oak, cow_oak, Quercus_prinus, Quercus_montana pin_oak, swamp_oak, Quercus_palustris willow_oak, Quercus_phellos dwarf_chinkapin_oak, dwarf_chinquapin_oak, dwarf_oak, Quercus_prinoides common_oak, English_oak, pedunculate_oak, Quercus_robur northern_red_oak, Quercus_rubra, Quercus_borealis Shumard_oak, Shumard_red_oak, Quercus_shumardii post_oak, box_white_oak, brash_oak, iron_oak, Quercus_stellata cork_oak, Quercus_suber Spanish_oak, Quercus_texana huckleberry_oak, Quercus_vaccinifolia Chinese_cork_oak, Quercus_variabilis black_oak, yellow_oak, quercitron, quercitron_oak, Quercus_velutina southern_live_oak, Quercus_virginiana interior_live_oak, Quercus_wislizenii, Quercus_wizlizenii mast birch, birch_tree yellow_birch, Betula_alleghaniensis, Betula_leutea American_white_birch, paper_birch, paperbark_birch, canoe_birch, Betula_cordifolia, Betula_papyrifera grey_birch, gray_birch, American_grey_birch, American_gray_birch, Betula_populifolia silver_birch, common_birch, European_white_birch, Betula_pendula downy_birch, white_birch, Betula_pubescens black_birch, river_birch, red_birch, Betula_nigra sweet_birch, cherry_birch, black_birch, Betula_lenta Yukon_white_birch, Betula_neoalaskana swamp_birch, water_birch, mountain_birch, Western_paper_birch, Western_birch, Betula_fontinalis Newfoundland_dwarf_birch, American_dwarf_birch, Betula_glandulosa alder, alder_tree common_alder, European_black_alder, Alnus_glutinosa, Alnus_vulgaris grey_alder, gray_alder, Alnus_incana seaside_alder, Alnus_maritima white_alder, mountain_alder, Alnus_rhombifolia red_alder, Oregon_alder, Alnus_rubra speckled_alder, Alnus_rugosa smooth_alder, hazel_alder, Alnus_serrulata green_alder, Alnus_veridis green_alder, Alnus_veridis_crispa, Alnus_crispa hornbeam European_hornbeam, 
Carpinus_betulus American_hornbeam, Carpinus_caroliniana hop_hornbeam Old_World_hop_hornbeam, Ostrya_carpinifolia Eastern_hop_hornbeam, ironwood, ironwood_tree, Ostrya_virginiana hazelnut, hazel, hazelnut_tree American_hazel, Corylus_americana cobnut, filbert, Corylus_avellana, Corylus_avellana_grandis beaked_hazelnut, Corylus_cornuta centaury rosita, Centaurium_calycosum lesser_centaury, Centaurium_minus seaside_centaury slender_centaury prairie_gentian, tulip_gentian, bluebell, Eustoma_grandiflorum Persian_violet, Exacum_affine columbo, American_columbo, deer's-ear, deer's-ears, pyramid_plant, American_gentian gentian gentianella, Gentiana_acaulis closed_gentian, blind_gentian, bottle_gentian, Gentiana_andrewsii explorer's_gentian, Gentiana_calycosa closed_gentian, blind_gentian, Gentiana_clausa great_yellow_gentian, Gentiana_lutea marsh_gentian, calathian_violet, Gentiana_pneumonanthe soapwort_gentian, Gentiana_saponaria striped_gentian, Gentiana_villosa agueweed, ague_weed, five-flowered_gentian, stiff_gentian, Gentianella_quinquefolia, Gentiana_quinquefolia felwort, gentianella_amarella fringed_gentian Gentianopsis_crinita, Gentiana_crinita Gentianopsis_detonsa, Gentiana_detonsa Gentianopsid_procera, Gentiana_procera Gentianopsis_thermalis, Gentiana_thermalis tufted_gentian, Gentianopsis_holopetala, Gentiana_holopetala spurred_gentian sabbatia toothbrush_tree, mustard_tree, Salvadora_persica olive_tree olive, European_olive_tree, Olea_europaea olive black_maire, Olea_cunninghamii white_maire, Olea_lanceolata fringe_tree fringe_bush, Chionanthus_virginicus forestiera forsythia ash, ash_tree white_ash, Fraxinus_Americana swamp_ash, Fraxinus_caroliniana flowering_ash, Fraxinus_cuspidata European_ash, common_European_ash, Fraxinus_excelsior Oregon_ash, Fraxinus_latifolia, Fraxinus_oregona black_ash, basket_ash, brown_ash, hoop_ash, Fraxinus_nigra manna_ash, flowering_ash, Fraxinus_ornus red_ash, downy_ash, Fraxinus_pennsylvanica green_ash, 
Fraxinus_pennsylvanica_subintegerrima blue_ash, Fraxinus_quadrangulata mountain_ash, Fraxinus_texensis pumpkin_ash, Fraxinus_tomentosa Arizona_ash, Fraxinus_velutina jasmine primrose_jasmine, Jasminum_mesnyi winter_jasmine, Jasminum_nudiflorum common_jasmine, true_jasmine, jessamine, Jasminum_officinale privet Amur_privet, Ligustrum_amurense Japanese_privet, Ligustrum_japonicum Ligustrum_obtusifolium common_privet, Ligustrum_vulgare devilwood, American_olive, Osmanthus_americanus mock_privet lilac Himalayan_lilac, Syringa_emodi Persian_lilac, Syringa_persica Japanese_tree_lilac, Syringa_reticulata, Syringa_amurensis_japonica Japanese_lilac, Syringa_villosa common_lilac, Syringa_vulgaris bloodwort kangaroo_paw, kangaroo's_paw, kangaroo's-foot, kangaroo-foot_plant, Australian_sword_lily, Anigozanthus_manglesii Virginian_witch_hazel, Hamamelis_virginiana vernal_witch_hazel, Hamamelis_vernalis winter_hazel, flowering_hazel fothergilla, witch_alder liquidambar sweet_gum, sweet_gum_tree, bilsted, red_gum, American_sweet_gum, Liquidambar_styraciflua iron_tree, iron-tree, ironwood, ironwood_tree walnut, walnut_tree California_black_walnut, Juglans_californica butternut, butternut_tree, white_walnut, Juglans_cinerea black_walnut, black_walnut_tree, black_hickory, Juglans_nigra English_walnut, English_walnut_tree, Circassian_walnut, Persian_walnut, Juglans_regia hickory, hickory_tree water_hickory, bitter_pecan, water_bitternut, Carya_aquatica pignut, pignut_hickory, brown_hickory, black_hickory, Carya_glabra bitternut, bitternut_hickory, bitter_hickory, bitter_pignut, swamp_hickory, Carya_cordiformis pecan, pecan_tree, Carya_illinoensis, Carya_illinoinsis big_shellbark, big_shellbark_hickory, big_shagbark, king_nut, king_nut_hickory, Carya_laciniosa nutmeg_hickory, Carya_myristicaeformis, Carya_myristiciformis shagbark, shagbark_hickory, shellbark, shellbark_hickory, Carya_ovata mockernut, mockernut_hickory, black_hickory, white-heart_hickory, big-bud_hickory, 
Carya_tomentosa wing_nut, wing-nut Caucasian_walnut, Pterocarya_fraxinifolia dhawa, dhava combretum hiccup_nut, hiccough_nut, Combretum_bracteosum bush_willow, Combretum_appiculatum bush_willow, Combretum_erythrophyllum button_tree, button_mangrove, Conocarpus_erectus white_mangrove, Laguncularia_racemosa oleaster water_milfoil anchovy_pear, anchovy_pear_tree, Grias_cauliflora brazil_nut, brazil-nut_tree, Bertholletia_excelsa loosestrife purple_loosestrife, spiked_loosestrife, Lythrum_salicaria grass_poly, hyssop_loosestrife, Lythrum_hyssopifolia crape_myrtle, crepe_myrtle, crepe_flower, Lagerstroemia_indica Queen's_crape_myrtle, pride-of-India, Lagerstroemia_speciosa myrtaceous_tree myrtle common_myrtle, Myrtus_communis bayberry, bay-rum_tree, Jamaica_bayberry, wild_cinnamon, Pimenta_acris allspice, allspice_tree, pimento_tree, Pimenta_dioica allspice_tree, Pimenta_officinalis sour_cherry, Eugenia_corynantha nakedwood, Eugenia_dicrana Surinam_cherry, pitanga, Eugenia_uniflora rose_apple, rose-apple_tree, jambosa, Eugenia_jambos feijoa, feijoa_bush jaboticaba, jaboticaba_tree, Myrciaria_cauliflora guava, true_guava, guava_bush, Psidium_guajava guava, strawberry_guava, yellow_cattley_guava, Psidium_littorale cattley_guava, purple_strawberry_guava, Psidium_cattleianum, Psidium_littorale_longipes Brazilian_guava, Psidium_guineense gum_tree, gum eucalyptus, eucalypt, eucalyptus_tree flooded_gum mallee stringybark smoothbark red_gum, peppermint, peppermint_gum, Eucalyptus_amygdalina red_gum, marri, Eucalyptus_calophylla river_red_gum, river_gum, Eucalyptus_camaldulensis, Eucalyptus_rostrata mountain_swamp_gum, Eucalyptus_camphora snow_gum, ghost_gum, white_ash, Eucalyptus_coriacea, Eucalyptus_pauciflora alpine_ash, mountain_oak, Eucalyptus_delegatensis white_mallee, congoo_mallee, Eucalyptus_dumosa white_stringybark, thin-leaved_stringybark, Eucalyptusd_eugenioides white_mountain_ash, Eucalyptus_fraxinoides blue_gum, fever_tree, Eucalyptus_globulus rose_gum, 
Eucalypt_grandis cider_gum, Eucalypt_gunnii swamp_gum, Eucalypt_ovata spotted_gum, Eucalyptus_maculata lemon-scented_gum, Eucalyptus_citriodora, Eucalyptus_maculata_citriodora black_mallee, black_sally, black_gum, Eucalytus_stellulata forest_red_gum, Eucalypt_tereticornis mountain_ash, Eucalyptus_regnans manna_gum, Eucalyptus_viminalis clove, clove_tree, Syzygium_aromaticum, Eugenia_aromaticum, Eugenia_caryophyllatum clove tupelo, tupelo_tree water_gum, Nyssa_aquatica sour_gum, black_gum, pepperidge, Nyssa_sylvatica enchanter's_nightshade Circaea_lutetiana willowherb fireweed, giant_willowherb, rosebay_willowherb, wickup, Epilobium_angustifolium California_fuchsia, humming_bird's_trumpet, Epilobium_canum_canum, Zauschneria_californica fuchsia lady's-eardrop, ladies'-eardrop, lady's-eardrops, ladies'-eardrops, Fuchsia_coccinea evening_primrose common_evening_primrose, German_rampion, Oenothera_biennis sundrops, Oenothera_fruticosa Missouri_primrose, Ozark_sundrops, Oenothera_macrocarpa pomegranate, pomegranate_tree, Punica_granatum mangrove, Rhizophora_mangle daphne garland_flower, Daphne_cneorum spurge_laurel, wood_laurel, Daphne_laureola mezereon, February_daphne, Daphne_mezereum Indian_rhododendron, Melastoma_malabathricum Medinilla_magnifica deer_grass, meadow_beauty canna achira, indian_shot, arrowroot, Canna_indica, Canna_edulis arrowroot, American_arrowroot, obedience_plant, Maranta_arundinaceae banana, banana_tree dwarf_banana, Musa_acuminata Japanese_banana, Musa_basjoo plantain, plantain_tree, Musa_paradisiaca edible_banana, Musa_paradisiaca_sapientum abaca, Manila_hemp, Musa_textilis Abyssinian_banana, Ethiopian_banana, Ensete_ventricosum, Musa_ensete ginger common_ginger, Canton_ginger, stem_ginger, Zingiber_officinale turmeric, Curcuma_longa, Curcuma_domestica galangal, Alpinia_galanga shellflower, shall-flower, shell_ginger, Alpinia_Zerumbet, Alpinia_speciosa, Languas_speciosa grains_of_paradise, Guinea_grains, Guinea_pepper, melagueta_pepper, 
Aframomum_melegueta cardamom, cardamon, Elettaria_cardamomum begonia fibrous-rooted_begonia tuberous_begonia rhizomatous_begonia Christmas_begonia, blooming-fool_begonia, Begonia_cheimantha angel-wing_begonia, Begonia_cocchinea beefsteak_begonia, kidney_begonia, Begonia_erythrophylla, Begonia_feastii star_begonia, star-leaf_begonia, Begonia_heracleifolia rex_begonia, king_begonia, painted-leaf_begonia, beefsteak_geranium, Begonia_rex wax_begonia, Begonia_semperflorens Socotra_begonia, Begonia_socotrana hybrid_tuberous_begonia, Begonia_tuberhybrida dillenia guinea_gold_vine, guinea_flower poon calaba, Santa_Maria_tree, Calophyllum_calaba Maria, Calophyllum_longifolium laurelwood, lancewood_tree, Calophyllum_candidissimum Alexandrian_laurel, Calophyllum_inophyllum clusia wild_fig, Clusia_flava waxflower, Clusia_insignis pitch_apple, strangler_fig, Clusia_rosea, Clusia_major mangosteen, mangosteen_tree, Garcinia_mangostana gamboge_tree, Garcinia_hanburyi, Garcinia_cambogia, Garcinia_gummi-gutta St_John's_wort common_St_John's_wort, tutsan, Hypericum_androsaemum great_St_John's_wort, Hypericum_ascyron, Hypericum_pyramidatum creeping_St_John's_wort, Hypericum_calycinum low_St_Andrew's_cross, Hypericum_hypericoides klammath_weed, Hypericum_perforatum shrubby_St_John's_wort, Hypericum_prolificum, Hypericum_spathulatum St_Peter's_wort, Hypericum_tetrapterum, Hypericum_maculatum marsh_St-John's_wort, Hypericum_virginianum mammee_apple, mammee, mamey, mammee_tree, Mammea_americana rose_chestnut, ironwood, ironwood_tree, Mesua_ferrea bower_actinidia, tara_vine, Actinidia_arguta Chinese_gooseberry, kiwi, kiwi_vine, Actinidia_chinensis, Actinidia_deliciosa silvervine, silver_vine, Actinidia_polygama wild_cinnamon, white_cinnamon_tree, Canella_winterana, Canella-alba papaya, papaia, pawpaw, papaya_tree, melon_tree, Carica_papaya souari, souari_nut, souari_tree, Caryocar_nuciferum rockrose, rock_rose white-leaved_rockrose, Cistus_albidus common_gum_cistus, Cistus_ladanifer, 
Cistus_ladanum frostweed, frost-weed, frostwort, Helianthemum_canadense, Crocanthemum_canadense dipterocarp red_lauan, red_lauan_tree, Shorea_teysmanniana governor's_plum, governor_plum, Madagascar_plum, ramontchi, batoko_palm, Flacourtia_indica kei_apple, kei_apple_bush, Dovyalis_caffra ketembilla, kitembilla, kitambilla, ketembilla_tree, Ceylon_gooseberry, Dovyalis_hebecarpa chaulmoogra, chaulmoogra_tree, chaulmugra, Hydnocarpus_kurzii, Taraktagenos_kurzii, Taraktogenos_kurzii wild_peach, Kiggelaria_africana candlewood boojum_tree, cirio, Fouquieria_columnaris, Idria_columnaris bird's-eye_bush, Ochna_serrulata granadilla, purple_granadillo, Passiflora_edulis granadilla, sweet_granadilla, Passiflora_ligularis granadilla, giant_granadilla, Passiflora_quadrangularis maypop, Passiflora_incarnata Jamaica_honeysuckle, yellow_granadilla, Passiflora_laurifolia banana_passion_fruit, Passiflora_mollissima sweet_calabash, Passiflora_maliformis love-in-a-mist, running_pop, wild_water_lemon, Passiflora_foetida reseda mignonette, sweet_reseda, Reseda_odorata dyer's_rocket, dyer's_mignonette, weld, Reseda_luteola false_tamarisk, German_tamarisk, Myricaria_germanica halophyte viola violet field_pansy, heartsease, Viola_arvensis American_dog_violet, Viola_conspersa dog_violet, heath_violet, Viola_canina horned_violet, tufted_pansy, Viola_cornuta two-eyed_violet, heartsease, Viola_ocellata bird's-foot_violet, pansy_violet, Johnny-jump-up, wood_violet, Viola_pedata downy_yellow_violet, Viola_pubescens long-spurred_violet, Viola_rostrata pale_violet, striped_violet, cream_violet, Viola_striata hedge_violet, wood_violet, Viola_sylvatica, Viola_reichenbachiana nettle stinging_nettle, Urtica_dioica Roman_nettle, Urtica_pipulifera ramie, ramee, Chinese_silk_plant, China_grass, Boehmeria_nivea wood_nettle, Laportea_canadensis Australian_nettle, Australian_nettle_tree pellitory-of-the-wall, wall_pellitory, pellitory, Parietaria_difussa richweed, clearweed, dead_nettle, Pilea_pumilla 
artillery_plant, Pilea_microphylla friendship_plant, panamica, panamiga, Pilea_involucrata Queensland_grass-cloth_plant, Pipturus_argenteus Pipturus_albidus cannabis, hemp Indian_hemp, Cannabis_indica mulberry, mulberry_tree white_mulberry, Morus_alba black_mulberry, Morus_nigra red_mulberry, Morus_rubra osage_orange, bow_wood, mock_orange, Maclura_pomifera breadfruit, breadfruit_tree, Artocarpus_communis, Artocarpus_altilis jackfruit, jackfruit_tree, Artocarpus_heterophyllus marang, marang_tree, Artocarpus_odoratissima fig_tree fig, common_fig, common_fig_tree, Ficus_carica caprifig, Ficus_carica_sylvestris golden_fig, Florida_strangler_fig, strangler_fig, wild_fig, Ficus_aurea banyan, banyan_tree, banian, banian_tree, Indian_banyan, East_Indian_fig_tree, Ficus_bengalensis pipal, pipal_tree, pipul, peepul, sacred_fig, bo_tree, Ficus_religiosa India-rubber_tree, India-rubber_plant, India-rubber_fig, rubber_plant, Assam_rubber, Ficus_elastica mistletoe_fig, mistletoe_rubber_plant, Ficus_diversifolia, Ficus_deltoidea Port_Jackson_fig, rusty_rig, little-leaf_fig, Botany_Bay_fig, Ficus_rubiginosa sycamore, sycamore_fig, mulberry_fig, Ficus_sycomorus paper_mulberry, Broussonetia_papyrifera trumpetwood, trumpet-wood, trumpet_tree, snake_wood, imbauba, Cecropia_peltata elm, elm_tree winged_elm, wing_elm, Ulmus_alata American_elm, white_elm, water_elm, rock_elm, Ulmus_americana smooth-leaved_elm, European_field_elm, Ulmus_carpinifolia cedar_elm, Ulmus_crassifolia witch_elm, wych_elm, Ulmus_glabra Dutch_elm, Ulmus_hollandica Huntingdon_elm, Ulmus_hollandica_vegetata water_elm, Ulmus_laevis Chinese_elm, Ulmus_parvifolia English_elm, European_elm, Ulmus_procera Siberian_elm, Chinese_elm, dwarf_elm, Ulmus_pumila slippery_elm, red_elm, Ulmus_rubra Jersey_elm, guernsey_elm, wheately_elm, Ulmus_sarniensis, Ulmus_campestris_sarniensis, Ulmus_campestris_wheatleyi September_elm, red_elm, Ulmus_serotina rock_elm, Ulmus_thomasii hackberry, nettle_tree European_hackberry, 
Mediterranean_hackberry, Celtis_australis American_hackberry, Celtis_occidentalis sugarberry, Celtis_laevigata iridaceous_plant bearded_iris beardless_iris orrisroot, orris dwarf_iris, Iris_cristata Dutch_iris, Iris_filifolia Florentine_iris, orris, Iris_germanica_florentina, Iris_florentina stinking_iris, gladdon, gladdon_iris, stinking_gladwyn, roast_beef_plant, Iris_foetidissima German_iris, Iris_germanica Japanese_iris, Iris_kaempferi German_iris, Iris_kochii Dalmatian_iris, Iris_pallida Persian_iris, Iris_persica Dutch_iris, Iris_tingitana dwarf_iris, vernal_iris, Iris_verna Spanish_iris, xiphium_iris, Iris_xiphium blackberry-lily, leopard_lily, Belamcanda_chinensis crocus saffron, saffron_crocus, Crocus_sativus corn_lily blue-eyed_grass wandflower, Sparaxis_tricolor amaryllis salsilla, Bomarea_edulis salsilla, Bomarea_salsilla blood_lily Cape_tulip, Haemanthus_coccineus hippeastrum, Hippeastrum_puniceum narcissus daffodil, Narcissus_pseudonarcissus jonquil, Narcissus_jonquilla jonquil Jacobean_lily, Aztec_lily, Strekelia_formosissima liliaceous_plant mountain_lily, Lilium_auratum Canada_lily, wild_yellow_lily, meadow_lily, wild_meadow_lily, Lilium_canadense tiger_lily, leopard_lily, pine_lily, Lilium_catesbaei Columbia_tiger_lily, Oregon_lily, Lilium_columbianum tiger_lily, devil_lily, kentan, Lilium_lancifolium Easter_lily, Bermuda_lily, white_trumpet_lily, Lilium_longiflorum coast_lily, Lilium_maritinum Turk's-cap, martagon, Lilium_martagon Michigan_lily, Lilium_michiganense leopard_lily, panther_lily, Lilium_pardalinum Turk's-cap, Turk's_cap-lily, Lilium_superbum African_lily, African_tulip, blue_African_lily, Agapanthus_africanus colicroot, colic_root, crow_corn, star_grass, unicorn_root ague_root, ague_grass, Aletris_farinosa yellow_colicroot, Aletris_aurea alliaceous_plant Hooker's_onion, Allium_acuminatum wild_leek, Levant_garlic, kurrat, Allium_ampeloprasum Canada_garlic, meadow_leek, rose_leek, Allium_canadense keeled_garlic, Allium_carinatum onion 
shallot, eschalot, multiplier_onion, Allium_cepa_aggregatum, Allium_ascalonicum nodding_onion, nodding_wild_onion, lady's_leek, Allium_cernuum Welsh_onion, Japanese_leek, Allium_fistulosum red-skinned_onion, Allium_haematochiton daffodil_garlic, flowering_onion, Naples_garlic, Allium_neopolitanum few-flowered_leek, Allium_paradoxum garlic, Allium_sativum sand_leek, giant_garlic, Spanish_garlic, rocambole, Allium_scorodoprasum chives, chive, cive, schnittlaugh, Allium_schoenoprasum crow_garlic, false_garlic, field_garlic, stag's_garlic, wild_garlic, Allium_vineale wild_garlic, wood_garlic, Ramsons, Allium_ursinum garlic_chive, Chinese_chive, Oriental_garlic, Allium_tuberosum round-headed_leek, Allium_sphaerocephalum three-cornered_leek, triquetrous_leek, Allium_triquetrum cape_aloe, Aloe_ferox kniphofia, tritoma, flame_flower, flame-flower, flameflower poker_plant, Kniphofia_uvaria red-hot_poker, Kniphofia_praecox fly_poison, Amianthum_muscaetoxicum, Amianthum_muscitoxicum amber_lily, Anthericum_torreyi asparagus, edible_asparagus, Asparagus_officinales asparagus_fern, Asparagus_setaceous, Asparagus_plumosus smilax, Asparagus_asparagoides asphodel Jacob's_rod aspidistra, cast-iron_plant, bar-room_plant, Aspidistra_elatio coral_drops, Bessera_elegans Christmas_bells climbing_onion, Bowiea_volubilis mariposa, mariposa_tulip, mariposa_lily globe_lily, fairy_lantern cat's-ear white_globe_lily, white_fairy_lantern, Calochortus_albus yellow_globe_lily, golden_fairy_lantern, Calochortus_amabilis rose_globe_lily, Calochortus_amoenus star_tulip, elegant_cat's_ears, Calochortus_elegans desert_mariposa_tulip, Calochortus_kennedyi yellow_mariposa_tulip, Calochortus_luteus sagebrush_mariposa_tulip, Calochortus_macrocarpus sego_lily, Calochortus_nuttallii camas, camass, quamash, camosh, camash common_camas, Camassia_quamash Leichtlin's_camas, Camassia_leichtlinii wild_hyacinth, indigo_squill, Camassia_scilloides dogtooth_violet, dogtooth, dog's-tooth_violet white_dogtooth_violet, 
white_dog's-tooth_violet, blonde_lilian, Erythronium_albidum yellow_adder's_tongue, trout_lily, amberbell, Erythronium_americanum European_dogtooth, Erythronium_dens-canis fawn_lily, Erythronium_californicum glacier_lily, snow_lily, Erythronium_grandiflorum avalanche_lily, Erythronium_montanum fritillary, checkered_lily mission_bells, rice-grain_fritillary, Fritillaria_affinis, Fritillaria_lanceolata, Fritillaria_mutica mission_bells, black_fritillary, Fritillaria_biflora stink_bell, Fritillaria_agrestis crown_imperial, Fritillaria_imperialis white_fritillary, Fritillaria_liliaceae snake's_head_fritillary, guinea-hen_flower, checkered_daffodil, leper_lily, Fritillaria_meleagris adobe_lily, pink_fritillary, Fritillaria_pluriflora scarlet_fritillary, Fritillaria_recurva tulip dwarf_tulip, Tulipa_armena, Tulipa_suaveolens lady_tulip, candlestick_tulip, Tulipa_clusiana Tulipa_gesneriana cottage_tulip Darwin_tulip gloriosa, glory_lily, climbing_lily, creeping_lily, Gloriosa_superba lemon_lily, Hemerocallis_lilio-asphodelus, Hemerocallis_flava common_hyacinth, Hyacinthus_orientalis Roman_hyacinth, Hyacinthus_orientalis_albulus summer_hyacinth, cape_hyacinth, Hyacinthus_candicans, Galtonia_candicans star-of-Bethlehem bath_asparagus, Prussian_asparagus, Ornithogalum_pyrenaicum grape_hyacinth common_grape_hyacinth, Muscari_neglectum tassel_hyacinth, Muscari_comosum scilla, squill spring_squill, Scilla_verna, sea_onion false_asphodel Scotch_asphodel, Tofieldia_pusilla sea_squill, sea_onion, squill, Urginea_maritima squill butcher's_broom, Ruscus_aculeatus bog_asphodel European_bog_asphodel, Narthecium_ossifragum American_bog_asphodel, Narthecium_americanum hellebore, false_hellebore white_hellebore, American_hellebore, Indian_poke, bugbane, Veratrum_viride squaw_grass, bear_grass, Xerophyllum_tenax death_camas, zigadene alkali_grass, Zigadenus_elegans white_camas, Zigadenus_glaucus poison_camas, Zigadenus_nuttalli grassy_death_camas, Zigadenus_venenosus, 
Zigadenus_venenosus_gramineus prairie_wake-robin, prairie_trillium, Trillium_recurvatum dwarf-white_trillium, snow_trillium, early_wake-robin herb_Paris, Paris_quadrifolia sarsaparilla bullbrier, greenbrier, catbrier, horse_brier, horse-brier, brier, briar, Smilax_rotundifolia rough_bindweed, Smilax_aspera clintonia, Clinton's_lily false_lily_of_the_valley, Maianthemum_canadense false_lily_of_the_valley, Maianthemum_bifolium Solomon's-seal great_Solomon's-seal, Polygonatum_biflorum, Polygonatum_commutatum bellwort, merry_bells, wild_oats strawflower, cornflower, Uvularia_grandiflora pia, Indian_arrowroot, Tacca_leontopetaloides, Tacca_pinnatifida agave, century_plant, American_aloe American_agave, Agave_americana sisal, Agave_sisalana maguey, cantala, Agave_cantala maguey, Agave_atrovirens Agave_tequilana cabbage_tree, grass_tree, Cordyline_australis dracaena tuberose, Polianthes_tuberosa sansevieria, bowstring_hemp African_bowstring_hemp, African_hemp, Sansevieria_guineensis Ceylon_bowstring_hemp, Sansevieria_zeylanica mother-in-law's_tongue, snake_plant, Sansevieria_trifasciata Spanish_bayonet, Yucca_aloifolia Spanish_bayonet, Yucca_baccata Joshua_tree, Yucca_brevifolia soapweed, soap-weed, soap_tree, Yucca_elata Adam's_needle, Adam's_needle-and-thread, spoonleaf_yucca, needle_palm, Yucca_filamentosa bear_grass, Yucca_glauca Spanish_dagger, Yucca_gloriosa Our_Lord's_candle, Yucca_whipplei water_shamrock, buckbean, bogbean, bog_myrtle, marsh_trefoil, Menyanthes_trifoliata butterfly_bush, buddleia yellow_jasmine, yellow_jessamine, Carolina_jasmine, evening_trumpet_flower, Gelsemium_sempervirens flax calabar_bean, ordeal_bean bonduc, bonduc_tree, Caesalpinia_bonduc, Caesalpinia_bonducella divi-divi, Caesalpinia_coriaria Mysore_thorn, Caesalpinia_decapetala, Caesalpinia_sepiaria brazilian_ironwood, Caesalpinia_ferrea bird_of_paradise, poinciana, Caesalpinia_gilliesii, Poinciana_gilliesii shingle_tree, Acrocarpus_fraxinifolius mountain_ebony, orchid_tree, 
Bauhinia_variegata msasa, Brachystegia_speciformis cassia golden_shower_tree, drumstick_tree, purging_cassia, pudding_pipe_tree, canafistola, canafistula, Cassia_fistula pink_shower, pink_shower_tree, horse_cassia, Cassia_grandis rainbow_shower, Cassia_javonica horse_cassia, Cassia_roxburghii, Cassia_marginata carob, carob_tree, carob_bean_tree, algarroba, Ceratonia_siliqua carob, carob_bean, algarroba_bean, algarroba, locust_bean, locust_pod paloverde royal_poinciana, flamboyant, flame_tree, peacock_flower, Delonix_regia, Poinciana_regia locust_tree, locust water_locust, swamp_locust, Gleditsia_aquatica honey_locust, Gleditsia_triacanthos Kentucky_coffee_tree, bonduc, chicot, Gymnocladus_dioica logwood, logwood_tree, campeachy, bloodwood_tree, Haematoxylum_campechianum Jerusalem_thorn, horsebean, Parkinsonia_aculeata palo_verde, Parkinsonia_florida, Cercidium_floridum Dalmatian_laburnum, Petteria_ramentacea, Cytisus_ramentaceus senna avaram, tanner's_cassia, Senna_auriculata, Cassia_auriculata Alexandria_senna, Alexandrian_senna, true_senna, tinnevelly_senna, Indian_senna, Senna_alexandrina, Cassia_acutifolia, Cassia_augustifolia wild_senna, Senna_marilandica, Cassia_marilandica sicklepod, Senna_obtusifolia, Cassia_tora coffee_senna, mogdad_coffee, styptic_weed, stinking_weed, Senna_occidentalis, Cassia_occidentalis tamarind, tamarind_tree, tamarindo, Tamarindus_indica false_indigo, bastard_indigo, Amorpha_californica false_indigo, bastard_indigo, Amorpha_fruticosa hog_peanut, wild_peanut, Amphicarpaea_bracteata, Amphicarpa_bracteata angelim, andelmin cabbage_bark, cabbage-bark_tree, cabbage_tree, Andira_inermis kidney_vetch, Anthyllis_vulneraria groundnut, groundnut_vine, Indian_potato, potato_bean, wild_bean, Apios_americana, Apios_tuberosa rooibos, Aspalathus_linearis, Aspalathus_cedcarbergensis milk_vetch, milk-vetch alpine_milk_vetch, Astragalus_alpinus purple_milk_vetch, Astragalus_danicus camwood, African_sandalwood, Baphia_nitida wild_indigo, false_indigo 
blue_false_indigo, Baptisia_australis white_false_indigo, Baptisia_lactea indigo_broom, horsefly_weed, rattle_weed, Baptisia_tinctoria dhak, dak, palas, Butea_frondosa, Butea_monosperma pigeon_pea, pigeon-pea_plant, cajan_pea, catjang_pea, red_gram, dhal, dahl, Cajanus_cajan sword_bean, Canavalia_gladiata pea_tree, caragana Siberian_pea_tree, Caragana_arborescens Chinese_pea_tree, Caragana_sinica Moreton_Bay_chestnut, Australian_chestnut butterfly_pea, Centrosema_virginianum Judas_tree, love_tree, Circis_siliquastrum redbud, Cercis_canadensis western_redbud, California_redbud, Cercis_occidentalis tagasaste, Chamaecytisus_palmensis, Cytesis_proliferus weeping_tree_broom flame_pea chickpea, chickpea_plant, Egyptian_pea, Cicer_arietinum chickpea, garbanzo Kentucky_yellowwood, gopherwood, Cladrastis_lutea, Cladrastis_kentukea glory_pea, clianthus desert_pea, Sturt_pea, Sturt's_desert_pea, Clianthus_formosus, Clianthus_speciosus parrot's_beak, parrot's_bill, Clianthus_puniceus butterfly_pea, Clitoria_mariana blue_pea, butterfly_pea, Clitoria_turnatea telegraph_plant, semaphore_plant, Codariocalyx_motorius, Desmodium_motorium, Desmodium_gyrans bladder_senna, Colutea_arborescens axseed, crown_vetch, Coronilla_varia crotalaria, rattlebox guar, cluster_bean, Cyamopsis_tetragonolobus, Cyamopsis_psoraloides white_broom, white_Spanish_broom, Cytisus_albus, Cytisus_multiflorus common_broom, Scotch_broom, green_broom, Cytisus_scoparius rosewood, rosewood_tree Indian_blackwood, East_Indian_rosewood, East_India_rosewood, Indian_rosewood, Dalbergia_latifolia sissoo, sissu, sisham, Dalbergia_sissoo kingwood, kingwood_tree, Dalbergia_cearensis Brazilian_rosewood, caviuna_wood, jacaranda, Dalbergia_nigra cocobolo, Dalbergia_retusa blackwood, blackwood_tree bitter_pea derris derris_root, tuba_root, Derris_elliptica prairie_mimosa, prickle-weed, Desmanthus_ilinoensis tick_trefoil, beggar_lice, beggar's_lice beggarweed, Desmodium_tortuosum, Desmodium_purpureum Australian_pea, 
Dipogon_lignosus, Dolichos_lignosus coral_tree, erythrina kaffir_boom, Cape_kafferboom, Erythrina_caffra coral_bean_tree, Erythrina_corallodendrum ceibo, crybaby_tree, cry-baby_tree, common_coral_tree, Erythrina_crista-galli kaffir_boom, Transvaal_kafferboom, Erythrina_lysistemon Indian_coral_tree, Erythrina_variegata, Erythrina_Indica cork_tree, Erythrina_vespertilio goat's_rue, goat_rue, Galega_officinalis poison_bush, poison_pea, gastrolobium Spanish_broom, Spanish_gorse, Genista_hispanica woodwaxen, dyer's_greenweed, dyer's-broom, dyeweed, greenweed, whin, woadwaxen, Genista_tinctoria chanar, chanal, Geoffroea_decorticans gliricidia soy, soybean, soya_bean licorice, liquorice, Glycyrrhiza_glabra wild_licorice, wild_liquorice, American_licorice, American_liquorice, Glycyrrhiza_lepidota licorice_root Western_Australia_coral_pea, Hardenbergia_comnptoniana sweet_vetch, Hedysarum_boreale French_honeysuckle, sulla, Hedysarum_coronarium anil, Indigofera_suffruticosa, Indigofera_anil scarlet_runner, running_postman, Kennedia_prostrata hyacinth_bean, bonavist, Indian_bean, Egyptian_bean, Lablab_purpureus, Dolichos_lablab Scotch_laburnum, Alpine_golden_chain, Laburnum_alpinum vetchling wild_pea everlasting_pea beach_pea, sea_pea, Lathyrus_maritimus, Lathyrus_japonicus grass_vetch, grass_vetchling, Lathyrus_nissolia marsh_pea, Lathyrus_palustris common_vetchling, meadow_pea, yellow_vetchling, Lathyrus_pratensis grass_pea, Indian_pea, khesari, Lathyrus_sativus Tangier_pea, Tangier_peavine, Lalthyrus_tingitanus heath_pea, earth-nut_pea, earthnut_pea, tuberous_vetch, Lathyrus_tuberosus bicolor_lespediza, ezo-yama-hagi, Lespedeza_bicolor japanese_clover, japan_clover, jap_clover, Lespedeza_striata Korean_lespedeza, Lespedeza_stipulacea sericea_lespedeza, Lespedeza_sericea, Lespedeza_cuneata lentil, lentil_plant, Lens_culinaris lentil prairie_bird's-foot_trefoil, compass_plant, prairie_lotus, prairie_trefoil, Lotus_americanus bird's_foot_trefoil, bird's_foot_clover, 
babies'_slippers, bacon_and_eggs, Lotus_corniculatus winged_pea, asparagus_pea, Lotus_tetragonolobus lupine, lupin white_lupine, field_lupine, wolf_bean, Egyptian_lupine, Lupinus_albus tree_lupine, Lupinus_arboreus wild_lupine, sundial_lupine, Indian_beet, old-maid's_bonnet, Lupinus_perennis bluebonnet, buffalo_clover, Texas_bluebonnet, Lupinus_subcarnosus Texas_bluebonnet, Lupinus_texensis medic, medick, trefoil moon_trefoil, Medicago_arborea sickle_alfalfa, sickle_lucerne, sickle_medick, Medicago_falcata Calvary_clover, Medicago_intertexta, Medicago_echinus black_medick, hop_clover, yellow_trefoil, nonesuch_clover, Medicago_lupulina alfalfa, lucerne, Medicago_sativa millettia mucuna cowage, velvet_bean, Bengal_bean, Benghal_bean, Florida_bean, Mucuna_pruriens_utilis, Mucuna_deeringiana, Mucuna_aterrima, Stizolobium_deeringiana tolu_tree, tolu_balsam_tree, Myroxylon_balsamum, Myroxylon_toluiferum Peruvian_balsam, Myroxylon_pereirae, Myroxylon_balsamum_pereirae sainfoin, sanfoin, holy_clover, esparcet, Onobrychis_viciifolia, Onobrychis_viciaefolia restharrow, rest-harrow, Ononis_repens bead_tree, jumby_bean, jumby_tree, Ormosia_monosperma jumby_bead, jumbie_bead, Ormosia_coarctata locoweed, crazyweed, crazy_weed purple_locoweed, purple_loco, Oxytropis_lambertii tumbleweed yam_bean, Pachyrhizus_erosus shamrock_pea, Parochetus_communis pole_bean kidney_bean, frijol, frijole haricot wax_bean scarlet_runner, scarlet_runner_bean, Dutch_case-knife_bean, runner_bean, Phaseolus_coccineus, Phaseolus_multiflorus lima_bean, lima_bean_plant, Phaseolus_limensis sieva_bean, butter_bean, butter-bean_plant, lima_bean, Phaseolus_lunatus tepary_bean, Phaseolus_acutifolius_latifolius chaparral_pea, stingaree-bush, Pickeringia_montana Jamaica_dogwood, fish_fuddle, Piscidia_piscipula, Piscidia_erythrina pea garden_pea edible-pod_pea, edible-podded_pea, Pisum_sativum_macrocarpon sugar_snap_pea, snap_pea field_pea, field-pea_plant, Austrian_winter_pea, Pisum_sativum_arvense, 
Pisum_arvense field_pea common_flat_pea, native_holly, Playlobium_obtusangulum quira roble, Platymiscium_trinitatis Panama_redwood_tree, Panama_redwood, Platymiscium_pinnatum Indian_beech, Pongamia_glabra winged_bean, winged_pea, goa_bean, goa_bean_vine, Manila_bean, Psophocarpus_tetragonolobus breadroot, Indian_breadroot, pomme_blanche, pomme_de_prairie, Psoralea_esculenta bloodwood_tree, kiaat, Pterocarpus_angolensis kino, Pterocarpus_marsupium red_sandalwood, red_sanders, red_sanderswood, red_saunders, Pterocarpus_santalinus kudzu, kudzu_vine, Pueraria_lobata bristly_locust, rose_acacia, moss_locust, Robinia_hispida black_locust, yellow_locust, Robinia_pseudoacacia clammy_locust, Robinia_viscosa carib_wood, Sabinea_carinalis Colorado_River_hemp, Sesbania_exaltata scarlet_wisteria_tree, vegetable_hummingbird, Sesbania_grandiflora Japanese_pagoda_tree, Chinese_scholartree, Chinese_scholar_tree, Sophora_japonica, Sophora_sinensis mescal_bean, coral_bean, frijolito, frijolillo, Sophora_secundiflora kowhai, Sophora_tetraptera jade_vine, emerald_creeper, Strongylodon_macrobotrys hoary_pea bastard_indigo, Tephrosia_purpurea catgut, goat's_rue, wild_sweet_pea, Tephrosia_virginiana bush_pea false_lupine, golden_pea, yellow_pea, Thermopsis_macrophylla Carolina_lupine, Thermopsis_villosa tipu, tipu_tree, yellow_jacaranda, pride_of_Bolivia bird's_foot_trefoil, Trigonella_ornithopodioides fenugreek, Greek_clover, Trigonella_foenumgraecum gorse, furze, whin, Irish_gorse, Ulex_europaeus vetch tufted_vetch, bird_vetch, Calnada_pea, Vicia_cracca broad_bean, fava_bean, horsebean bitter_betch, Vicia_orobus bush_vetch, Vicia_sepium moth_bean, Vigna_aconitifolia, Phaseolus_aconitifolius snailflower, snail-flower, snail_flower, snail_bean, corkscrew_flower, Vigna_caracalla, Phaseolus_caracalla mung, mung_bean, green_gram, golden_gram, Vigna_radiata, Phaseolus_aureus cowpea, cowpea_plant, black-eyed_pea, Vigna_unguiculata, Vigna_sinensis cowpea, black-eyed_pea asparagus_bean, 
yard-long_bean, Vigna_unguiculata_sesquipedalis, Vigna_sesquipedalis swamp_oak, Viminaria_juncea, Viminaria_denudata keurboom, Virgilia_capensis, Virgilia_oroboides keurboom, Virgilia_divaricata Japanese_wistaria, Wisteria_floribunda Chinese_wistaria, Wisteria_chinensis American_wistaria, American_wisteria, Wisteria_frutescens silky_wisteria, Wisteria_venusta palm, palm_tree sago_palm feather_palm fan_palm palmetto coyol, coyol_palm, Acrocomia_vinifera grugru, gri-gri, grugru_palm, macamba, Acrocomia_aculeata areca betel_palm, Areca_catechu sugar_palm, gomuti, gomuti_palm, Arenga_pinnata piassava_palm, pissaba_palm, Bahia_piassava, bahia_coquilla, Attalea_funifera coquilla_nut palmyra, palmyra_palm, toddy_palm, wine_palm, lontar, longar_palm, Borassus_flabellifer calamus rattan, rattan_palm, Calamus_rotang lawyer_cane, Calamus_australis fishtail_palm wine_palm, jaggery_palm, kitul, kittul, kitul_tree, toddy_palm, Caryota_urens wax_palm, Ceroxylon_andicola, Ceroxylon_alpinum coconut, coconut_palm, coco_palm, coco, cocoa_palm, coconut_tree, Cocos_nucifera carnauba, carnauba_palm, wax_palm, Copernicia_prunifera, Copernicia_cerifera caranday, caranda, caranda_palm, wax_palm, Copernicia_australis, Copernicia_alba corozo, corozo_palm gebang_palm, Corypha_utan, Corypha_gebanga latanier, latanier_palm talipot, talipot_palm, Corypha_umbraculifera oil_palm African_oil_palm, Elaeis_guineensis American_oil_palm, Elaeis_oleifera palm_nut, palm_kernel cabbage_palm, Euterpe_oleracea cabbage_palm, cabbage_tree, Livistona_australis true_sago_palm, Metroxylon_sagu nipa_palm, Nipa_fruticans babassu, babassu_palm, coco_de_macao, Orbignya_phalerata, Orbignya_spesiosa, Orbignya_martiana babassu_nut cohune_palm, Orbignya_cohune, cohune cohune_nut date_palm, Phoenix_dactylifera ivory_palm, ivory-nut_palm, ivory_plant, Phytelephas_macrocarpa raffia_palm, Raffia_farinifera, Raffia_ruffia bamboo_palm, Raffia_vinifera lady_palm miniature_fan_palm, bamboo_palm, fern_rhapis, Rhapis_excelsa 
reed_rhapis, slender_lady_palm, Rhapis_humilis royal_palm, Roystonea_regia cabbage_palm, Roystonea_oleracea cabbage_palmetto, cabbage_palm, Sabal_palmetto saw_palmetto, scrub_palmetto, Serenoa_repens thatch_palm, thatch_tree, silver_thatch, broom_palm, Thrinax_parviflora key_palm, silvertop_palmetto, silver_thatch, Thrinax_microcarpa, Thrinax_morrisii, Thrinax_keyensis English_plantain, narrow-leaved_plantain, ribgrass, ribwort, ripple-grass, buckthorn, Plantago_lanceolata broad-leaved_plantain, common_plantain, white-man's_foot, whiteman's_foot, cart-track_plant, Plantago_major hoary_plantain, Plantago_media fleawort, psyllium, Spanish_psyllium, Plantago_psyllium rugel's_plantain, broad-leaved_plantain, Plantago_rugelii hoary_plantain, Plantago_virginica buckwheat, Polygonum_fagopyrum, Fagopyrum_esculentum prince's-feather, princess_feather, kiss-me-over-the-garden-gate, prince's-plume, Polygonum_orientale eriogonum umbrella_plant, Eriogonum_allenii wild_buckwheat, California_buckwheat, Erigonum_fasciculatum rhubarb, rhubarb_plant Himalayan_rhubarb, Indian_rhubarb, red-veined_pie_plant, Rheum_australe, Rheum_emodi pie_plant, garden_rhubarb, Rheum_cultorum, Rheum_rhabarbarum, Rheum_rhaponticum Chinese_rhubarb, Rheum_palmatum sour_dock, garden_sorrel, Rumex_acetosa sheep_sorrel, sheep's_sorrel, Rumex_acetosella bitter_dock, broad-leaved_dock, yellow_dock, Rumex_obtusifolius French_sorrel, garden_sorrel, Rumex_scutatus yellow-eyed_grass commelina spiderwort, dayflower pineapple, pineapple_plant, Ananas_comosus pipewort, Eriocaulon_aquaticum water_hyacinth, water_orchid, Eichhornia_crassipes, Eichhornia_spesiosa water_star_grass, mud_plantain, Heteranthera_dubia naiad, water_nymph water_plantain, Alisma_plantago-aquatica narrow-leaved_water_plantain hydrilla, Hydrilla_verticillata American_frogbit, Limnodium_spongia waterweed Canadian_pondweed, Elodea_canadensis tape_grass, eelgrass, wild_celery, Vallisneria_spiralis pondweed curled_leaf_pondweed, curly_pondweed, 
Potamogeton_crispus loddon_pondweed, Potamogeton_nodosus, Potamogeton_americanus frog's_lettuce arrow_grass, Triglochin_maritima horned_pondweed, Zannichellia_palustris eelgrass, grass_wrack, sea_wrack, Zostera_marina rose, rosebush hip, rose_hip, rosehip banksia_rose, Rosa_banksia damask_rose, summer_damask_rose, Rosa_damascena sweetbrier, sweetbriar, brier, briar, eglantine, Rosa_eglanteria Cherokee_rose, Rosa_laevigata musk_rose, Rosa_moschata agrimonia, agrimony harvest-lice, Agrimonia_eupatoria fragrant_agrimony, Agrimonia_procera alderleaf_Juneberry, alder-leaved_serviceberry, Amelanchier_alnifolia flowering_quince japonica, maule's_quince, Chaenomeles_japonica coco_plum, coco_plum_tree, cocoa_plum, icaco, Chrysobalanus_icaco cotoneaster Cotoneaster_dammeri Cotoneaster_horizontalis parsley_haw, parsley-leaved_thorn, Crataegus_apiifolia, Crataegus_marshallii scarlet_haw, Crataegus_biltmoreana blackthorn, pear_haw, pear_hawthorn, Crataegus_calpodendron, Crataegus_tomentosa cockspur_thorn, cockspur_hawthorn, Crataegus_crus-galli mayhaw, summer_haw, Crataegus_aestivalis red_haw, downy_haw, Crataegus_mollis, Crataegus_coccinea_mollis red_haw, Crataegus_pedicellata, Crataegus_coccinea quince, quince_bush, Cydonia_oblonga mountain_avens, Dryas_octopetala loquat, loquat_tree, Japanese_medlar, Japanese_plum, Eriobotrya_japonica beach_strawberry, Chilean_strawberry, Fragaria_chiloensis Virginia_strawberry, scarlet_strawberry, Fragaria_virginiana avens yellow_avens, Geum_alleppicum_strictum, Geum_strictum yellow_avens, Geum_macrophyllum prairie_smoke, purple_avens, Geum_triflorum bennet, white_avens, Geum_virginianum toyon, tollon, Christmasberry, Christmas_berry, Heteromeles_arbutifolia, Photinia_arbutifolia apple_tree apple, orchard_apple_tree, Malus_pumila wild_apple, crab_apple, crabapple crab_apple, crabapple, cultivated_crab_apple Siberian_crab, Siberian_crab_apple, cherry_apple, cherry_crab, Malus_baccata wild_crab, Malus_sylvestris American_crab_apple, 
garland_crab, Malus_coronaria Oregon_crab_apple, Malus_fusca Southern_crab_apple, flowering_crab, Malus_angustifolia Iowa_crab, Iowa_crab_apple, prairie_crab, western_crab_apple, Malus_ioensis Bechtel_crab, flowering_crab medlar, medlar_tree, Mespilus_germanica cinquefoil, five-finger silverweed, goose-tansy, goose_grass, Potentilla_anserina salad_burnet, burnet_bloodwort, pimpernel, Poterium_sanguisorba plum, plum_tree wild_plum, wild_plum_tree Allegheny_plum, Alleghany_plum, sloe, Prunus_alleghaniensis American_red_plum, August_plum, goose_plum, Prunus_americana chickasaw_plum, hog_plum, hog_plum_bush, Prunus_angustifolia beach_plum, beach_plum_bush, Prunus_maritima common_plum, Prunus_domestica bullace, Prunus_insititia damson_plum, damson_plum_tree, Prunus_domestica_insititia big-tree_plum, Prunus_mexicana Canada_plum, Prunus_nigra plumcot, plumcot_tree apricot, apricot_tree Japanese_apricot, mei, Prunus_mume common_apricot, Prunus_armeniaca purple_apricot, black_apricot, Prunus_dasycarpa cherry, cherry_tree wild_cherry, wild_cherry_tree wild_cherry sweet_cherry, Prunus_avium heart_cherry, oxheart, oxheart_cherry gean, mazzard, mazzard_cherry capulin, capulin_tree, Prunus_capuli cherry_laurel, laurel_cherry, mock_orange, wild_orange, Prunus_caroliniana cherry_plum, myrobalan, myrobalan_plum, Prunus_cerasifera sour_cherry, sour_cherry_tree, Prunus_cerasus amarelle, Prunus_cerasus_caproniana morello, Prunus_cerasus_austera marasca almond_tree almond, sweet_almond, Prunus_dulcis, Prunus_amygdalus, Amygdalus_communis bitter_almond, Prunus_dulcis_amara, Amygdalus_communis_amara jordan_almond dwarf_flowering_almond, Prunus_glandulosa holly-leaved_cherry, holly-leaf_cherry, evergreen_cherry, islay, Prunus_ilicifolia fuji, fuji_cherry, Prunus_incisa flowering_almond, oriental_bush_cherry, Prunus_japonica cherry_laurel, laurel_cherry, Prunus_laurocerasus Catalina_cherry, Prunus_lyonii bird_cherry, bird_cherry_tree hagberry_tree, European_bird_cherry, common_bird_cherry, 
Prunus_padus hagberry pin_cherry, Prunus_pensylvanica peach, peach_tree, Prunus_persica nectarine, nectarine_tree, Prunus_persica_nectarina sand_cherry, Prunus_pumila, Prunus_pumilla_susquehanae, Prunus_susquehanae, Prunus_cuneata Japanese_plum, Prunus_salicina black_cherry, black_cherry_tree, rum_cherry, Prunus_serotina flowering_cherry oriental_cherry, Japanese_cherry, Japanese_flowering_cherry, Prunus_serrulata Japanese_flowering_cherry, Prunus_sieboldii Sierra_plum, Pacific_plum, Prunus_subcordata rosebud_cherry, winter_flowering_cherry, Prunus_subhirtella Russian_almond, dwarf_Russian_almond, Prunus_tenella flowering_almond, Prunus_triloba chokecherry, chokecherry_tree, Prunus_virginiana chokecherry western_chokecherry, Prunus_virginiana_demissa, Prunus_demissa Pyracantha, pyracanth, fire_thorn, firethorn pear, pear_tree, Pyrus_communis fruit_tree bramble_bush lawyerbush, lawyer_bush, bush_lawyer, Rubus_cissoides, Rubus_australis stone_bramble, Rubus_saxatilis sand_blackberry, Rubus_cuneifolius boysenberry, boysenberry_bush loganberry, Rubus_loganobaccus, Rubus_ursinus_loganobaccus American_dewberry, Rubus_canadensis Northern_dewberry, American_dewberry, Rubus_flagellaris Southern_dewberry, Rubus_trivialis swamp_dewberry, swamp_blackberry, Rubus_hispidus European_dewberry, Rubus_caesius raspberry, raspberry_bush wild_raspberry, European_raspberry, framboise, Rubus_idaeus American_raspberry, Rubus_strigosus, Rubus_idaeus_strigosus black_raspberry, blackcap, blackcap_raspberry, thimbleberry, Rubus_occidentalis salmonberry, Rubus_spectabilis salmonberry, salmon_berry, thimbleberry, Rubus_parviflorus wineberry, Rubus_phoenicolasius mountain_ash rowan, rowan_tree, European_mountain_ash, Sorbus_aucuparia rowanberry American_mountain_ash, Sorbus_americana Western_mountain_ash, Sorbus_sitchensis service_tree, sorb_apple, sorb_apple_tree, Sorbus_domestica wild_service_tree, Sorbus_torminalis spirea, spiraea bridal_wreath, bridal-wreath, Saint_Peter's_wreath, 
St._Peter's_wreath, Spiraea_prunifolia madderwort, rubiaceous_plant Indian_madder, munjeet, Rubia_cordifolia madder, Rubia_tinctorum woodruff dagame, lemonwood_tree, Calycophyllum_candidissimum blolly, West_Indian_snowberry, Chiococca_alba coffee, coffee_tree Arabian_coffee, Coffea_arabica Liberian_coffee, Coffea_liberica robusta_coffee, Rio_Nunez_coffee, Coffea_robusta, Coffea_canephora cinchona, chinchona Cartagena_bark, Cinchona_cordifolia, Cinchona_lancifolia calisaya, Cinchona_officinalis, Cinchona_ledgeriana, Cinchona_calisaya cinchona_tree, Cinchona_pubescens cinchona, cinchona_bark, Peruvian_bark, Jesuit's_bark bedstraw sweet_woodruff, waldmeister, woodruff, fragrant_bedstraw, Galium_odoratum, Asperula_odorata Northern_bedstraw, Northern_snow_bedstraw, Galium_boreale yellow_bedstraw, yellow_cleavers, Our_Lady's_bedstraw, Galium_verum wild_licorice, Galium_lanceolatum cleavers, clivers, goose_grass, catchweed, spring_cleavers, Galium_aparine wild_madder, white_madder, white_bedstraw, infant's-breath, false_baby's_breath, Galium_mollugo cape_jasmine, cape_jessamine, Gardenia_jasminoides, Gardenia_augusta genipa genipap_fruit, jagua, marmalade_box, Genipa_Americana hamelia scarlet_bush, scarlet_hamelia, coloradillo, Hamelia_patens, Hamelia_erecta lemonwood, lemon-wood, lemonwood_tree, lemon-wood_tree, Psychotria_capensis negro_peach, Sarcocephalus_latifolius, Sarcocephalus_esculentus wild_medlar, wild_medlar_tree, medlar, Vangueria_infausta Spanish_tamarind, Vangueria_madagascariensis abelia bush_honeysuckle, Diervilla_sessilifolia American_twinflower, Linnaea_borealis_americana honeysuckle American_fly_honeysuckle, fly_honeysuckle, Lonicera_canadensis Italian_honeysuckle, Italian_woodbine, Lonicera_caprifolium yellow_honeysuckle, Lonicera_flava hairy_honeysuckle, Lonicera_hirsuta Japanese_honeysuckle, Lonicera_japonica Hall's_honeysuckle, Lonicera_japonica_halliana Morrow's_honeysuckle, Lonicera_morrowii woodbine, Lonicera_periclymenum trumpet_honeysuckle, 
coral_honeysuckle, trumpet_flower, trumpet_vine, Lonicera_sempervirens European_fly_honeysuckle, European_honeysuckle, Lonicera_xylosteum swamp_fly_honeysuckle snowberry, common_snowberry, waxberry, Symphoricarpos_alba coralberry, Indian_currant, Symphoricarpos_orbiculatus blue_elder, blue_elderberry, Sambucus_caerulea dwarf_elder, danewort, Sambucus_ebulus American_red_elder, red-berried_elder, stinking_elder, Sambucus_pubens European_red_elder, red-berried_elder, Sambucus_racemosa feverroot, horse_gentian, tinker's_root, wild_coffee, Triostium_perfoliatum cranberry_bush, cranberry_tree, American_cranberry_bush, highbush_cranberry, Viburnum_trilobum wayfaring_tree, twist_wood, twistwood, Viburnum_lantana guelder_rose, European_cranberrybush, European_cranberry_bush, crampbark, cranberry_tree, Viburnum_opulus arrow_wood, Viburnum_recognitum black_haw, Viburnum_prunifolium weigela, Weigela_florida teasel, teazel, teasle common_teasel, Dipsacus_fullonum fuller's_teasel, Dipsacus_sativus wild_teasel, Dipsacus_sylvestris scabious, scabiosa sweet_scabious, pincushion_flower, mournful_widow, Scabiosa_atropurpurea field_scabious, Scabiosa_arvensis jewelweed, lady's_earrings, orange_balsam, celandine, touch-me-not, Impatiens_capensis geranium cranesbill, crane's_bill wild_geranium, spotted_cranesbill, Geranium_maculatum meadow_cranesbill, Geranium_pratense Richardson's_geranium, Geranium_richardsonii herb_robert, herbs_robert, herb_roberts, Geranium_robertianum sticky_geranium, Geranium_viscosissimum dove's_foot_geranium, Geranium_molle rose_geranium, sweet-scented_geranium, Pelargonium_graveolens fish_geranium, bedding_geranium, zonal_pelargonium, Pelargonium_hortorum ivy_geranium, ivy-leaved_geranium, hanging_geranium, Pelargonium_peltatum apple_geranium, nutmeg_geranium, Pelargonium_odoratissimum lemon_geranium, Pelargonium_limoneum storksbill, heron's_bill musk_clover, muskus_grass, white-stemmed_filaree, Erodium_moschatum incense_tree elephant_tree, 
Bursera_microphylla gumbo-limbo, Bursera_simaruba Boswellia_carteri salai, Boswellia_serrata balm_of_gilead, Commiphora_meccanensis myrrh_tree, Commiphora_myrrha Protium_heptaphyllum Protium_guianense water_starwort barbados_cherry, acerola, Surinam_cherry, West_Indian_cherry, Malpighia_glabra mahogany, mahogany_tree chinaberry, chinaberry_tree, China_tree, Persian_lilac, pride-of-India, azederach, azedarach, Melia_azederach, Melia_azedarach neem, neem_tree, nim_tree, margosa, arishth, Azadirachta_indica, Melia_Azadirachta neem_seed Spanish_cedar, Spanish_cedar_tree, Cedrela_odorata satinwood, satinwood_tree, Chloroxylon_swietenia African_scented_mahogany, cedar_mahogany, sapele_mahogany, Entandrophragma_cylindricum silver_ash native_beech, flindosa, flindosy, Flindersia_australis bunji-bunji, Flindersia_schottiana African_mahogany lanseh_tree, langsat, langset, Lansium_domesticum true_mahogany, Cuban_mahogany, Dominican_mahogany, Swietinia_mahogani Honduras_mahogany, Swietinia_macrophylla Philippine_mahogany, Philippine_cedar, kalantas, Toona_calantas, Cedrela_calantas caracolito, Ruptiliocarpon_caracolito common_wood_sorrel, cuckoo_bread, shamrock, Oxalis_acetosella Bermuda_buttercup, English-weed, Oxalis_pes-caprae, Oxalis_cernua creeping_oxalis, creeping_wood_sorrel, Oxalis_corniculata goatsfoot, goat's_foot, Oxalis_caprina violet_wood_sorrel, Oxalis_violacea oca, oka, Oxalis_tuberosa, Oxalis_crenata carambola, carambola_tree, Averrhoa_carambola bilimbi, Averrhoa_bilimbi milkwort senega, Polygala_alba orange_milkwort, yellow_milkwort, candyweed, yellow_bachelor's_button, Polygala_lutea flowering_wintergreen, gaywings, bird-on-the-wing, fringed_polygala, Polygala_paucifolia Seneca_snakeroot, Seneka_snakeroot, senga_root, senega_root, senega_snakeroot, Polygala_senega common_milkwort, gand_flower, Polygala_vulgaris rue, herb_of_grace, Ruta_graveolens citrus, citrus_tree orange, orange_tree sour_orange, Seville_orange, bitter_orange, bitter_orange_tree, bigarade, 
marmalade_orange, Citrus_aurantium bergamot, bergamot_orange, Citrus_bergamia pomelo, pomelo_tree, pummelo, shaddock, Citrus_maxima, Citrus_grandis, Citrus_decumana citron, citron_tree, Citrus_medica grapefruit, Citrus_paradisi mandarin, mandarin_orange, mandarin_orange_tree, Citrus_reticulata tangerine, tangerine_tree clementine, clementine_tree satsuma, satsuma_tree sweet_orange, sweet_orange_tree, Citrus_sinensis temple_orange, temple_orange_tree, tangor, king_orange, Citrus_nobilis tangelo, tangelo_tree, ugli_fruit, Citrus_tangelo rangpur, rangpur_lime, lemanderin, Citrus_limonia lemon, lemon_tree, Citrus_limon sweet_lemon, sweet_lime, Citrus_limetta lime, lime_tree, Citrus_aurantifolia citrange, citrange_tree, Citroncirus_webberi fraxinella, dittany, burning_bush, gas_plant, Dictamnus_alba kumquat, cumquat, kumquat_tree marumi, marumi_kumquat, round_kumquat, Fortunella_japonica nagami, nagami_kumquat, oval_kumquat, Fortunella_margarita cork_tree, Phellodendron_amurense trifoliate_orange, trifoliata, wild_orange, Poncirus_trifoliata prickly_ash toothache_tree, sea_ash, Zanthoxylum_americanum, Zanthoxylum_fraxineum Hercules'-club, Hercules'-clubs, Hercules-club, Zanthoxylum_clava-herculis bitterwood_tree marupa, Simarouba_amara paradise_tree, bitterwood, Simarouba_glauca ailanthus tree_of_heaven, tree_of_the_gods, Ailanthus_altissima wild_mango, dika, wild_mango_tree, Irvingia_gabonensis pepper_tree, Kirkia_wilmsii Jamaica_quassia, bitterwood, Picrasma_excelsa, Picrasma_excelsum quassia, bitterwood, Quassia_amara nasturtium garden_nasturtium, Indian_cress, Tropaeolum_majus bush_nasturtium, Tropaeolum_minus canarybird_flower, canarybird_vine, canary_creeper, Tropaeolum_peregrinum bean_caper, Syrian_bean_caper, Zygophyllum_fabago palo_santo, Bulnesia_sarmienti lignum_vitae, Guaiacum_officinale creosote_bush, coville, hediondilla, Larrea_tridentata caltrop, devil's_weed, Tribulus_terestris willow, willow_tree osier white_willow, Huntingdon_willow, Salix_alba 
silver_willow, silky_willow, Salix_alba_sericea, Salix_sericea golden_willow, Salix_alba_vitellina, Salix_vitellina cricket-bat_willow, Salix_alba_caerulea arctic_willow, Salix_arctica weeping_willow, Babylonian_weeping_willow, Salix_babylonica Wisconsin_weeping_willow, Salix_pendulina, Salix_blanda, Salix_pendulina_blanda pussy_willow, Salix_discolor sallow goat_willow, florist's_willow, pussy_willow, Salix_caprea peachleaf_willow, peach-leaved_willow, almond-leaves_willow, Salix_amygdaloides almond_willow, black_Hollander, Salix_triandra, Salix_amygdalina hoary_willow, sage_willow, Salix_candida crack_willow, brittle_willow, snap_willow, Salix_fragilis prairie_willow, Salix_humilis dwarf_willow, Salix_herbacea grey_willow, gray_willow, Salix_cinerea arroyo_willow, Salix_lasiolepis shining_willow, Salix_lucida swamp_willow, black_willow, Salix_nigra bay_willow, laurel_willow, Salix_pentandra purple_willow, red_willow, red_osier, basket_willow, purple_osier, Salix_purpurea balsam_willow, Salix_pyrifolia creeping_willow, Salix_repens Sitka_willow, silky_willow, Salix_sitchensis dwarf_grey_willow, dwarf_gray_willow, sage_willow, Salix_tristis bearberry_willow, Salix_uva-ursi common_osier, hemp_willow, velvet_osier, Salix_viminalis poplar, poplar_tree balsam_poplar, hackmatack, tacamahac, Populus_balsamifera white_poplar, white_aspen, abele, aspen_poplar, silver-leaved_poplar, Populus_alba grey_poplar, gray_poplar, Populus_canescens black_poplar, Populus_nigra Lombardy_poplar, Populus_nigra_italica cottonwood Eastern_cottonwood, necklace_poplar, Populus_deltoides black_cottonwood, Western_balsam_poplar, Populus_trichocarpa swamp_cottonwood, black_cottonwood, downy_poplar, swamp_poplar, Populus_heterophylla aspen quaking_aspen, European_quaking_aspen, Populus_tremula American_quaking_aspen, American_aspen, Populus_tremuloides Canadian_aspen, bigtooth_aspen, bigtoothed_aspen, big-toothed_aspen, large-toothed_aspen, large_tooth_aspen, Populus_grandidentata 
sandalwood_tree, true_sandalwood, Santalum_album quandong, quandang, quandong_tree, Eucarya_acuminata, Fusanus_acuminatus rabbitwood, buffalo_nut, Pyrularia_pubera Loranthaceae, family_Loranthaceae, mistletoe_family mistletoe, Loranthus_europaeus American_mistletoe, Arceuthobium_pusillum mistletoe, Viscum_album, Old_World_mistletoe American_mistletoe, Phoradendron_serotinum, Phoradendron_flavescens aalii soapberry, soapberry_tree wild_China_tree, Sapindus_drumondii, Sapindus_marginatus China_tree, false_dogwood, jaboncillo, chinaberry, Sapindus_saponaria akee, akee_tree, Blighia_sapida soapberry_vine heartseed, Cardiospermum_grandiflorum balloon_vine, heart_pea, Cardiospermum_halicacabum longan, lungen, longanberry, Dimocarpus_longan, Euphorbia_litchi, Nephelium_longana harpullia harpulla, Harpullia_cupanioides Moreton_Bay_tulipwood, Harpullia_pendula litchi, lichee, litchi_tree, Litchi_chinensis, Nephelium_litchi Spanish_lime, Spanish_lime_tree, honey_berry, mamoncillo, genip, ginep, Melicocca_bijuga, Melicocca_bijugatus rambutan, rambotan, rambutan_tree, Nephelium_lappaceum pulasan, pulassan, pulasan_tree, Nephelium_mutabile pachysandra Allegheny_spurge, Allegheny_mountain_spurge, Pachysandra_procumbens bittersweet, American_bittersweet, climbing_bittersweet, false_bittersweet, staff_vine, waxwork, shrubby_bittersweet, Celastrus_scandens spindle_tree, spindleberry, spindleberry_tree winged_spindle_tree, Euonymous_alatus wahoo, burning_bush, Euonymus_atropurpureus strawberry_bush, wahoo, Euonymus_americanus evergreen_bittersweet, Euonymus_fortunei_radicans, Euonymus_radicans_vegetus cyrilla, leatherwood, white_titi, Cyrilla_racemiflora titi, buckwheat_tree, Cliftonia_monophylla crowberry maple silver_maple, Acer_saccharinum sugar_maple, rock_maple, Acer_saccharum red_maple, scarlet_maple, swamp_maple, Acer_rubrum moosewood, moose-wood, striped_maple, striped_dogwood, goosefoot_maple, Acer_pennsylvanicum Oregon_maple, big-leaf_maple, Acer_macrophyllum dwarf_maple, 
Rocky-mountain_maple, Acer_glabrum mountain_maple, mountain_alder, Acer_spicatum vine_maple, Acer_circinatum hedge_maple, field_maple, Acer_campestre Norway_maple, Acer_platanoides sycamore, great_maple, scottish_maple, Acer_pseudoplatanus box_elder, ash-leaved_maple, Acer_negundo California_box_elder, Acer_negundo_Californicum pointed-leaf_maple, Acer_argutum Japanese_maple, full_moon_maple, Acer_japonicum Japanese_maple, Acer_palmatum holly Chinese_holly, Ilex_cornuta bearberry, possum_haw, winterberry, Ilex_decidua inkberry, gallberry, gall-berry, evergreen_winterberry, Ilex_glabra mate, Paraguay_tea, Ilex_paraguariensis American_holly, Christmas_holly low_gallberry_holly tall_gallberry_holly yaupon_holly deciduous_holly juneberry_holly largeleaf_holly Geogia_holly common_winterberry_holly smooth_winterberry_holly cashew, cashew_tree, Anacardium_occidentale goncalo_alves, Astronium_fraxinifolium Venetian_sumac, wig_tree, Cotinus_coggygria laurel_sumac, Malosma_laurina, Rhus_laurina mango, mango_tree, Mangifera_indica pistachio, Pistacia_vera, pistachio_tree terebinth, Pistacia_terebinthus mastic, mastic_tree, lentisk, Pistacia_lentiscus Australian_sumac, Rhodosphaera_rhodanthema, Rhus_rhodanthema sumac, sumach, shumac smooth_sumac, scarlet_sumac, vinegar_tree, Rhus_glabra sugar-bush, sugar_sumac, Rhus_ovata staghorn_sumac, velvet_sumac, Virginian_sumac, vinegar_tree, Rhus_typhina squawbush, squaw-bush, skunkbush, Rhus_trilobata aroeira_blanca, Schinus_chichita pepper_tree, molle, Peruvian_mastic_tree, Schinus_molle Brazilian_pepper_tree, Schinus_terebinthifolius hog_plum, yellow_mombin, yellow_mombin_tree, Spondias_mombin mombin, mombin_tree, jocote, Spondias_purpurea poison_ash, poison_dogwood, poison_sumac, Toxicodendron_vernix, Rhus_vernix poison_ivy, markweed, poison_mercury, poison_oak, Toxicodendron_radicans, Rhus_radicans western_poison_oak, Toxicodendron_diversilobum, Rhus_diversiloba eastern_poison_oak, Toxicodendron_quercifolium, Rhus_quercifolia, 
Rhus_toxicodenedron varnish_tree, lacquer_tree, Chinese_lacquer_tree, Japanese_lacquer_tree, Japanese_varnish_tree, Japanese_sumac, Toxicodendron_vernicifluum, Rhus_verniciflua horse_chestnut, buckeye, Aesculus_hippocastanum buckeye, horse_chestnut, conker sweet_buckeye Ohio_buckeye dwarf_buckeye, bottlebrush_buckeye red_buckeye particolored_buckeye ebony, ebony_tree, Diospyros_ebenum marblewood, marble-wood, Andaman_marble, Diospyros_kurzii marblewood, marble-wood persimmon, persimmon_tree Japanese_persimmon, kaki, Diospyros_kaki American_persimmon, possumwood, Diospyros_virginiana date_plum, Diospyros_lotus buckthorn southern_buckthorn, shittimwood, shittim, mock_orange, Bumelia_lycioides false_buckthorn, chittamwood, chittimwood, shittimwood, black_haw, Bumelia_lanuginosa star_apple, caimito, Chrysophyllum_cainito satinleaf, satin_leaf, caimitillo, damson_plum, Chrysophyllum_oliviforme balata, balata_tree, beefwood, bully_tree, Manilkara_bidentata sapodilla, sapodilla_tree, Manilkara_zapota, Achras_zapota gutta-percha_tree, Palaquium_gutta gutta-percha_tree canistel, canistel_tree, Pouteria_campechiana_nervosa marmalade_tree, mammee, sapote, Pouteria_zapota, Calocarpum_zapota sweetleaf, Symplocus_tinctoria Asiatic_sweetleaf, sapphire_berry, Symplocus_paniculata styrax snowbell, Styrax_obassia Japanese_snowbell, Styrax_japonicum Texas_snowbell, Texas_snowbells, Styrax_texana silver-bell_tree, silverbell_tree, snowdrop_tree, opossum_wood, Halesia_carolina, Halesia_tetraptera carnivorous_plant pitcher_plant common_pitcher_plant, huntsman's_cup, huntsman's_cups, Sarracenia_purpurea hooded_pitcher_plant, Sarracenia_minor huntsman's_horn, huntsman's_horns, yellow_trumpet, yellow_pitcher_plant, trumpets, Sarracenia_flava tropical_pitcher_plant sundew, sundew_plant, daily_dew Venus's_flytrap, Venus's_flytraps, Dionaea_muscipula waterwheel_plant, Aldrovanda_vesiculosa Drosophyllum_lusitanicum roridula Australian_pitcher_plant, Cephalotus_follicularis sedum stonecrop 
rose-root, midsummer-men, Sedum_rosea orpine, orpin, livelong, live-forever, Sedum_telephium pinwheel, Aeonium_haworthii Christmas_bush, Christmas_tree, Ceratopetalum_gummiferum hortensia, Hydrangea_macrophylla_hortensis fall-blooming_hydrangea, Hydrangea_paniculata carpenteria, Carpenteria_californica decumary, Decumaria_barbata, Decumaria_barbara deutzia philadelphus mock_orange, syringa, Philadelphus_coronarius saxifrage, breakstone, rockfoil yellow_mountain_saxifrage, Saxifraga_aizoides meadow_saxifrage, fair-maids-of-France, Saxifraga_granulata mossy_saxifrage, Saxifraga_hypnoides western_saxifrage, Saxifraga_occidentalis purple_saxifrage, Saxifraga_oppositifolia star_saxifrage, starry_saxifrage, Saxifraga_stellaris strawberry_geranium, strawberry_saxifrage, mother-of-thousands, Saxifraga_stolonifera, Saxifraga_sarmentosam astilbe false_goatsbeard, Astilbe_biternata dwarf_astilbe, Astilbe_chinensis_pumila spirea, spiraea, Astilbe_japonica bergenia coast_boykinia, Boykinia_elata, Boykinia_occidentalis golden_saxifrage, golden_spleen umbrella_plant, Indian_rhubarb, Darmera_peltata, Peltiphyllum_peltatum bridal_wreath, bridal-wreath, Francoa_ramosa alumroot, alumbloom coralbells, Heuchera_sanguinea leatherleaf_saxifrage, Leptarrhena_pyrolifolia woodland_star, Lithophragma_affine, Lithophragma_affinis, Tellima_affinis prairie_star, Lithophragma_parviflorum miterwort, mitrewort, bishop's_cap five-point_bishop's_cap, Mitella_pentandra parnassia, grass-of-Parnassus bog_star, Parnassia_palustris fringed_grass_of_Parnassus, Parnassia_fimbriata false_alumroot, fringe_cups, Tellima_grandiflora foamflower, coolwart, false_miterwort, false_mitrewort, Tiarella_cordifolia false_miterwort, false_mitrewort, Tiarella_unifoliata pickaback_plant, piggyback_plant, youth-on-age, Tolmiea_menziesii currant, currant_bush black_currant, European_black_currant, Ribes_nigrum white_currant, Ribes_sativum gooseberry, gooseberry_bush, Ribes_uva-crispa, Ribes_grossularia plane_tree, 
sycamore, platan London_plane, Platanus_acerifolia American_sycamore, American_plane, buttonwood, Platanus_occidentalis oriental_plane, Platanus_orientalis California_sycamore, Platanus_racemosa Arizona_sycamore, Platanus_wrightii Greek_valerian, Polemonium_reptans northern_Jacob's_ladder, Polemonium_boreale skunkweed, skunk-weed, Polemonium_viscosum phlox moss_pink, mountain_phlox, moss_phlox, dwarf_phlox, Phlox_subulata evening-snow, Linanthus_dichotomus acanthus bear's_breech, bear's_breeches, sea_holly, Acanthus_mollis caricature_plant, Graptophyllum_pictum black-eyed_Susan, black-eyed_Susan_vine, Thunbergia_alata catalpa, Indian_bean Catalpa_bignioides Catalpa_speciosa desert_willow, Chilopsis_linearis calabash, calabash_tree, Crescentia_cujete calabash borage, tailwort, Borago_officinalis common_amsinckia, Amsinckia_intermedia anchusa bugloss, alkanet, Anchusa_officinalis cape_forget-me-not, Anchusa_capensis cape_forget-me-not, Anchusa_riparia Spanish_elm, Equador_laurel, salmwood, cypre, princewood, Cordia_alliodora princewood, Spanish_elm, Cordia_gerascanthus Chinese_forget-me-not, Cynoglossum_amabile hound's-tongue, Cynoglossum_officinale hound's-tongue, Cynoglossum_virginaticum blueweed, blue_devil, blue_thistle, viper's_bugloss, Echium_vulgare beggar's_lice, beggar_lice gromwell, Lithospermum_officinale puccoon, Lithospermum_caroliniense Virginia_bluebell, Virginia_cowslip, Mertensia_virginica garden_forget-me-not, Myosotis_sylvatica forget-me-not, mouse_ear, Myosotis_scorpiodes false_gromwell comfrey, cumfrey common_comfrey, boneset, Symphytum_officinale convolvulus bindweed field_bindweed, wild_morning-glory, Convolvulus_arvensis scammony, Convolvulus_scammonia silverweed dodder dichondra, Dichondra_micrantha cypress_vine, star-glory, Indian_pink, Ipomoea_quamoclit, Quamoclit_pennata moonflower, belle_de_nuit, Ipomoea_alba wild_potato_vine, wild_sweet_potato_vine, man-of-the-earth, manroot, scammonyroot, Ipomoea_panurata, Ipomoea_fastigiata 
red_morning-glory, star_ipomoea, Ipomoea_coccinea man-of-the-earth, Ipomoea_leptophylla scammony, Ipomoea_orizabensis Japanese_morning_glory, Ipomoea_nil imperial_Japanese_morning_glory, Ipomoea_imperialis gesneriad gesneria achimenes, hot_water_plant aeschynanthus lace-flower_vine, Alsobia_dianthiflora, Episcia_dianthiflora columnea episcia gloxinia Canterbury_bell, Gloxinia_perennis kohleria African_violet, Saintpaulia_ionantha streptocarpus Cape_primrose waterleaf Virginia_waterleaf, Shawnee_salad, shawny, Indian_salad, John's_cabbage, Hydrophyllum_virginianum yellow_bells, California_yellow_bells, whispering_bells, Emmanthe_penduliflora yerba_santa, Eriodictyon_californicum nemophila baby_blue-eyes, Nemophila_menziesii five-spot, Nemophila_maculata scorpionweed, scorpion_weed, phacelia California_bluebell, Phacelia_campanularia California_bluebell, whitlavia, Phacelia_minor, Phacelia_whitlavia fiddleneck, Phacelia_tanacetifolia fiesta_flower, Pholistoma_auritum, Nemophila_aurita basil_thyme, basil_balm, mother_of_thyme, Acinos_arvensis, Satureja_acinos giant_hyssop yellow_giant_hyssop, Agastache_nepetoides anise_hyssop, Agastache_foeniculum Mexican_hyssop, Agastache_mexicana bugle, bugleweed creeping_bugle, Ajuga_reptans erect_bugle, blue_bugle, Ajuga_genevensis pyramid_bugle, Ajuga_pyramidalis wood_mint hairy_wood_mint, Blephilia_hirsuta downy_wood_mint, Blephilia_celiata calamint common_calamint, Calamintha_sylvatica, Satureja_calamintha_officinalis large-flowered_calamint, Calamintha_grandiflora, Clinopodium_grandiflorum, Satureja_grandiflora lesser_calamint, field_balm, Calamintha_nepeta, Calamintha_nepeta_glantulosa, Satureja_nepeta, Satureja_calamintha_glandulosa wild_basil, cushion_calamint, Clinopodium_vulgare, Satureja_vulgaris horse_balm, horseweed, stoneroot, stone-root, richweed, stone_root, Collinsonia_canadensis coleus, flame_nettle country_borage, Coleus_aromaticus, Coleus_amboinicus, Plectranthus_amboinicus painted_nettle, Joseph's_coat, 
Coleus_blumei, Solenostemon_blumei, Solenostemon_scutellarioides Apalachicola_rosemary, Conradina_glabra dragonhead, dragon's_head, Dracocephalum_parviflorum elsholtzia hemp_nettle, dead_nettle, Galeopsis_tetrahit ground_ivy, alehoof, field_balm, gill-over-the-ground, runaway_robin, Glechoma_hederaceae, Nepeta_hederaceae pennyroyal, American_pennyroyal, Hedeoma_pulegioides hyssop, Hyssopus_officinalis dead_nettle white_dead_nettle, Lamium_album henbit, Lamium_amplexicaule English_lavender, Lavandula_angustifolia, Lavandula_officinalis French_lavender, Lavandula_stoechas spike_lavender, French_lavender, Lavandula_latifolia dagga, Cape_dagga, red_dagga, wilde_dagga, Leonotis_leonurus lion's-ear, Leonotis_nepetaefolia, Leonotis_nepetifolia motherwort, Leonurus_cardiaca pitcher_sage, Lepechinia_calycina, Sphacele_calycina bugleweed, Lycopus_virginicus water_horehound, Lycopus_americanus gipsywort, gypsywort, Lycopus_europaeus origanum oregano, marjoram, pot_marjoram, wild_marjoram, winter_sweet, Origanum_vulgare sweet_marjoram, knotted_marjoram, Origanum_majorana, Majorana_hortensis horehound common_horehound, white_horehound, Marrubium_vulgare lemon_balm, garden_balm, sweet_balm, bee_balm, beebalm, Melissa_officinalis corn_mint, field_mint, Mentha_arvensis water-mint, water_mint, Mentha_aquatica bergamot_mint, lemon_mint, eau_de_cologne_mint, Mentha_citrata horsemint, Mentha_longifolia peppermint, Mentha_piperita spearmint, Mentha_spicata apple_mint, applemint, Mentha_rotundifolia, Mentha_suaveolens pennyroyal, Mentha_pulegium yerba_buena, Micromeria_chamissonis, Micromeria_douglasii, Satureja_douglasii molucca_balm, bells_of_Ireland, Molucella_laevis monarda, wild_bergamot bee_balm, beebalm, bergamot_mint, oswego_tea, Monarda_didyma horsemint, Monarda_punctata bee_balm, beebalm, Monarda_fistulosa lemon_mint, horsemint, Monarda_citriodora plains_lemon_monarda, Monarda_pectinata basil_balm, Monarda_clinopodia mustang_mint, Monardella_lanceolata catmint, catnip, 
Nepeta_cataria basil beefsteak_plant, Perilla_frutescens_crispa phlomis Jerusalem_sage, Phlomis_fruticosa physostegia plectranthus patchouli, patchouly, pachouli, Pogostemon_cablin self-heal, heal_all, Prunella_vulgaris mountain_mint rosemary, Rosmarinus_officinalis clary_sage, Salvia_clarea purple_sage, chaparral_sage, Salvia_leucophylla cancerweed, cancer_weed, Salvia_lyrata common_sage, ramona, Salvia_officinalis meadow_clary, Salvia_pratensis clary, Salvia_sclarea pitcher_sage, Salvia_spathacea Mexican_mint, Salvia_divinorum wild_sage, wild_clary, vervain_sage, Salvia_verbenaca savory summer_savory, Satureja_hortensis, Satureia_hortensis winter_savory, Satureja_montana, Satureia_montana skullcap, helmetflower blue_pimpernel, blue_skullcap, mad-dog_skullcap, mad-dog_weed, Scutellaria_lateriflora hedge_nettle, dead_nettle, Stachys_sylvatica hedge_nettle, Stachys_palustris germander American_germander, wood_sage, Teucrium_canadense cat_thyme, marum, Teucrium_marum wood_sage, Teucrium_scorodonia thyme common_thyme, Thymus_vulgaris wild_thyme, creeping_thyme, Thymus_serpyllum blue_curls turpentine_camphor_weed, camphorweed, vinegarweed, Trichostema_lanceolatum bastard_pennyroyal, Trichostema_dichotomum bladderwort butterwort genlisea martynia, Martynia_annua common_unicorn_plant, devil's_claw, common_devil's_claw, elephant-tusk, proboscis_flower, ram's_horn, Proboscidea_louisianica sand_devil's_claw, Proboscidea_arenaria, Martynia_arenaria sweet_unicorn_plant, Proboscidea_fragrans, Martynia_fragrans figwort snapdragon white_snapdragon, Antirrhinum_coulterianum yellow_twining_snapdragon, Antirrhinum_filipes Mediterranean_snapdragon, Antirrhinum_majus kitten-tails Alpine_besseya, Besseya_alpina false_foxglove, Aureolaria_pedicularia, Gerardia_pedicularia false_foxglove, Aureolaria_virginica, Gerardia_virginica calceolaria, slipperwort Indian_paintbrush, painted_cup desert_paintbrush, Castilleja_chromosa giant_red_paintbrush, Castilleja_miniata great_plains_paintbrush, 
Castilleja_sessiliflora sulfur_paintbrush, Castilleja_sulphurea shellflower, shell-flower, turtlehead, snakehead, snake-head, Chelone_glabra maiden_blue-eyed_Mary, Collinsia_parviflora blue-eyed_Mary, Collinsia_verna foxglove, digitalis common_foxglove, fairy_bell, fingerflower, finger-flower, fingerroot, finger-root, Digitalis_purpurea yellow_foxglove, straw_foxglove, Digitalis_lutea gerardia blue_toadflax, old-field_toadflax, Linaria_canadensis toadflax, butter-and-eggs, wild_snapdragon, devil's_flax, Linaria_vulgaris golden-beard_penstemon, Penstemon_barbatus scarlet_bugler, Penstemon_centranthifolius red_shrubby_penstemon, redwood_penstemon Platte_River_penstemon, Penstemon_cyananthus hot-rock_penstemon, Penstemon_deustus Jones'_penstemon, Penstemon_dolius shrubby_penstemon, lowbush_penstemon, Penstemon_fruticosus narrow-leaf_penstemon, Penstemon_linarioides balloon_flower, scented_penstemon, Penstemon_palmeri Parry's_penstemon, Penstemon_parryi rock_penstemon, cliff_penstemon, Penstemon_rupicola Rydberg's_penstemon, Penstemon_rydbergii cascade_penstemon, Penstemon_serrulatus Whipple's_penstemon, Penstemon_whippleanus moth_mullein, Verbascum_blattaria white_mullein, Verbascum_lychnitis purple_mullein, Verbascum_phoeniceum common_mullein, great_mullein, Aaron's_rod, flannel_mullein, woolly_mullein, torch, Verbascum_thapsus veronica, speedwell field_speedwell, Veronica_agrestis brooklime, American_brooklime, Veronica_americana corn_speedwell, Veronica_arvensis brooklime, European_brooklime, Veronica_beccabunga germander_speedwell, bird's_eye, Veronica_chamaedrys water_speedwell, Veronica_michauxii, Veronica_anagallis-aquatica common_speedwell, gypsyweed, Veronica_officinalis purslane_speedwell, Veronica_peregrina thyme-leaved_speedwell, Veronica_serpyllifolia nightshade horse_nettle, ball_nettle, bull_nettle, ball_nightshade, Solanum_carolinense African_holly, Solanum_giganteum potato_vine, Solanum_jasmoides garden_huckleberry, wonderberry, sunberry, 
Solanum_nigrum_guineese, Solanum_melanocerasum, Solanum_burbankii naranjilla, Solanum_quitoense potato_vine, giant_potato_creeper, Solanum_wendlandii potato_tree, Brazilian_potato_tree, Solanum_wrightii, Solanum_macranthum belladonna, belladonna_plant, deadly_nightshade, Atropa_belladonna bush_violet, browallia lady-of-the-night, Brunfelsia_americana angel's_trumpet, maikoa, Brugmansia_arborea, Datura_arborea angel's_trumpet, Brugmansia_suaveolens, Datura_suaveolens red_angel's_trumpet, Brugmansia_sanguinea, Datura_sanguinea cone_pepper, Capsicum_annuum_conoides bird_pepper, Capsicum_frutescens_baccatum, Capsicum_baccatum day_jessamine, Cestrum_diurnum night_jasmine, night_jessamine, Cestrum_nocturnum tree_tomato, tamarillo thorn_apple jimsonweed, jimson_weed, Jamestown_weed, common_thorn_apple, apple_of_Peru, Datura_stramonium pichi, Fabiana_imbricata henbane, black_henbane, stinking_nightshade, Hyoscyamus_niger Egyptian_henbane, Hyoscyamus_muticus matrimony_vine, boxthorn common_matrimony_vine, Duke_of_Argyll's_tea_tree, Lycium_barbarum, Lycium_halimifolium Christmasberry, Christmas_berry, Lycium_carolinianum plum_tomato mandrake, devil's_apples, Mandragora_officinarum mandrake_root, mandrake apple_of_Peru, shoo_fly, Nicandra_physaloides flowering_tobacco, Jasmine_tobacco, Nicotiana_alata common_tobacco, Nicotiana_tabacum wild_tobacco, Indian_tobacco, Nicotiana_rustica cupflower, nierembergia whitecup, Nierembergia_repens, Nierembergia_rivularis petunia large_white_petunia, Petunia_axillaris violet-flowered_petunia, Petunia_integrifolia hybrid_petunia, Petunia_hybrida cape_gooseberry, purple_ground_cherry, Physalis_peruviana strawberry_tomato, dwarf_cape_gooseberry, Physalis_pruinosa tomatillo, jamberry, Mexican_husk_tomato, Physalis_ixocarpa tomatillo, miltomate, purple_ground_cherry, jamberry, Physalis_philadelphica yellow_henbane, Physalis_viscosa cock's_eggs, Salpichroa_organifolia, Salpichroa_rhomboidea salpiglossis painted_tongue, Salpiglossis_sinuata 
butterfly_flower, poor_man's_orchid, schizanthus Scopolia_carniolica chalice_vine, trumpet_flower, cupflower, Solandra_guttata verbena, vervain lantana black_mangrove, Avicennia_marina white_mangrove, Avicennia_officinalis black_mangrove, Aegiceras_majus teak, Tectona_grandis spurge sun_spurge, wartweed, wartwort, devil's_milk, Euphorbia_helioscopia petty_spurge, devil's_milk, Euphorbia_peplus medusa's_head, Euphorbia_medusae, Euphorbia_caput-medusae wild_spurge, flowering_spurge, tramp's_spurge, Euphorbia_corollata snow-on-the-mountain, snow-in-summer, ghost_weed, Euphorbia_marginata cypress_spurge, Euphorbia_cyparissias leafy_spurge, wolf's_milk, Euphorbia_esula hairy_spurge, Euphorbia_hirsuta poinsettia, Christmas_star, Christmas_flower, lobster_plant, Mexican_flameleaf, painted_leaf, Euphorbia_pulcherrima Japanese_poinsettia, mole_plant, paint_leaf, Euphorbia_heterophylla fire-on-the-mountain, painted_leaf, Mexican_fire_plant, Euphorbia_cyathophora wood_spurge, Euphorbia_amygdaloides dwarf_spurge, Euphorbia_exigua scarlet_plume, Euphorbia_fulgens naboom, cactus_euphorbia, Euphorbia_ingens crown_of_thorns, Christ_thorn, Christ_plant, Euphorbia_milii toothed_spurge, Euphorbia_dentata three-seeded_mercury, Acalypha_virginica croton, Croton_tiglium cascarilla, Croton_eluteria cascarilla_bark, eleuthera_bark, sweetwood_bark castor-oil_plant, castor_bean_plant, palma_christi, palma_christ, Ricinus_communis spurge_nettle, tread-softly, devil_nettle, pica-pica, Cnidoscolus_urens, Jatropha_urens, Jatropha_stimulosus physic_nut, Jatropha_curcus Para_rubber_tree, caoutchouc_tree, Hevea_brasiliensis cassava, casava bitter_cassava, manioc, mandioc, mandioca, tapioca_plant, gari, Manihot_esculenta, Manihot_utilissima cassava, manioc sweet_cassava, Manihot_dulcis candlenut, varnish_tree, Aleurites_moluccana tung_tree, tung, tung-oil_tree, Aleurites_fordii slipper_spurge, slipper_plant candelilla, Pedilanthus_bracteatus, Pedilanthus_pavonis Jewbush, Jew-bush, Jew_bush, 
redbird_cactus, redbird_flower, Pedilanthus_tithymaloides jumping_bean, jumping_seed, Mexican_jumping_bean camellia, camelia japonica, Camellia_japonica umbellifer, umbelliferous_plant wild_parsley fool's_parsley, lesser_hemlock, Aethusa_cynapium dill, Anethum_graveolens angelica, angelique garden_angelica, archangel, Angelica_Archangelica wild_angelica, Angelica_sylvestris chervil, beaked_parsley, Anthriscus_cereifolium cow_parsley, wild_chervil, Anthriscus_sylvestris wild_celery, Apium_graveolens astrantia, masterwort greater_masterwort, Astrantia_major caraway, Carum_carvi whorled_caraway water_hemlock, Cicuta_verosa spotted_cowbane, spotted_hemlock, spotted_water_hemlock hemlock, poison_hemlock, poison_parsley, California_fern, Nebraska_fern, winter_fern, Conium_maculatum earthnut, Conopodium_denudatum cumin, Cuminum_cyminum wild_carrot, Queen_Anne's_lace, Daucus_carota eryngo, eringo sea_holly, sea_holm, sea_eryngium, Eryngium_maritimum button_snakeroot, Eryngium_aquaticum rattlesnake_master, rattlesnake's_master, button_snakeroot, Eryngium_yuccifolium fennel common_fennel, Foeniculum_vulgare Florence_fennel, Foeniculum_dulce, Foeniculum_vulgare_dulce cow_parsnip, hogweed, Heracleum_sphondylium lovage, Levisticum_officinale sweet_cicely, Myrrhis_odorata water_fennel, Oenanthe_aquatica parsnip, Pastinaca_sativa cultivated_parsnip wild_parsnip, madnep parsley, Petroselinum_crispum Italian_parsley, flat-leaf_parsley, Petroselinum_crispum_neapolitanum Hamburg_parsley, turnip-rooted_parsley, Petroselinum_crispum_tuberosum anise, anise_plant, Pimpinella_anisum sanicle, snakeroot purple_sanicle, Sanicula_bipinnatifida European_sanicle, Sanicula_Europaea water_parsnip, Sium_suave greater_water_parsnip, Sium_latifolium skirret, Sium_sisarum dogwood, dogwood_tree, cornel common_white_dogwood, eastern_flowering_dogwood, Cornus_florida red_osier, red_osier_dogwood, red_dogwood, American_dogwood, redbrush, Cornus_stolonifera silky_dogwood, Cornus_obliqua silky_cornel, 
silky_dogwood, Cornus_amomum common_European_dogwood, red_dogwood, blood-twig, pedwood, Cornus_sanguinea bunchberry, dwarf_cornel, crackerberry, pudding_berry, Cornus_canadensis cornelian_cherry, Cornus_mas puka, Griselinia_lucida kapuka, Griselinia_littoralis valerian common_valerian, garden_heliotrope, Valeriana_officinalis common_corn_salad, lamb's_lettuce, Valerianella_olitoria, Valerianella_locusta red_valerian, French_honeysuckle, Centranthus_ruber filmy_fern, film_fern bristle_fern, filmy_fern hare's-foot_bristle_fern, Trichomanes_boschianum Killarney_fern, Trichomanes_speciosum kidney_fern, Trichomanes_reniforme flowering_fern, osmund royal_fern, royal_osmund, king_fern, ditch_fern, French_bracken, Osmunda_regalis interrupted_fern, Osmunda_clatonia crape_fern, Prince-of-Wales_fern, Prince-of-Wales_feather, Prince-of-Wales_plume, Leptopteris_superba, Todea_superba crepe_fern, king_fern, Todea_barbara curly_grass, curly_grass_fern, Schizaea_pusilla pine_fern, Anemia_adiantifolia climbing_fern creeping_fern, Hartford_fern, Lygodium_palmatum climbing_maidenhair, climbing_maidenhair_fern, snake_fern, Lygodium_microphyllum scented_fern, Mohria_caffrorum clover_fern, pepperwort nardoo, nardo, common_nardoo, Marsilea_drummondii water_clover, Marsilea_quadrifolia pillwort, Pilularia_globulifera regnellidium, Regnellidium_diphyllum floating-moss, Salvinia_rotundifolia, Salvinia_auriculata mosquito_fern, floating_fern, Carolina_pond_fern, Azolla_caroliniana adder's_tongue, adder's_tongue_fern ribbon_fern, Ophioglossum_pendulum grape_fern daisyleaf_grape_fern, daisy-leaved_grape_fern, Botrychium_matricariifolium leathery_grape_fern, Botrychium_multifidum rattlesnake_fern, Botrychium_virginianum flowering_fern, Helminthostachys_zeylanica powdery_mildew Dutch_elm_fungus, Ceratostomella_ulmi ergot, Claviceps_purpurea rye_ergot black_root_rot_fungus, Xylaria_mali dead-man's-fingers, dead-men's-fingers, Xylaria_polymorpha sclerotinia brown_cup earthball, false_truffle, 
puffball, hard-skinned_puffball Scleroderma_citrinum, Scleroderma_aurantium Scleroderma_flavidium, star_earthball Scleroderma_bovista, smooth_earthball Podaxaceae stalked_puffball stalked_puffball false_truffle Rhizopogon_idahoensis Truncocolumella_citrina mucor rhizopus bread_mold, Rhizopus_nigricans slime_mold, slime_mould true_slime_mold, acellular_slime_mold, plasmodial_slime_mold, myxomycete cellular_slime_mold dictostylium pond-scum_parasite potato_wart_fungus, Synchytrium_endobioticum white_fungus, Saprolegnia_ferax water_mold downy_mildew, false_mildew blue_mold_fungus, Peronospora_tabacina onion_mildew, Peronospora_destructor tobacco_mildew, Peronospora_hyoscyami white_rust pythium damping_off_fungus, Pythium_debaryanum Phytophthora_citrophthora Phytophthora_infestans clubroot_fungus, Plasmodiophora_brassicae Geglossaceae Sarcosomataceae Rufous_rubber_cup devil's_cigar devil's_urn truffle, earthnut, earth-ball club_fungus coral_fungus tooth_fungus lichen ascolichen basidiolichen lecanora manna_lichen archil, orchil roccella, Roccella_tinctoria beard_lichen, beard_moss, Usnea_barbata horsehair_lichen, horsetail_lichen reindeer_moss, reindeer_lichen, arctic_moss, Cladonia_rangiferina crottle, crottal, crotal Iceland_moss, Iceland_lichen, Cetraria_islandica fungus promycelium true_fungus basidiomycete, basidiomycetous_fungi mushroom agaric mushroom mushroom toadstool horse_mushroom, Agaricus_arvensis meadow_mushroom, field_mushroom, Agaricus_campestris shiitake, shiitake_mushroom, Chinese_black_mushroom, golden_oak_mushroom, Oriental_black_mushroom, Lentinus_edodes scaly_lentinus, Lentinus_lepideus royal_agaric, Caesar's_agaric, Amanita_caesarea false_deathcap, Amanita_mappa fly_agaric, Amanita_muscaria death_cap, death_cup, death_angel, destroying_angel, Amanita_phalloides blushing_mushroom, blusher, Amanita_rubescens destroying_angel, Amanita_verna chanterelle, chantarelle, Cantharellus_cibarius floccose_chanterelle, Cantharellus_floccosus pig's_ears, 
Cantharellus_clavatus cinnabar_chanterelle, Cantharellus_cinnabarinus jack-o-lantern_fungus, jack-o-lantern, jack-a-lantern, Omphalotus_illudens inky_cap, inky-cap_mushroom, Coprinus_atramentarius shaggymane, shaggy_cap, shaggymane_mushroom, Coprinus_comatus milkcap, Lactarius_delicioso fairy-ring_mushroom, Marasmius_oreades fairy_ring, fairy_circle oyster_mushroom, oyster_fungus, oyster_agaric, Pleurotus_ostreatus olive-tree_agaric, Pleurotus_phosphoreus Pholiota_astragalina Pholiota_aurea, golden_pholiota Pholiota_destruens Pholiota_flammans Pholiota_flavida nameko, viscid_mushroom, Pholiota_nameko Pholiota_squarrosa-adiposa Pholiota_squarrosa, scaly_pholiota Pholiota_squarrosoides Stropharia_ambigua Stropharia_hornemannii Stropharia_rugoso-annulata gill_fungus Entoloma_lividum, Entoloma_sinuatum Entoloma_aprile Chlorophyllum_molybdites lepiota parasol_mushroom, Lepiota_procera poisonous_parasol, Lepiota_morgani Lepiota_naucina Lepiota_rhacodes American_parasol, Lepiota_americana Lepiota_rubrotincta Lepiota_clypeolaria onion_stem, Lepiota_cepaestipes pink_disease_fungus, Corticium_salmonicolor bottom_rot_fungus, Corticium_solani potato_fungus, Pellicularia_filamentosa, Rhizoctinia_solani coffee_fungus, Pellicularia_koleroga blewits, Clitocybe_nuda sandy_mushroom, Tricholoma_populinum Tricholoma_pessundatum Tricholoma_sejunctum man-on-a-horse, Tricholoma_flavovirens Tricholoma_venenata Tricholoma_pardinum Tricholoma_vaccinum Tricholoma_aurantium Volvaria_bombycina Pluteus_aurantiorugosus Pluteus_magnus, sawdust_mushroom deer_mushroom, Pluteus_cervinus straw_mushroom, Chinese_mushroom, Volvariella_volvacea Volvariella_bombycina Clitocybe_clavipes Clitocybe_dealbata Clitocybe_inornata Clitocybe_robusta, Clytocybe_alba Clitocybe_irina, Tricholoma_irinum, Lepista_irina Clitocybe_subconnexa winter_mushroom, Flammulina_velutipes mycelium sclerotium sac_fungus ascomycete, ascomycetous_fungus Clavicipitaceae, grainy_club_mushrooms grainy_club yeast baker's_yeast, 
brewer's_yeast, Saccharomyces_cerevisiae wine-maker's_yeast, Saccharomyces_ellipsoides Aspergillus_fumigatus brown_root_rot_fungus, Thielavia_basicola discomycete, cup_fungus Leotia_lubrica Mitrula_elegans Sarcoscypha_coccinea, scarlet_cup Caloscypha_fulgens Aleuria_aurantia, orange_peel_fungus elf_cup Peziza_domicilina blood_cup, fairy_cup, Peziza_coccinea Urnula_craterium, urn_fungus Galiella_rufa Jafnea_semitosta morel common_morel, Morchella_esculenta, sponge_mushroom, sponge_morel Disciotis_venosa, cup_morel Verpa, bell_morel Verpa_bohemica, early_morel Verpa_conica, conic_Verpa black_morel, Morchella_conica, conic_morel, Morchella_angusticeps, narrowhead_morel Morchella_crassipes, thick-footed_morel Morchella_semilibera, half-free_morel, cow's_head Wynnea_americana Wynnea_sparassoides false_morel lorchel helvella Helvella_crispa, miter_mushroom Helvella_acetabulum Helvella_sulcata discina gyromitra Gyromitra_californica, California_false_morel Gyromitra_sphaerospora, round-spored_gyromitra Gyromitra_esculenta, brain_mushroom, beefsteak_morel Gyromitra_infula, saddled-shaped_false_morel Gyromitra_fastigiata, Gyromitra_brunnea Gyromitra_gigas gasteromycete, gastromycete stinkhorn, carrion_fungus common_stinkhorn, Phallus_impudicus Phallus_ravenelii dog_stinkhorn, Mutinus_caninus Calostoma_lutescens Calostoma_cinnabarina Calostoma_ravenelii stinky_squid, Pseudocolus_fusiformis puffball, true_puffball giant_puffball, Calvatia_gigantea earthstar Geastrum_coronatum Radiigera_fuscogleba Astreus_pteridis Astreus_hygrometricus bird's-nest_fungus Gastrocybe_lateritia Macowanites_americanus polypore, pore_fungus, pore_mushroom bracket_fungus, shelf_fungus Albatrellus_dispansus Albatrellus_ovinus, sheep_polypore Neolentinus_ponderosus Oligoporus_leucospongia Polyporus_tenuiculus hen-of-the-woods, hen_of_the_woods, Polyporus_frondosus, Grifola_frondosa Polyporus_squamosus, scaly_polypore beefsteak_fungus, Fistulina_hepatica agaric, Fomes_igniarius bolete 
Boletus_chrysenteron Boletus_edulis Frost's_bolete, Boletus_frostii Boletus_luridus Boletus_mirabilis Boletus_pallidus Boletus_pulcherrimus Boletus_pulverulentus Boletus_roxanae Boletus_subvelutipes Boletus_variipes Boletus_zelleri Fuscoboletinus_paluster Fuscoboletinus_serotinus Leccinum_fibrillosum Suillus_albivelatus old-man-of-the-woods, Strobilomyces_floccopus Boletellus_russellii jelly_fungus snow_mushroom, Tremella_fuciformis witches'_butter, Tremella_lutescens Tremella_foliacea Tremella_reticulata Jew's-ear, Jew's-ears, ear_fungus, Auricularia_auricula rust, rust_fungus aecium flax_rust, flax_rust_fungus, Melampsora_lini blister_rust, Cronartium_ribicola wheat_rust, Puccinia_graminis apple_rust, cedar-apple_rust, Gymnosporangium_juniperi-virginianae smut, smut_fungus covered_smut loose_smut cornsmut, corn_smut boil_smut, Ustilago_maydis Sphacelotheca, genus_Sphacelotheca head_smut, Sphacelotheca_reiliana bunt, Tilletia_caries bunt, stinking_smut, Tilletia_foetida onion_smut, Urocystis_cepulae flag_smut_fungus wheat_flag_smut, Urocystis_tritici felt_fungus, Septobasidium_pseudopedicellatum waxycap Hygrocybe_acutoconica, conic_waxycap Hygrophorus_borealis Hygrophorus_caeruleus Hygrophorus_inocybiformis Hygrophorus_kauffmanii Hygrophorus_marzuolus Hygrophorus_purpurascens Hygrophorus_russula Hygrophorus_sordidus Hygrophorus_tennesseensis Hygrophorus_turundus Neohygrophorus_angelesianus Cortinarius_armillatus Cortinarius_atkinsonianus Cortinarius_corrugatus Cortinarius_gentilis Cortinarius_mutabilis, purple-staining_Cortinarius Cortinarius_semisanguineus Cortinarius_subfoetidus Cortinarius_violaceus Gymnopilus_spectabilis Gymnopilus_validipes Gymnopilus_ventricosus mold, mould mildew verticillium monilia candida Candida_albicans, Monilia_albicans blastomycete yellow_spot_fungus, Cercospora_kopkei green_smut_fungus, Ustilaginoidea_virens dry_rot rhizoctinia houseplant bedder, bedding_plant succulent cultivar weed wort brier aril sporophyll, sporophyl sporangium, 
spore_case, spore_sac sporangiophore ascus ascospore arthrospore eusporangium tetrasporangium gametangium sorus sorus partial_veil lignum vascular_ray, medullary_ray phloem, bast evergreen, evergreen_plant deciduous_plant poisonous_plant vine creeper tendril root_climber lignosae arborescent_plant snag tree timber_tree treelet arbor bean_tree pollard sapling shade_tree gymnospermous_tree conifer, coniferous_tree angiospermous_tree, flowering_tree nut_tree spice_tree fever_tree stump, tree_stump bonsai ming_tree ming_tree undershrub subshrub, suffrutex bramble liana geophyte desert_plant, xerophyte, xerophytic_plant, xerophile, xerophilous_plant mesophyte, mesophytic_plant marsh_plant, bog_plant, swamp_plant hemiepiphyte, semiepiphyte strangler, strangler_tree lithophyte, lithophytic_plant saprobe autophyte, autophytic_plant, autotroph, autotrophic_organism root taproot prop_root prophyll rootstock quickset stolon, runner, offset tuberous_plant rhizome, rootstock, rootstalk rachis caudex cladode, cladophyll, phylloclad, phylloclade receptacle scape, flower_stalk umbel petiole, leafstalk peduncle pedicel, pedicle flower_cluster raceme panicle thyrse, thyrsus cyme cymule glomerule scorpioid_cyme ear, spike, capitulum spadix bulbous_plant bulbil, bulblet cormous_plant fruit fruitlet seed bean nut nutlet kernel, meat syconium berry aggregate_fruit, multiple_fruit, syncarp simple_fruit, bacca acinus drupe, stone_fruit drupelet pome, false_fruit pod, seedpod loment pyxidium, pyxis husk cornhusk pod, cod, seedcase accessory_fruit, pseudocarp buckthorn buckthorn_berry, yellow_berry cascara_buckthorn, bearberry, bearwood, chittamwood, chittimwood, Rhamnus_purshianus cascara, cascara_sagrada, chittam_bark, chittem_bark Carolina_buckthorn, indian_cherry, Rhamnus_carolinianus coffeeberry, California_buckthorn, California_coffee, Rhamnus_californicus redberry, red-berry, Rhamnus_croceus nakedwood jujube, jujube_bush, Christ's-thorn, Jerusalem_thorn, Ziziphus_jujuba 
Christ's-thorn, Jerusalem_thorn, Paliurus_spina-christi hazel, hazel_tree, Pomaderris_apetala fox_grape, Vitis_labrusca muscadine, Vitis_rotundifolia vinifera, vinifera_grape, common_grape_vine, Vitis_vinifera Pinot_blanc Sauvignon_grape Sauvignon_blanc Muscadet Riesling Zinfandel Chenin_blanc malvasia Verdicchio Boston_ivy, Japanese_ivy, Parthenocissus_tricuspidata Virginia_creeper, American_ivy, woodbine, Parthenocissus_quinquefolia true_pepper, pepper_vine betel, betel_pepper, Piper_betel cubeb schizocarp peperomia watermelon_begonia, Peperomia_argyreia, Peperomia_sandersii yerba_mansa, Anemopsis_californica pinna, pinnule frond bract bracteole, bractlet involucre glume palmate_leaf pinnate_leaf bijugate_leaf, bijugous_leaf, twice-pinnate decompound_leaf acuminate_leaf deltoid_leaf ensiform_leaf linear_leaf, elongate_leaf lyrate_leaf obtuse_leaf oblanceolate_leaf pandurate_leaf, panduriform_leaf reniform_leaf spatulate_leaf even-pinnate_leaf, abruptly-pinnate_leaf odd-pinnate_leaf pedate_leaf crenate_leaf dentate_leaf denticulate_leaf erose_leaf runcinate_leaf prickly-edged_leaf deadwood haulm, halm branchlet, twig, sprig osier giant_scrambling_fern, Diplopterygium_longissimum umbrella_fern, fan_fern, Sticherus_flabellatus, Gleichenia_flabellata floating_fern, water_sprite, Ceratopteris_pteridioides polypody licorice_fern, Polypodium_glycyrrhiza grey_polypody, gray_polypody, resurrection_fern, Polypodium_polypodioides leatherleaf, leathery_polypody, coast_polypody, Polypodium_scouleri rock_polypody, rock_brake, American_wall_fern, Polypodium_virgianum common_polypody, adder's_fern, wall_fern, golden_maidenhair, golden_polypody, sweet_fern, Polypodium_vulgare bear's-paw_fern, Aglaomorpha_meyeniana strap_fern Florida_strap_fern, cow-tongue_fern, hart's-tongue_fern basket_fern, Drynaria_rigidula snake_polypody, Microgramma-piloselloides climbing_bird's_nest_fern, Microsorium_punctatum golden_polypody, serpent_fern, rabbit's-foot_fern, Phlebodium_aureum, 
Polypodium_aureum staghorn_fern South_American_staghorn, Platycerium_andinum common_staghorn_fern, elkhorn_fern, Platycerium_bifurcatum, Platycerium_alcicorne felt_fern, tongue_fern, Pyrrosia_lingua, Cyclophorus_lingua potato_fern, Solanopteris_bifrons myrmecophyte grass_fern, ribbon_fern, Vittaria_lineata spleenwort black_spleenwort, Asplenium_adiantum-nigrum bird's_nest_fern, Asplenium_nidus ebony_spleenwort, Scott's_Spleenwort, Asplenium_platyneuron black-stem_spleenwort, black-stemmed_spleenwort, little_ebony_spleenwort walking_fern, walking_leaf, Asplenium_rhizophyllum, Camptosorus_rhizophyllus green_spleenwort, Asplenium_viride mountain_spleenwort, Asplenium_montanum lobed_spleenwort, Asplenium_pinnatifidum lanceolate_spleenwort, Asplenium_billotii hart's-tongue, hart's-tongue_fern, Asplenium_scolopendrium, Phyllitis_scolopendrium scale_fern, scaly_fern, Asplenium_ceterach, Ceterach_officinarum scolopendrium deer_fern, Blechnum_spicant doodia, rasp_fern chain_fern Virginia_chain_fern, Woodwardia_virginica silver_tree_fern, sago_fern, black_tree_fern, Cyathea_medullaris davallia hare's-foot_fern Canary_Island_hare's_foot_fern, Davallia_canariensis squirrel's-foot_fern, ball_fern, Davalia_bullata, Davalia_bullata_mariesii, Davallia_Mariesii bracken, Pteridium_esculentum soft_tree_fern, Dicksonia_antarctica Scythian_lamb, Cibotium_barometz false_bracken, Culcita_dubia thyrsopteris, Thyrsopteris_elegans shield_fern, buckler_fern broad_buckler-fern, Dryopteris_dilatata fragrant_cliff_fern, fragrant_shield_fern, fragrant_wood_fern, Dryopteris_fragrans Goldie's_fern, Goldie's_shield_fern, goldie's_wood_fern, Dryopteris_goldiana wood_fern, wood-fern, woodfern male_fern, Dryopteris_filix-mas marginal_wood_fern, evergreen_wood_fern, leatherleaf_wood_fern, Dryopteris_marginalis mountain_male_fern, Dryopteris_oreades lady_fern, Athyrium_filix-femina Alpine_lady_fern, Athyrium_distentifolium silvery_spleenwort, glade_fern, narrow-leaved_spleenwort, Athyrium_pycnocarpon, 
Diplazium_pycnocarpon holly_fern, Cyrtomium_aculeatum, Polystichum_aculeatum bladder_fern brittle_bladder_fern, brittle_fern, fragile_fern, Cystopteris_fragilis mountain_bladder_fern, Cystopteris_montana bulblet_fern, bulblet_bladder_fern, berry_fern, Cystopteris_bulbifera silvery_spleenwort, Deparia_acrostichoides, Athyrium_thelypteroides oak_fern, Gymnocarpium_dryopteris, Thelypteris_dryopteris limestone_fern, northern_oak_fern, Gymnocarpium_robertianum ostrich_fern, shuttlecock_fern, fiddlehead, Matteuccia_struthiopteris, Pteretis_struthiopteris, Onoclea_struthiopteris hart's-tongue, hart's-tongue_fern, Olfersia_cervina, Polybotrya_cervina, Polybotria_cervina sensitive_fern, bead_fern, Onoclea_sensibilis Christmas_fern, canker_brake, dagger_fern, evergreen_wood_fern, Polystichum_acrostichoides holly_fern Braun's_holly_fern, prickly_shield_fern, Polystichum_braunii western_holly_fern, Polystichum_scopulinum soft_shield_fern, Polystichum_setiferum leather_fern, leatherleaf_fern, ten-day_fern, Rumohra_adiantiformis, Polystichum_adiantiformis button_fern, Tectaria_cicutaria Indian_button_fern, Tectaria_macrodonta woodsia rusty_woodsia, fragrant_woodsia, oblong_woodsia, Woodsia_ilvensis Alpine_woodsia, northern_woodsia, flower-cup_fern, Woodsia_alpina smooth_woodsia, Woodsia_glabella Boston_fern, Nephrolepis_exaltata, Nephrolepis_exaltata_bostoniensis basket_fern, toothed_sword_fern, Nephrolepis_pectinata golden_fern, leather_fern, Acrostichum_aureum maidenhair, maidenhair_fern common_maidenhair, Venushair, Venus'-hair_fern, southern_maidenhair, Venus_maidenhair, Adiantum_capillus-veneris American_maidenhair_fern, five-fingered_maidenhair_fern, Adiantum_pedatum Bermuda_maidenhair, Bermuda_maidenhair_fern, Adiantum_bellum brittle_maidenhair, brittle_maidenhair_fern, Adiantum_tenerum Farley_maidenhair, Farley_maidenhair_fern, Barbados_maidenhair, glory_fern, Adiantum_tenerum_farleyense annual_fern, Jersey_fern, Anogramma_leptophylla lip_fern, lipfern smooth_lip_fern, 
Alabama_lip_fern, Cheilanthes_alabamensis lace_fern, Cheilanthes_gracillima wooly_lip_fern, hairy_lip_fern, Cheilanthes_lanosa southwestern_lip_fern, Cheilanthes_eatonii bamboo_fern, Coniogramme_japonica American_rock_brake, American_parsley_fern, Cryptogramma_acrostichoides European_parsley_fern, mountain_parsley_fern, Cryptogramma_crispa hand_fern, Doryopteris_pedata cliff_brake, cliff-brake, rock_brake coffee_fern, Pellaea_andromedifolia purple_rock_brake, Pellaea_atropurpurea bird's-foot_fern, Pellaea_mucronata, Pellaea_ornithopus button_fern, Pellaea_rotundifolia silver_fern, Pityrogramma_argentea golden_fern, Pityrogramma_calomelanos_aureoflava gold_fern, Pityrogramma_chrysophylla Pteris_cretica spider_brake, spider_fern, Pteris_multifida ribbon_fern, spider_fern, Pteris_serrulata potato_fern, Marattia_salicina angiopteris, giant_fern, Angiopteris_evecta skeleton_fork_fern, Psilotum_nudum horsetail common_horsetail, field_horsetail, Equisetum_arvense swamp_horsetail, water_horsetail, Equisetum_fluviatile scouring_rush, rough_horsetail, Equisetum_hyemale, Equisetum_hyemale_robustum, Equisetum_robustum marsh_horsetail, Equisetum_palustre wood_horsetail, Equisetum_Sylvaticum variegated_horsetail, variegated_scouring_rush, Equisetum_variegatum club_moss, club-moss, lycopod shining_clubmoss, Lycopodium_lucidulum alpine_clubmoss, Lycopodium_alpinum fir_clubmoss, mountain_clubmoss, little_clubmoss, Lycopodium_selago ground_cedar, staghorn_moss, Lycopodium_complanatum ground_fir, princess_pine, tree_clubmoss, Lycopodium_obscurum foxtail_grass, Lycopodium_alopecuroides spikemoss, spike_moss, little_club_moss meadow_spikemoss, basket_spikemoss, Selaginella_apoda desert_selaginella, Selaginella_eremophila resurrection_plant, rose_of_Jericho, Selaginella_lepidophylla florida_selaginella, Selaginella_eatonii quillwort earthtongue, earth-tongue snuffbox_fern, meadow_fern, Thelypteris_palustris_pubescens, Dryopteris_thelypteris_pubescens christella mountain_fern, 
Oreopteris_limbosperma, Dryopteris_oreopteris New_York_fern, Parathelypteris_novae-boracensis, Dryopteris_noveboracensis Massachusetts_fern, Parathelypteris_simulata, Thelypteris_simulata beech_fern broad_beech_fern, southern_beech_fern, Phegopteris_hexagonoptera, Dryopteris_hexagonoptera, Thelypteris_hexagonoptera long_beech_fern, narrow_beech_fern, northern_beech_fern, Phegopteris_connectilis, Dryopteris_phegopteris, Thelypteris_phegopteris shoestring_fungus Armillaria_caligata, booted_armillaria Armillaria_ponderosa, white_matsutake Armillaria_zelleri honey_mushroom, honey_fungus, Armillariella_mellea milkweed, silkweed white_milkweed, Asclepias_albicans poke_milkweed, Asclepias_exaltata swamp_milkweed, Asclepias_incarnata Mead's_milkweed, Asclepias_meadii, Asclepia_meadii purple_silkweed, Asclepias_purpurascens showy_milkweed, Asclepias_speciosa poison_milkweed, horsetail_milkweed, Asclepias_subverticillata butterfly_weed, orange_milkweed, chigger_flower, chiggerflower, pleurisy_root, tuber_root, Indian_paintbrush, Asclepias_tuberosa whorled_milkweed, Asclepias_verticillata cruel_plant, Araujia_sericofera wax_plant, Hoya_carnosa silk_vine, Periploca_graeca stapelia, carrion_flower, starfish_flower Stapelias_asterias stephanotis Madagascar_jasmine, waxflower, Stephanotis_floribunda negro_vine, Vincetoxicum_hirsutum, Vincetoxicum_negrum zygospore tree_of_knowledge orangery pocketbook shit, dump cordage yard, pace extremum, peak leaf_shape, leaf_form equilateral figure pencil plane_figure, two-dimensional_figure solid_figure, three-dimensional_figure line bulb convex_shape, convexity concave_shape, concavity, incurvation, incurvature cylinder round_shape heart polygon, polygonal_shape convex_polygon concave_polygon reentrant_polygon, reentering_polygon amorphous_shape closed_curve simple_closed_curve, Jordan_curve S-shape wave, undulation extrados hook, crotchet envelope bight diameter cone, conoid, cone_shape funnel, funnel_shape oblong circle circle equator 
scallop, crenation, crenature, crenel, crenelle ring, halo, annulus, doughnut, anchor_ring loop bight helix, spiral element_of_a_cone element_of_a_cylinder ellipse, oval quadrate triangle, trigon, trilateral acute_triangle, acute-angled_triangle isosceles_triangle obtuse_triangle, obtuse-angled_triangle right_triangle, right-angled_triangle scalene_triangle parallel trapezoid star pentagon hexagon heptagon octagon nonagon decagon rhombus, rhomb, diamond spherical_polygon spherical_triangle convex_polyhedron concave_polyhedron cuboid quadrangular_prism bell, bell_shape, campana angular_distance true_anomaly spherical_angle angle_of_refraction acute_angle groove, channel rut bulge, bump, hump, swelling, gibbosity, gibbousness, jut, prominence, protuberance, protrusion, extrusion, excrescence belly bow, arc crescent ellipsoid hypotenuse balance, equilibrium, equipoise, counterbalance conformation symmetry, proportion spheroid, ellipsoid_of_revolution spherule toroid column, tower, pillar barrel, drum pipe, tube pellet bolus dewdrop ridge rim taper boundary, edge, bound incisure, incisura notch wrinkle, furrow, crease, crinkle, seam, line dermatoglyphic frown_line line_of_life, life_line, lifeline line_of_heart, heart_line, love_line, mensal_line crevice, cranny, crack, fissure, chap cleft roulette, line_roulette node tree, tree_diagram stemma brachium fork, crotch block, cube ovoid tetrahedron pentahedron hexahedron regular_polyhedron, regular_convex_solid, regular_convex_polyhedron, Platonic_body, Platonic_solid, ideal_solid polyhedral_angle cube, regular_hexahedron truncated_pyramid truncated_cone tail, tail_end tongue, knife trapezohedron wedge, wedge_shape, cuneus keel place, shoes herpes chlamydia wall micronutrient chyme ragweed_pollen pina_cloth chlorobenzylidenemalononitrile, CS_gas carbon, C, atomic_number_6 charcoal, wood_coal rock, stone gravel, crushed_rock aflatoxin alpha-tocopheral leopard bricks_and_mortar lagging hydraulic_cement, Portland_cement 
choline concrete glass_wool soil, dirt high_explosive litter fish_meal Greek_fire culture_medium, medium agar, nutrient_agar blood_agar hip_tile, hipped_tile hyacinth, jacinth hydroxide_ion, hydroxyl_ion ice, water_ice inositol linoleum, lino lithia_water lodestone, loadstone pantothenic_acid, pantothen paper papyrus pantile blacktop, blacktopping tarmacadam, tarmac paving, pavement, paving_material plaster poison_gas ridge_tile roughcast sand spackle, spackling_compound render wattle_and_daub stucco tear_gas, teargas, lacrimator, lachrymator toilet_tissue, toilet_paper, bathroom_tissue linseed, flaxseed vitamin fat-soluble_vitamin water-soluble_vitamin vitamin_A, antiophthalmic_factor, axerophthol, A vitamin_A1, retinol vitamin_A2, dehydroretinol B-complex_vitamin, B_complex, vitamin_B_complex, vitamin_B, B_vitamin, B vitamin_B1, thiamine, thiamin, aneurin, antiberiberi_factor vitamin_B12, cobalamin, cyanocobalamin, antipernicious_anemia_factor vitamin_B2, vitamin_G, riboflavin, lactoflavin, ovoflavin, hepatoflavin vitamin_B6, pyridoxine, pyridoxal, pyridoxamine, adermin vitamin_Bc, vitamin_M, folate, folic_acid, folacin, pteroylglutamic_acid, pteroylmonoglutamic_acid niacin, nicotinic_acid vitamin_D, calciferol, viosterol, ergocalciferol, cholecalciferol, D vitamin_E, tocopherol, E biotin, vitamin_H vitamin_K, naphthoquinone, antihemorrhagic_factor vitamin_K1, phylloquinone, phytonadione vitamin_K3, menadione vitamin_P, bioflavinoid, citrin vitamin_C, C, ascorbic_acid planking chipboard, hardboard knothole ================================================ FILE: pytorch_classification/grad_cam/main_cnn.py ================================================ import os import numpy as np import torch from PIL import Image import matplotlib.pyplot as plt from torchvision import models from torchvision import transforms from utils import GradCAM, show_cam_on_image, center_crop_img def main(): model = models.mobilenet_v3_large(pretrained=True) target_layers = 
class ResizeTransform:
    """Turn a Swin token sequence [B, H*W, C] into a CNN-style map [B, C, H, W].

    The spatial size of the final feature map is derived from the input image
    size by replaying the network's down-sampling steps.
    """

    def __init__(self, im_h: int, im_w: int):
        self.height = self.feature_size(im_h)
        self.width = self.feature_size(im_w)

    @staticmethod
    def feature_size(s):
        """Return the feature-map side length produced from an input side of `s`."""
        # PatchEmbed (/4), then PatchMerging1, PatchMerging2, PatchMerging3 (/2 each);
        # every step rounds up.
        for stride in (4, 2, 2, 2):
            s = math.ceil(s / stride)
        return s

    def __call__(self, x):
        batch_size, channels = x.size(0), x.size(2)
        # [batch_size, H*W, C] -> [batch_size, H, W, C]
        grid = x.reshape(batch_size, self.height, self.width, channels)
        # Bring the channels to the first dimension, like in CNNs.
        # [batch_size, H, W, C] -> [batch_size, C, H, W]
        return grid.permute(0, 3, 1, 2)
class ReshapeTransform:
    """Turn ViT tokens [B, 1 + H*W, C] into a CNN-style map [B, C, H, W].

    The patch grid size is taken from the model's patch embedding
    (`img_size // patch_size` along each axis).
    """

    def __init__(self, model):
        embed = model.patch_embed
        self.h = embed.img_size[0] // embed.patch_size[0]
        self.w = embed.img_size[1] // embed.patch_size[1]

    def __call__(self, x):
        # x: [batch_size, num_tokens, token_dim]; token 0 is the cls token, drop it.
        patch_tokens = x[:, 1:, :]
        grid = patch_tokens.reshape(x.size(0), self.h, self.w, x.size(2))
        # Bring the channels to the first dimension, like in CNNs.
        # [batch_size, H, W, C] -> [batch_size, C, H, W]
        return grid.permute(0, 3, 1, 2)
def drop_path_f(x, drop_prob: float = 0., training: bool = False):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Every sample (index along dim 0) is either zeroed as a whole or kept and
    rescaled by ``1 / keep_prob``.

    This is the same as the DropConnect impl created for EfficientNet, etc networks, however,
    the original name is misleading as 'Drop Connect' is a different form of dropout in a
    separate paper. See discussion:
    https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956

    Args:
        x: input tensor; dim 0 is the per-sample dimension.
        drop_prob: probability of dropping a sample. ``None`` is treated as 0.
        training: nothing is dropped unless this is True.

    Returns:
        ``x`` itself when nothing can be dropped, otherwise a new tensor shaped like ``x``.
    """
    # `None` is the default drop_prob of DropPath; `1 - None` used to raise in training mode.
    if drop_prob is None or drop_prob == 0. or not training:
        return x
    keep_prob = 1 - drop_prob
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
    if keep_prob <= 0.:
        # Every path is dropped. Dividing by keep_prob == 0 would give inf * 0 = NaN,
        # so multiply by an explicit zero mask instead.
        return x * x.new_zeros(shape)
    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
    random_tensor.floor_()  # binarize: 1 with probability keep_prob, else 0
    output = x.div(keep_prob) * random_tensor
    return output
class PatchEmbed(nn.Module):
    """
    2D Image to Patch Embedding

    Splits the image into non-overlapping patches with a strided convolution and
    returns them as a token sequence.

    Args:
        patch_size (int): side length of a (square) patch. Default: 4
        in_c (int): number of input image channels. Default: 3
        embed_dim (int): number of output channels per patch token. Default: 96
        norm_layer (nn.Module, optional): normalization applied to the tokens;
            ``nn.Identity`` is used when None. Default: None
    """
    def __init__(self, patch_size=4, in_c=3, embed_dim=96, norm_layer=None):
        super().__init__()
        patch_size = (patch_size, patch_size)
        self.patch_size = patch_size
        self.in_chans = in_c
        self.embed_dim = embed_dim
        self.proj = nn.Conv2d(in_c, embed_dim, kernel_size=patch_size, stride=patch_size)
        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()

    def forward(self, x):
        """
        Args:
            x: image batch of shape [B, C, H, W]

        Returns:
            (tokens, H, W): tokens of shape [B, H*W, embed_dim], where H and W are the
            height and width of the patch grid.
        """
        _, _, H, W = x.shape

        # padding
        # If H or W of the input is not a multiple of patch_size, pad on the bottom/right.
        # The trailing modulo keeps the pad at 0 for a dimension that is already aligned;
        # without it that dimension would grow by a whole extra patch of zeros.
        pad_h = (self.patch_size[0] - H % self.patch_size[0]) % self.patch_size[0]
        pad_w = (self.patch_size[1] - W % self.patch_size[1]) % self.patch_size[1]
        if pad_h > 0 or pad_w > 0:
            # to pad the last 3 dimensions,
            # (W_left, W_right, H_top, H_bottom, C_front, C_back)
            x = F.pad(x, (0, pad_w, 0, pad_h, 0, 0))

        # down-sample by a factor of patch_size
        x = self.proj(x)
        _, _, H, W = x.shape
        # flatten: [B, C, H, W] -> [B, C, HW]
        # transpose: [B, C, HW] -> [B, HW, C]
        x = x.flatten(2).transpose(1, 2)
        x = self.norm(x)
        return x, H, W
class Mlp(nn.Module):
    """
    Two-layer feed-forward network as used in Vision Transformer, MLP-Mixer and related networks:
    Linear -> activation -> Dropout -> Linear -> Dropout.

    `hidden_features` and `out_features` fall back to `in_features` when not given.
    """
    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        hidden_dim = hidden_features if hidden_features else in_features
        out_dim = out_features if out_features else in_features

        # Submodules are registered in the order they are applied in forward().
        self.fc1 = nn.Linear(in_features, hidden_dim)
        self.act = act_layer()
        self.drop1 = nn.Dropout(drop)
        self.fc2 = nn.Linear(hidden_dim, out_dim)
        self.drop2 = nn.Dropout(drop)

    def forward(self, x):
        for stage in (self.fc1, self.act, self.drop1, self.fc2, self.drop2):
            x = stage(x)
        return x
Default: 0.0 """ def __init__(self, dim, window_size, num_heads, qkv_bias=True, attn_drop=0., proj_drop=0.): super().__init__() self.dim = dim self.window_size = window_size # [Mh, Mw] self.num_heads = num_heads head_dim = dim // num_heads self.scale = head_dim ** -0.5 # define a parameter table of relative position bias self.relative_position_bias_table = nn.Parameter( torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads)) # [2*Mh-1 * 2*Mw-1, nH] # get pair-wise relative position index for each token inside the window coords_h = torch.arange(self.window_size[0]) coords_w = torch.arange(self.window_size[1]) coords = torch.stack(torch.meshgrid([coords_h, coords_w], indexing="ij")) # [2, Mh, Mw] coords_flatten = torch.flatten(coords, 1) # [2, Mh*Mw] # [2, Mh*Mw, 1] - [2, 1, Mh*Mw] relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # [2, Mh*Mw, Mh*Mw] relative_coords = relative_coords.permute(1, 2, 0).contiguous() # [Mh*Mw, Mh*Mw, 2] relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 relative_coords[:, :, 1] += self.window_size[1] - 1 relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 relative_position_index = relative_coords.sum(-1) # [Mh*Mw, Mh*Mw] self.register_buffer("relative_position_index", relative_position_index) self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) self.attn_drop = nn.Dropout(attn_drop) self.proj = nn.Linear(dim, dim) self.proj_drop = nn.Dropout(proj_drop) nn.init.trunc_normal_(self.relative_position_bias_table, std=.02) self.softmax = nn.Softmax(dim=-1) def forward(self, x, mask: Optional[torch.Tensor] = None): """ Args: x: input features with shape of (num_windows*B, Mh*Mw, C) mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None """ # [batch_size*num_windows, Mh*Mw, total_embed_dim] B_, N, C = x.shape # qkv(): -> [batch_size*num_windows, Mh*Mw, 3 * total_embed_dim] # reshape: -> [batch_size*num_windows, Mh*Mw, 3, num_heads, embed_dim_per_head] # 
class SwinTransformerBlock(nn.Module):
    r""" Swin Transformer Block.

    Windowed self-attention (optionally on a cyclically shifted feature map)
    followed by an MLP, each wrapped in a residual connection.

    Args:
        dim (int): Number of input channels.
        num_heads (int): Number of attention heads.
        window_size (int): Window size.
        shift_size (int): Shift size for SW-MSA.
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
        drop (float, optional): Dropout rate. Default: 0.0
        attn_drop (float, optional): Attention dropout rate. Default: 0.0
        drop_path (float, optional): Stochastic depth rate. Default: 0.0
        act_layer (nn.Module, optional): Activation layer. Default: nn.GELU
        norm_layer (nn.Module, optional): Normalization layer.  Default: nn.LayerNorm
    """

    def __init__(self, dim, num_heads, window_size=7, shift_size=0,
                 mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0., drop_path=0.,
                 act_layer=nn.GELU, norm_layer=nn.LayerNorm):
        super().__init__()
        self.dim = dim
        self.num_heads = num_heads
        self.window_size = window_size
        self.shift_size = shift_size
        self.mlp_ratio = mlp_ratio
        assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size"

        self.norm1 = norm_layer(dim)
        self.attn = WindowAttention(
            dim,
            window_size=(self.window_size, self.window_size),
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            attn_drop=attn_drop,
            proj_drop=drop)

        # Stochastic depth is only instantiated when it can actually drop something.
        if drop_path > 0.:
            self.drop_path = DropPath(drop_path)
        else:
            self.drop_path = nn.Identity()

        self.norm2 = norm_layer(dim)
        self.mlp = Mlp(in_features=dim,
                       hidden_features=int(dim * mlp_ratio),
                       act_layer=act_layer,
                       drop=drop)

    def forward(self, x, attn_mask):
        """Apply (shifted) window attention and the MLP to ``x``.

        ``self.H`` and ``self.W`` must be assigned on the block before the call;
        they give the spatial size that the token sequence ``x`` ([B, H*W, C])
        unfolds to. ``attn_mask`` is ignored when ``shift_size`` is 0.
        """
        H, W = self.H, self.W
        B, L, C = x.shape
        assert L == H * W, "input feature has wrong size"

        win = self.window_size
        use_shift = self.shift_size > 0
        residual = x

        feat = self.norm1(x).view(B, H, W, C)

        # Pad the feature map (right/bottom only) up to a multiple of the window size.
        pad_r = (win - W % win) % win
        pad_b = (win - H % win) % win
        feat = F.pad(feat, (0, 0, 0, pad_r, 0, pad_b))
        _, Hp, Wp, _ = feat.shape

        # Cyclic shift; unshifted blocks do not use a mask at all.
        if use_shift:
            feat = torch.roll(feat, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2))
        else:
            attn_mask = None

        # Partition into windows: [nW*B, Mh, Mw, C] -> [nW*B, Mh*Mw, C]
        windows = window_partition(feat, win).view(-1, win * win, C)

        # W-MSA / SW-MSA
        windows = self.attn(windows, mask=attn_mask)  # [nW*B, Mh*Mw, C]

        # Merge windows back into a feature map: [B, H', W', C]
        feat = window_reverse(windows.view(-1, win, win, C), win, Hp, Wp)

        # Undo the cyclic shift.
        if use_shift:
            feat = torch.roll(feat, shifts=(self.shift_size, self.shift_size), dims=(1, 2))

        # Strip the padding that was added above.
        if pad_r > 0 or pad_b > 0:
            feat = feat[:, :H, :W, :].contiguous()

        # Residual around attention, then residual around the FFN.
        x = residual + self.drop_path(feat.view(B, H * W, C))
        return x + self.drop_path(self.mlp(self.norm2(x)))
qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True drop (float, optional): Dropout rate. Default: 0.0 attn_drop (float, optional): Attention dropout rate. Default: 0.0 drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. """ def __init__(self, dim, depth, num_heads, window_size, mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0., drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False): super().__init__() self.dim = dim self.depth = depth self.window_size = window_size self.use_checkpoint = use_checkpoint self.shift_size = window_size // 2 # build blocks self.blocks = nn.ModuleList([ SwinTransformerBlock( dim=dim, num_heads=num_heads, window_size=window_size, shift_size=0 if (i % 2 == 0) else self.shift_size, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, drop=drop, attn_drop=attn_drop, drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path, norm_layer=norm_layer) for i in range(depth)]) # patch merging layer if downsample is not None: self.downsample = downsample(dim=dim, norm_layer=norm_layer) else: self.downsample = None def create_mask(self, x, H, W): # calculate attention mask for SW-MSA # 保证Hp和Wp是window_size的整数倍 Hp = int(np.ceil(H / self.window_size)) * self.window_size Wp = int(np.ceil(W / self.window_size)) * self.window_size # 拥有和feature map一样的通道排列顺序,方便后续window_partition img_mask = torch.zeros((1, Hp, Wp, 1), device=x.device) # [1, Hp, Wp, 1] h_slices = (slice(0, -self.window_size), slice(-self.window_size, -self.shift_size), slice(-self.shift_size, None)) w_slices = (slice(0, -self.window_size), slice(-self.window_size, -self.shift_size), slice(-self.shift_size, None)) cnt = 0 for h in h_slices: for w in 
w_slices: img_mask[:, h, w, :] = cnt cnt += 1 mask_windows = window_partition(img_mask, self.window_size) # [nW, Mh, Mw, 1] mask_windows = mask_windows.view(-1, self.window_size * self.window_size) # [nW, Mh*Mw] attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) # [nW, 1, Mh*Mw] - [nW, Mh*Mw, 1] # [nW, Mh*Mw, Mh*Mw] attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) return attn_mask def forward(self, x, H, W): attn_mask = self.create_mask(x, H, W) # [nW, Mh*Mw, Mh*Mw] for blk in self.blocks: blk.H, blk.W = H, W if not torch.jit.is_scripting() and self.use_checkpoint: x = checkpoint.checkpoint(blk, x, attn_mask) else: x = blk(x, attn_mask) if self.downsample is not None: x = self.downsample(x, H, W) H, W = (H + 1) // 2, (W + 1) // 2 return x, H, W class SwinTransformer(nn.Module): r""" Swin Transformer A PyTorch impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows` - https://arxiv.org/pdf/2103.14030 Args: patch_size (int | tuple(int)): Patch size. Default: 4 in_chans (int): Number of input image channels. Default: 3 num_classes (int): Number of classes for classification head. Default: 1000 embed_dim (int): Patch embedding dimension. Default: 96 depths (tuple(int)): Depth of each Swin Transformer layer. num_heads (tuple(int)): Number of attention heads in different layers. window_size (int): Window size. Default: 7 mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4 qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True drop_rate (float): Dropout rate. Default: 0 attn_drop_rate (float): Attention dropout rate. Default: 0 drop_path_rate (float): Stochastic depth rate. Default: 0.1 norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm. patch_norm (bool): If True, add normalization after patch embedding. Default: True use_checkpoint (bool): Whether to use checkpointing to save memory. 
Default: False """ def __init__(self, patch_size=4, in_chans=3, num_classes=1000, embed_dim=96, depths=(2, 2, 6, 2), num_heads=(3, 6, 12, 24), window_size=7, mlp_ratio=4., qkv_bias=True, drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1, norm_layer=nn.LayerNorm, patch_norm=True, use_checkpoint=False, **kwargs): super().__init__() self.num_classes = num_classes self.num_layers = len(depths) self.embed_dim = embed_dim self.patch_norm = patch_norm # stage4输出特征矩阵的channels self.num_features = int(embed_dim * 2 ** (self.num_layers - 1)) self.mlp_ratio = mlp_ratio # split image into non-overlapping patches self.patch_embed = PatchEmbed( patch_size=patch_size, in_c=in_chans, embed_dim=embed_dim, norm_layer=norm_layer if self.patch_norm else None) self.pos_drop = nn.Dropout(p=drop_rate) # stochastic depth dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule # build layers self.layers = nn.ModuleList() for i_layer in range(self.num_layers): # 注意这里构建的stage和论文图中有些差异 # 这里的stage不包含该stage的patch_merging层,包含的是下个stage的 layers = BasicLayer(dim=int(embed_dim * 2 ** i_layer), depth=depths[i_layer], num_heads=num_heads[i_layer], window_size=window_size, mlp_ratio=self.mlp_ratio, qkv_bias=qkv_bias, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], norm_layer=norm_layer, downsample=PatchMerging if (i_layer < self.num_layers - 1) else None, use_checkpoint=use_checkpoint) self.layers.append(layers) self.norm = norm_layer(self.num_features) self.avgpool = nn.AdaptiveAvgPool1d(1) self.head = nn.Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity() self.apply(self._init_weights) def _init_weights(self, m): if isinstance(m, nn.Linear): nn.init.trunc_normal_(m.weight, std=.02) if isinstance(m, nn.Linear) and m.bias is not None: nn.init.constant_(m.bias, 0) elif isinstance(m, nn.LayerNorm): nn.init.constant_(m.bias, 0) nn.init.constant_(m.weight, 1.0) def 
def swin_tiny_patch4_window7_224(num_classes: int = 1000, **kwargs):
    """Build the Swin-Tiny variant (patch 4, window 7).

    Weights trained on ImageNet-1K:
    https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth

    Args:
        num_classes: Size of the classification head.
        **kwargs: Forwarded unchanged to ``SwinTransformer``.
    """
    config = dict(in_chans=3,
                  patch_size=4,
                  window_size=7,
                  embed_dim=96,
                  depths=(2, 2, 6, 2),
                  num_heads=(3, 6, 12, 24))
    return SwinTransformer(**config, num_classes=num_classes, **kwargs)
def swin_base_patch4_window12_384_in22k(num_classes: int = 21841, **kwargs):
    """Build the Swin-Base variant (patch 4, window 12).

    Weights trained on ImageNet-22K:
    https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384_22k.pth

    Args:
        num_classes: Size of the classification head.
        **kwargs: Forwarded unchanged to ``SwinTransformer``.
    """
    config = dict(in_chans=3,
                  patch_size=4,
                  window_size=12,
                  embed_dim=128,
                  depths=(2, 2, 18, 2),
                  num_heads=(4, 8, 16, 32))
    return SwinTransformer(**config, num_classes=num_classes, **kwargs)
class GradCAM:
    """Grad-CAM saliency maps for one or more target layers of a model.

    The model is put in eval mode and hooked through ``ActivationsAndGradients``.
    Calling the instance runs a forward and backward pass and returns the
    aggregated, min-max scaled class activation maps as a float32 array.
    The instance can be used as a context manager so hooks are released on exit.
    """

    def __init__(self,
                 model,
                 target_layers,
                 reshape_transform=None,
                 use_cuda=False):
        self.model = model.eval()
        self.target_layers = target_layers
        self.reshape_transform = reshape_transform
        self.cuda = use_cuda
        if self.cuda:
            self.model = model.cuda()
        self.activations_and_grads = ActivationsAndGradients(
            self.model, target_layers, reshape_transform)

    @staticmethod
    def get_cam_weights(grads):
        """Return one weight per channel: the mean gradient over axes 2 and 3.

        Methods that differ only in how channels are weighted typically need
        to override just this function.
        """
        return np.mean(grads, axis=(2, 3), keepdims=True)

    @staticmethod
    def get_loss(output, target_category):
        """Sum ``output[i, target_category[i]]`` over the batch."""
        return sum(output[i, category] for i, category in enumerate(target_category))

    def get_cam_image(self, activations, grads):
        """Weight the activations channel-wise and sum over the channel axis."""
        weights = self.get_cam_weights(grads)
        weighted_activations = weights * activations
        return weighted_activations.sum(axis=1)

    @staticmethod
    def get_target_width_height(input_tensor):
        """Return ``(width, height)`` read from the last two input dimensions."""
        width, height = input_tensor.size(-1), input_tensor.size(-2)
        return width, height

    def compute_cam_per_layer(self, input_tensor):
        """Build one scaled CAM per hooked layer, resized to the input size."""
        activations_list = [a.cpu().data.numpy()
                            for a in self.activations_and_grads.activations]
        grads_list = [g.cpu().data.numpy()
                      for g in self.activations_and_grads.gradients]
        target_size = self.get_target_width_height(input_tensor)

        cam_per_target_layer = []
        # Loop over the saliency image from every layer
        for layer_activations, layer_grads in zip(activations_list, grads_list):
            cam = self.get_cam_image(layer_activations, layer_grads)
            cam[cam < 0] = 0  # keep only positive contributions before scaling
            scaled = self.scale_cam_image(cam, target_size)
            # Insert a "layer" axis so the per-layer maps can be concatenated.
            cam_per_target_layer.append(scaled[:, None, :])

        return cam_per_target_layer

    def aggregate_multi_layers(self, cam_per_target_layer):
        """Average the per-layer maps (clamped at 0) and rescale the result."""
        cam_per_target_layer = np.concatenate(cam_per_target_layer, axis=1)
        cam_per_target_layer = np.maximum(cam_per_target_layer, 0)
        result = np.mean(cam_per_target_layer, axis=1)
        return self.scale_cam_image(result)

    @staticmethod
    def scale_cam_image(cam, target_size=None):
        """Min-max scale every map in ``cam`` and optionally resize it.

        ``target_size`` is passed to ``cv2.resize`` as-is. The small epsilon
        in the denominator avoids division by zero for constant maps.
        """
        result = []
        for img in cam:
            img = img - np.min(img)
            img = img / (1e-7 + np.max(img))
            if target_size is not None:
                img = cv2.resize(img, target_size)
            result.append(img)
        return np.float32(result)

    def __call__(self, input_tensor, target_category=None):
        """Compute the CAM for ``input_tensor``.

        Args:
            input_tensor: Model input batch.
            target_category: An int (used for every sample), a sequence with
                one entry per sample, or None to use the arg-max prediction.

        Returns:
            The aggregated CAMs produced by ``aggregate_multi_layers``.
        """
        if self.cuda:
            input_tensor = input_tensor.cuda()

        # Forward pass: raw network output (logits, before any softmax).
        output = self.activations_and_grads(input_tensor)
        if isinstance(target_category, int):
            target_category = [target_category] * input_tensor.size(0)

        if target_category is None:
            target_category = np.argmax(output.cpu().data.numpy(), axis=-1)
            print(f"category id: {target_category}")
        else:
            assert (len(target_category) == input_tensor.size(0))

        self.model.zero_grad()
        loss = self.get_loss(output, target_category)
        loss.backward(retain_graph=True)

        # In most of the saliency attribution papers, the saliency is
        # computed with a single target layer.
        # Commonly it is the last convolutional layer.
        # Here we support passing a list with multiple target layers.
        # It will compute the saliency image for every image,
        # and then aggregate them (with a default mean aggregation).
        # This gives you more flexibility in case you just want to
        # use all conv layers for example, all Batchnorm layers,
        # or something else.
        cam_per_layer = self.compute_cam_per_layer(input_tensor)
        return self.aggregate_multi_layers(cam_per_layer)

    def __del__(self):
        # __init__ may have failed before the hooks were registered; in that
        # case there is nothing to release and finalisation must stay silent.
        hooks = getattr(self, "activations_and_grads", None)
        if hooks is not None:
            hooks.release()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        self.activations_and_grads.release()
        if isinstance(exc_value, IndexError):
            # IndexError is reported and suppressed; other exceptions propagate.
            print(
                f"An exception occurred in CAM with block: {exc_type}. Message: {exc_value}")
            return True
def center_crop_img(img: np.ndarray, size: int):
    """Resize ``img`` so its shorter side equals ``size``, then center-crop.

    The aspect ratio is kept (the longer side is floored to an integer) and
    the result is the central ``size`` x ``size`` region. An image that is
    already ``size`` x ``size`` is returned unchanged.

    Args:
        img: Image array whose first two axes are height and width; an
            optional trailing channel axis is allowed.
        size: Target edge length in pixels.

    Returns:
        The cropped image (a slice of the resized array).
    """
    h, w = img.shape[:2]

    if w == h == size:
        return img

    # Integer arithmetic on purpose: ``int(w * (size / h))`` can land on
    # ``size - 1`` for square inputs because of float rounding, which then
    # produced a negative crop offset.
    if w < h:
        new_w = size
        new_h = h * size // w
    else:
        new_h = size
        new_w = w * size // h

    img = cv2.resize(img, dsize=(new_w, new_h))

    # Exactly one of the two offsets is non-zero (the longer side).
    top = (new_h - size) // 2
    left = (new_w - size) // 2
    return img[top: top + size, left: left + size]
class DropPath(nn.Module):
    """
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Args:
        drop_prob: Probability of dropping the path for a sample. ``None``
            (the default) and ``0`` both disable dropping.
    """

    def __init__(self, drop_prob=None):
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        # With the default ``drop_prob=None`` the stochastic branch used to
        # fail in training mode on ``1 - None``; treat None like 0 and pass
        # the input through. Eval mode is always an identity.
        if not self.drop_prob or not self.training:
            return x
        return drop_path(x, self.drop_prob, self.training)
class Attention(nn.Module):
    """Multi-head self-attention with a fused q/k/v projection.

    Args:
        dim: Dimension of each input token.
        num_heads: Number of attention heads.
        qkv_bias: Whether the q/k/v projection has a bias.
        qk_scale: Overrides the default scale ``head_dim ** -0.5`` when truthy.
        attn_drop_ratio: Dropout probability on the attention weights.
        proj_drop_ratio: Dropout probability after the output projection.
    """

    def __init__(self,
                 dim,
                 num_heads=8,
                 qkv_bias=False,
                 qk_scale=None,
                 attn_drop_ratio=0.,
                 proj_drop_ratio=0.):
        super().__init__()
        self.num_heads = num_heads
        self.scale = qk_scale or (dim // num_heads) ** -0.5
        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop_ratio)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop_ratio)

    def forward(self, x):
        # x: [batch, tokens, channels]
        batch, tokens, channels = x.shape
        head_dim = channels // self.num_heads

        # [batch, tokens, 3*channels] -> [batch, tokens, 3, heads, head_dim]
        #                             -> [3, batch, heads, tokens, head_dim]
        packed = self.qkv(x).reshape(batch, tokens, 3, self.num_heads, head_dim)
        packed = packed.permute(2, 0, 3, 1, 4)
        # Indexed rather than unpacked: a tensor cannot be used as a tuple under torchscript.
        query, key, value = packed[0], packed[1], packed[2]

        # [batch, heads, tokens, tokens]
        scores = (query @ key.transpose(-2, -1)) * self.scale
        weights = self.attn_drop(scores.softmax(dim=-1))

        # [batch, heads, tokens, head_dim] -> [batch, tokens, heads, head_dim]
        #                                  -> [batch, tokens, channels]
        merged = (weights @ value).transpose(1, 2).reshape(batch, tokens, channels)
        return self.proj_drop(self.proj(merged))
class Block(nn.Module):
    """Transformer encoder block: pre-norm attention and pre-norm MLP,
    each added back to its input through an (optionally dropped) residual path.
    """

    def __init__(self,
                 dim,
                 num_heads,
                 mlp_ratio=4.,
                 qkv_bias=False,
                 qk_scale=None,
                 drop_ratio=0.,
                 attn_drop_ratio=0.,
                 drop_path_ratio=0.,
                 act_layer=nn.GELU,
                 norm_layer=nn.LayerNorm):
        super().__init__()
        self.norm1 = norm_layer(dim)
        self.attn = Attention(dim,
                              num_heads=num_heads,
                              qkv_bias=qkv_bias,
                              qk_scale=qk_scale,
                              attn_drop_ratio=attn_drop_ratio,
                              proj_drop_ratio=drop_ratio)
        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
        if drop_path_ratio > 0.:
            self.drop_path = DropPath(drop_path_ratio)
        else:
            self.drop_path = nn.Identity()
        self.norm2 = norm_layer(dim)
        self.mlp = Mlp(in_features=dim,
                       hidden_features=int(dim * mlp_ratio),
                       act_layer=act_layer,
                       drop=drop_ratio)

    def forward(self, x):
        attended = self.attn(self.norm1(x))
        x = x + self.drop_path(attended)
        transformed = self.mlp(self.norm2(x))
        return x + self.drop_path(transformed)
set representation_size (Optional[int]): enable and set representation layer (pre-logits) to this value if set distilled (bool): model includes a distillation token and head as in DeiT models drop_ratio (float): dropout rate attn_drop_ratio (float): attention dropout rate drop_path_ratio (float): stochastic depth rate embed_layer (nn.Module): patch embedding layer norm_layer: (nn.Module): normalization layer """ super(VisionTransformer, self).__init__() self.num_classes = num_classes self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models self.num_tokens = 2 if distilled else 1 norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6) act_layer = act_layer or nn.GELU self.patch_embed = embed_layer(img_size=img_size, patch_size=patch_size, in_c=in_c, embed_dim=embed_dim) num_patches = self.patch_embed.num_patches self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) self.dist_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) if distilled else None self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + self.num_tokens, embed_dim)) self.pos_drop = nn.Dropout(p=drop_ratio) dpr = [x.item() for x in torch.linspace(0, drop_path_ratio, depth)] # stochastic depth decay rule self.blocks = nn.Sequential(*[ Block(dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, drop_ratio=drop_ratio, attn_drop_ratio=attn_drop_ratio, drop_path_ratio=dpr[i], norm_layer=norm_layer, act_layer=act_layer) for i in range(depth) ]) self.norm = norm_layer(embed_dim) # Representation layer if representation_size and not distilled: self.has_logits = True self.num_features = representation_size self.pre_logits = nn.Sequential(OrderedDict([ ("fc", nn.Linear(embed_dim, representation_size)), ("act", nn.Tanh()) ])) else: self.has_logits = False self.pre_logits = nn.Identity() # Classifier head(s) self.head = nn.Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity() self.head_dist = None if 
def _init_vit_weights(m):
    """
    ViT weight initialization, meant to be passed to ``nn.Module.apply``.

    - ``nn.Linear``: truncated-normal weight (std 0.01), zero bias if present
    - ``nn.Conv2d``: Kaiming-normal weight (fan_out), zero bias if present
    - ``nn.LayerNorm``: zero bias, unit weight
    Other module types are left unchanged.

    :param m: module
    """
    if isinstance(m, nn.LayerNorm):
        nn.init.zeros_(m.bias)
        nn.init.ones_(m.weight)
        return

    if isinstance(m, nn.Linear):
        nn.init.trunc_normal_(m.weight, std=.01)
    elif isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode="fan_out")
    else:
        return

    # Linear and Conv2d share the same bias handling.
    if m.bias is not None:
        nn.init.zeros_(m.bias)
def vit_base_patch16_224_in21k(num_classes: int = 21843, has_logits: bool = True):
    """
    ViT-Base model (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
    weights ported from official Google JAX impl:
    https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_patch16_224_in21k-e5005f0a.pth

    ``has_logits`` switches the representation (pre-logits) layer on; its
    width equals the embedding dimension.
    """
    embed_dim = 768
    representation_size = embed_dim if has_logits else None
    return VisionTransformer(img_size=224,
                             patch_size=16,
                             embed_dim=embed_dim,
                             depth=12,
                             num_heads=12,
                             representation_size=representation_size,
                             num_classes=num_classes)
def vit_large_patch16_224(num_classes: int = 1000):
    """
    ViT-Large model (ViT-L/16) from original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-1k weights @ 224x224, source https://github.com/google-research/vision_transformer.
    weights ported from official Google JAX impl:
    link: https://pan.baidu.com/s/1cxBgZJJ6qUWPSBNcE4TdRQ  password: qqt8
    """
    config = dict(img_size=224,
                  patch_size=16,
                  embed_dim=1024,
                  depth=24,
                  num_heads=16,
                  representation_size=None)
    return VisionTransformer(**config, num_classes=num_classes)
weights ported from official Google JAX impl: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_patch32_224_in21k-9046d2e7.pth """ model = VisionTransformer(img_size=224, patch_size=32, embed_dim=1024, depth=24, num_heads=16, representation_size=1024 if has_logits else None, num_classes=num_classes) return model def vit_huge_patch14_224_in21k(num_classes: int = 21843, has_logits: bool = True): """ ViT-Huge model (ViT-H/14) from original paper (https://arxiv.org/abs/2010.11929). ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer. NOTE: converted weights not currently available, too large for github release hosting. """ model = VisionTransformer(img_size=224, patch_size=14, embed_dim=1280, depth=32, num_heads=16, representation_size=1280 if has_logits else None, num_classes=num_classes) return model ================================================ FILE: pytorch_classification/mini_imagenet/README.md ================================================ ## download mini-imagenet link: [https://pan.baidu.com/s/1Uro6RuEbRGGCQ8iXvF2SAQ](https://pan.baidu.com/s/1Uro6RuEbRGGCQ8iXvF2SAQ) password: hl31 ## dataset path structure ``` ├── mini-imagenet: total 100 classes, 60000 images ├── images: 60000 images ├── train.csv: 64 classes, 38400 images ├── val.csv: 16 classes, 9600 images └── test.csv: 20 classes, 12000 images ``` ================================================ FILE: pytorch_classification/mini_imagenet/imagenet_class_index.json ================================================ {"0": ["n01440764", "tench"], "1": ["n01443537", "goldfish"], "2": ["n01484850", "great_white_shark"], "3": ["n01491361", "tiger_shark"], "4": ["n01494475", "hammerhead"], "5": ["n01496331", "electric_ray"], "6": ["n01498041", "stingray"], "7": ["n01514668", "cock"], "8": ["n01514859", "hen"], "9": ["n01518878", "ostrich"], "10": ["n01530575", "brambling"], "11": ["n01531178", "goldfinch"], "12": ["n01532829", 
"house_finch"], "13": ["n01534433", "junco"], "14": ["n01537544", "indigo_bunting"], "15": ["n01558993", "robin"], "16": ["n01560419", "bulbul"], "17": ["n01580077", "jay"], "18": ["n01582220", "magpie"], "19": ["n01592084", "chickadee"], "20": ["n01601694", "water_ouzel"], "21": ["n01608432", "kite"], "22": ["n01614925", "bald_eagle"], "23": ["n01616318", "vulture"], "24": ["n01622779", "great_grey_owl"], "25": ["n01629819", "European_fire_salamander"], "26": ["n01630670", "common_newt"], "27": ["n01631663", "eft"], "28": ["n01632458", "spotted_salamander"], "29": ["n01632777", "axolotl"], "30": ["n01641577", "bullfrog"], "31": ["n01644373", "tree_frog"], "32": ["n01644900", "tailed_frog"], "33": ["n01664065", "loggerhead"], "34": ["n01665541", "leatherback_turtle"], "35": ["n01667114", "mud_turtle"], "36": ["n01667778", "terrapin"], "37": ["n01669191", "box_turtle"], "38": ["n01675722", "banded_gecko"], "39": ["n01677366", "common_iguana"], "40": ["n01682714", "American_chameleon"], "41": ["n01685808", "whiptail"], "42": ["n01687978", "agama"], "43": ["n01688243", "frilled_lizard"], "44": ["n01689811", "alligator_lizard"], "45": ["n01692333", "Gila_monster"], "46": ["n01693334", "green_lizard"], "47": ["n01694178", "African_chameleon"], "48": ["n01695060", "Komodo_dragon"], "49": ["n01697457", "African_crocodile"], "50": ["n01698640", "American_alligator"], "51": ["n01704323", "triceratops"], "52": ["n01728572", "thunder_snake"], "53": ["n01728920", "ringneck_snake"], "54": ["n01729322", "hognose_snake"], "55": ["n01729977", "green_snake"], "56": ["n01734418", "king_snake"], "57": ["n01735189", "garter_snake"], "58": ["n01737021", "water_snake"], "59": ["n01739381", "vine_snake"], "60": ["n01740131", "night_snake"], "61": ["n01742172", "boa_constrictor"], "62": ["n01744401", "rock_python"], "63": ["n01748264", "Indian_cobra"], "64": ["n01749939", "green_mamba"], "65": ["n01751748", "sea_snake"], "66": ["n01753488", "horned_viper"], "67": ["n01755581", 
"diamondback"], "68": ["n01756291", "sidewinder"], "69": ["n01768244", "trilobite"], "70": ["n01770081", "harvestman"], "71": ["n01770393", "scorpion"], "72": ["n01773157", "black_and_gold_garden_spider"], "73": ["n01773549", "barn_spider"], "74": ["n01773797", "garden_spider"], "75": ["n01774384", "black_widow"], "76": ["n01774750", "tarantula"], "77": ["n01775062", "wolf_spider"], "78": ["n01776313", "tick"], "79": ["n01784675", "centipede"], "80": ["n01795545", "black_grouse"], "81": ["n01796340", "ptarmigan"], "82": ["n01797886", "ruffed_grouse"], "83": ["n01798484", "prairie_chicken"], "84": ["n01806143", "peacock"], "85": ["n01806567", "quail"], "86": ["n01807496", "partridge"], "87": ["n01817953", "African_grey"], "88": ["n01818515", "macaw"], "89": ["n01819313", "sulphur-crested_cockatoo"], "90": ["n01820546", "lorikeet"], "91": ["n01824575", "coucal"], "92": ["n01828970", "bee_eater"], "93": ["n01829413", "hornbill"], "94": ["n01833805", "hummingbird"], "95": ["n01843065", "jacamar"], "96": ["n01843383", "toucan"], "97": ["n01847000", "drake"], "98": ["n01855032", "red-breasted_merganser"], "99": ["n01855672", "goose"], "100": ["n01860187", "black_swan"], "101": ["n01871265", "tusker"], "102": ["n01872401", "echidna"], "103": ["n01873310", "platypus"], "104": ["n01877812", "wallaby"], "105": ["n01882714", "koala"], "106": ["n01883070", "wombat"], "107": ["n01910747", "jellyfish"], "108": ["n01914609", "sea_anemone"], "109": ["n01917289", "brain_coral"], "110": ["n01924916", "flatworm"], "111": ["n01930112", "nematode"], "112": ["n01943899", "conch"], "113": ["n01944390", "snail"], "114": ["n01945685", "slug"], "115": ["n01950731", "sea_slug"], "116": ["n01955084", "chiton"], "117": ["n01968897", "chambered_nautilus"], "118": ["n01978287", "Dungeness_crab"], "119": ["n01978455", "rock_crab"], "120": ["n01980166", "fiddler_crab"], "121": ["n01981276", "king_crab"], "122": ["n01983481", "American_lobster"], "123": ["n01984695", "spiny_lobster"], "124": 
["n01985128", "crayfish"], "125": ["n01986214", "hermit_crab"], "126": ["n01990800", "isopod"], "127": ["n02002556", "white_stork"], "128": ["n02002724", "black_stork"], "129": ["n02006656", "spoonbill"], "130": ["n02007558", "flamingo"], "131": ["n02009229", "little_blue_heron"], "132": ["n02009912", "American_egret"], "133": ["n02011460", "bittern"], "134": ["n02012849", "crane"], "135": ["n02013706", "limpkin"], "136": ["n02017213", "European_gallinule"], "137": ["n02018207", "American_coot"], "138": ["n02018795", "bustard"], "139": ["n02025239", "ruddy_turnstone"], "140": ["n02027492", "red-backed_sandpiper"], "141": ["n02028035", "redshank"], "142": ["n02033041", "dowitcher"], "143": ["n02037110", "oystercatcher"], "144": ["n02051845", "pelican"], "145": ["n02056570", "king_penguin"], "146": ["n02058221", "albatross"], "147": ["n02066245", "grey_whale"], "148": ["n02071294", "killer_whale"], "149": ["n02074367", "dugong"], "150": ["n02077923", "sea_lion"], "151": ["n02085620", "Chihuahua"], "152": ["n02085782", "Japanese_spaniel"], "153": ["n02085936", "Maltese_dog"], "154": ["n02086079", "Pekinese"], "155": ["n02086240", "Shih-Tzu"], "156": ["n02086646", "Blenheim_spaniel"], "157": ["n02086910", "papillon"], "158": ["n02087046", "toy_terrier"], "159": ["n02087394", "Rhodesian_ridgeback"], "160": ["n02088094", "Afghan_hound"], "161": ["n02088238", "basset"], "162": ["n02088364", "beagle"], "163": ["n02088466", "bloodhound"], "164": ["n02088632", "bluetick"], "165": ["n02089078", "black-and-tan_coonhound"], "166": ["n02089867", "Walker_hound"], "167": ["n02089973", "English_foxhound"], "168": ["n02090379", "redbone"], "169": ["n02090622", "borzoi"], "170": ["n02090721", "Irish_wolfhound"], "171": ["n02091032", "Italian_greyhound"], "172": ["n02091134", "whippet"], "173": ["n02091244", "Ibizan_hound"], "174": ["n02091467", "Norwegian_elkhound"], "175": ["n02091635", "otterhound"], "176": ["n02091831", "Saluki"], "177": ["n02092002", "Scottish_deerhound"], "178": 
["n02092339", "Weimaraner"], "179": ["n02093256", "Staffordshire_bullterrier"], "180": ["n02093428", "American_Staffordshire_terrier"], "181": ["n02093647", "Bedlington_terrier"], "182": ["n02093754", "Border_terrier"], "183": ["n02093859", "Kerry_blue_terrier"], "184": ["n02093991", "Irish_terrier"], "185": ["n02094114", "Norfolk_terrier"], "186": ["n02094258", "Norwich_terrier"], "187": ["n02094433", "Yorkshire_terrier"], "188": ["n02095314", "wire-haired_fox_terrier"], "189": ["n02095570", "Lakeland_terrier"], "190": ["n02095889", "Sealyham_terrier"], "191": ["n02096051", "Airedale"], "192": ["n02096177", "cairn"], "193": ["n02096294", "Australian_terrier"], "194": ["n02096437", "Dandie_Dinmont"], "195": ["n02096585", "Boston_bull"], "196": ["n02097047", "miniature_schnauzer"], "197": ["n02097130", "giant_schnauzer"], "198": ["n02097209", "standard_schnauzer"], "199": ["n02097298", "Scotch_terrier"], "200": ["n02097474", "Tibetan_terrier"], "201": ["n02097658", "silky_terrier"], "202": ["n02098105", "soft-coated_wheaten_terrier"], "203": ["n02098286", "West_Highland_white_terrier"], "204": ["n02098413", "Lhasa"], "205": ["n02099267", "flat-coated_retriever"], "206": ["n02099429", "curly-coated_retriever"], "207": ["n02099601", "golden_retriever"], "208": ["n02099712", "Labrador_retriever"], "209": ["n02099849", "Chesapeake_Bay_retriever"], "210": ["n02100236", "German_short-haired_pointer"], "211": ["n02100583", "vizsla"], "212": ["n02100735", "English_setter"], "213": ["n02100877", "Irish_setter"], "214": ["n02101006", "Gordon_setter"], "215": ["n02101388", "Brittany_spaniel"], "216": ["n02101556", "clumber"], "217": ["n02102040", "English_springer"], "218": ["n02102177", "Welsh_springer_spaniel"], "219": ["n02102318", "cocker_spaniel"], "220": ["n02102480", "Sussex_spaniel"], "221": ["n02102973", "Irish_water_spaniel"], "222": ["n02104029", "kuvasz"], "223": ["n02104365", "schipperke"], "224": ["n02105056", "groenendael"], "225": ["n02105162", "malinois"], 
"226": ["n02105251", "briard"], "227": ["n02105412", "kelpie"], "228": ["n02105505", "komondor"], "229": ["n02105641", "Old_English_sheepdog"], "230": ["n02105855", "Shetland_sheepdog"], "231": ["n02106030", "collie"], "232": ["n02106166", "Border_collie"], "233": ["n02106382", "Bouvier_des_Flandres"], "234": ["n02106550", "Rottweiler"], "235": ["n02106662", "German_shepherd"], "236": ["n02107142", "Doberman"], "237": ["n02107312", "miniature_pinscher"], "238": ["n02107574", "Greater_Swiss_Mountain_dog"], "239": ["n02107683", "Bernese_mountain_dog"], "240": ["n02107908", "Appenzeller"], "241": ["n02108000", "EntleBucher"], "242": ["n02108089", "boxer"], "243": ["n02108422", "bull_mastiff"], "244": ["n02108551", "Tibetan_mastiff"], "245": ["n02108915", "French_bulldog"], "246": ["n02109047", "Great_Dane"], "247": ["n02109525", "Saint_Bernard"], "248": ["n02109961", "Eskimo_dog"], "249": ["n02110063", "malamute"], "250": ["n02110185", "Siberian_husky"], "251": ["n02110341", "dalmatian"], "252": ["n02110627", "affenpinscher"], "253": ["n02110806", "basenji"], "254": ["n02110958", "pug"], "255": ["n02111129", "Leonberg"], "256": ["n02111277", "Newfoundland"], "257": ["n02111500", "Great_Pyrenees"], "258": ["n02111889", "Samoyed"], "259": ["n02112018", "Pomeranian"], "260": ["n02112137", "chow"], "261": ["n02112350", "keeshond"], "262": ["n02112706", "Brabancon_griffon"], "263": ["n02113023", "Pembroke"], "264": ["n02113186", "Cardigan"], "265": ["n02113624", "toy_poodle"], "266": ["n02113712", "miniature_poodle"], "267": ["n02113799", "standard_poodle"], "268": ["n02113978", "Mexican_hairless"], "269": ["n02114367", "timber_wolf"], "270": ["n02114548", "white_wolf"], "271": ["n02114712", "red_wolf"], "272": ["n02114855", "coyote"], "273": ["n02115641", "dingo"], "274": ["n02115913", "dhole"], "275": ["n02116738", "African_hunting_dog"], "276": ["n02117135", "hyena"], "277": ["n02119022", "red_fox"], "278": ["n02119789", "kit_fox"], "279": ["n02120079", "Arctic_fox"], 
"280": ["n02120505", "grey_fox"], "281": ["n02123045", "tabby"], "282": ["n02123159", "tiger_cat"], "283": ["n02123394", "Persian_cat"], "284": ["n02123597", "Siamese_cat"], "285": ["n02124075", "Egyptian_cat"], "286": ["n02125311", "cougar"], "287": ["n02127052", "lynx"], "288": ["n02128385", "leopard"], "289": ["n02128757", "snow_leopard"], "290": ["n02128925", "jaguar"], "291": ["n02129165", "lion"], "292": ["n02129604", "tiger"], "293": ["n02130308", "cheetah"], "294": ["n02132136", "brown_bear"], "295": ["n02133161", "American_black_bear"], "296": ["n02134084", "ice_bear"], "297": ["n02134418", "sloth_bear"], "298": ["n02137549", "mongoose"], "299": ["n02138441", "meerkat"], "300": ["n02165105", "tiger_beetle"], "301": ["n02165456", "ladybug"], "302": ["n02167151", "ground_beetle"], "303": ["n02168699", "long-horned_beetle"], "304": ["n02169497", "leaf_beetle"], "305": ["n02172182", "dung_beetle"], "306": ["n02174001", "rhinoceros_beetle"], "307": ["n02177972", "weevil"], "308": ["n02190166", "fly"], "309": ["n02206856", "bee"], "310": ["n02219486", "ant"], "311": ["n02226429", "grasshopper"], "312": ["n02229544", "cricket"], "313": ["n02231487", "walking_stick"], "314": ["n02233338", "cockroach"], "315": ["n02236044", "mantis"], "316": ["n02256656", "cicada"], "317": ["n02259212", "leafhopper"], "318": ["n02264363", "lacewing"], "319": ["n02268443", "dragonfly"], "320": ["n02268853", "damselfly"], "321": ["n02276258", "admiral"], "322": ["n02277742", "ringlet"], "323": ["n02279972", "monarch"], "324": ["n02280649", "cabbage_butterfly"], "325": ["n02281406", "sulphur_butterfly"], "326": ["n02281787", "lycaenid"], "327": ["n02317335", "starfish"], "328": ["n02319095", "sea_urchin"], "329": ["n02321529", "sea_cucumber"], "330": ["n02325366", "wood_rabbit"], "331": ["n02326432", "hare"], "332": ["n02328150", "Angora"], "333": ["n02342885", "hamster"], "334": ["n02346627", "porcupine"], "335": ["n02356798", "fox_squirrel"], "336": ["n02361337", "marmot"], "337": 
["n02363005", "beaver"], "338": ["n02364673", "guinea_pig"], "339": ["n02389026", "sorrel"], "340": ["n02391049", "zebra"], "341": ["n02395406", "hog"], "342": ["n02396427", "wild_boar"], "343": ["n02397096", "warthog"], "344": ["n02398521", "hippopotamus"], "345": ["n02403003", "ox"], "346": ["n02408429", "water_buffalo"], "347": ["n02410509", "bison"], "348": ["n02412080", "ram"], "349": ["n02415577", "bighorn"], "350": ["n02417914", "ibex"], "351": ["n02422106", "hartebeest"], "352": ["n02422699", "impala"], "353": ["n02423022", "gazelle"], "354": ["n02437312", "Arabian_camel"], "355": ["n02437616", "llama"], "356": ["n02441942", "weasel"], "357": ["n02442845", "mink"], "358": ["n02443114", "polecat"], "359": ["n02443484", "black-footed_ferret"], "360": ["n02444819", "otter"], "361": ["n02445715", "skunk"], "362": ["n02447366", "badger"], "363": ["n02454379", "armadillo"], "364": ["n02457408", "three-toed_sloth"], "365": ["n02480495", "orangutan"], "366": ["n02480855", "gorilla"], "367": ["n02481823", "chimpanzee"], "368": ["n02483362", "gibbon"], "369": ["n02483708", "siamang"], "370": ["n02484975", "guenon"], "371": ["n02486261", "patas"], "372": ["n02486410", "baboon"], "373": ["n02487347", "macaque"], "374": ["n02488291", "langur"], "375": ["n02488702", "colobus"], "376": ["n02489166", "proboscis_monkey"], "377": ["n02490219", "marmoset"], "378": ["n02492035", "capuchin"], "379": ["n02492660", "howler_monkey"], "380": ["n02493509", "titi"], "381": ["n02493793", "spider_monkey"], "382": ["n02494079", "squirrel_monkey"], "383": ["n02497673", "Madagascar_cat"], "384": ["n02500267", "indri"], "385": ["n02504013", "Indian_elephant"], "386": ["n02504458", "African_elephant"], "387": ["n02509815", "lesser_panda"], "388": ["n02510455", "giant_panda"], "389": ["n02514041", "barracouta"], "390": ["n02526121", "eel"], "391": ["n02536864", "coho"], "392": ["n02606052", "rock_beauty"], "393": ["n02607072", "anemone_fish"], "394": ["n02640242", "sturgeon"], "395": 
["n02641379", "gar"], "396": ["n02643566", "lionfish"], "397": ["n02655020", "puffer"], "398": ["n02666196", "abacus"], "399": ["n02667093", "abaya"], "400": ["n02669723", "academic_gown"], "401": ["n02672831", "accordion"], "402": ["n02676566", "acoustic_guitar"], "403": ["n02687172", "aircraft_carrier"], "404": ["n02690373", "airliner"], "405": ["n02692877", "airship"], "406": ["n02699494", "altar"], "407": ["n02701002", "ambulance"], "408": ["n02704792", "amphibian"], "409": ["n02708093", "analog_clock"], "410": ["n02727426", "apiary"], "411": ["n02730930", "apron"], "412": ["n02747177", "ashcan"], "413": ["n02749479", "assault_rifle"], "414": ["n02769748", "backpack"], "415": ["n02776631", "bakery"], "416": ["n02777292", "balance_beam"], "417": ["n02782093", "balloon"], "418": ["n02783161", "ballpoint"], "419": ["n02786058", "Band_Aid"], "420": ["n02787622", "banjo"], "421": ["n02788148", "bannister"], "422": ["n02790996", "barbell"], "423": ["n02791124", "barber_chair"], "424": ["n02791270", "barbershop"], "425": ["n02793495", "barn"], "426": ["n02794156", "barometer"], "427": ["n02795169", "barrel"], "428": ["n02797295", "barrow"], "429": ["n02799071", "baseball"], "430": ["n02802426", "basketball"], "431": ["n02804414", "bassinet"], "432": ["n02804610", "bassoon"], "433": ["n02807133", "bathing_cap"], "434": ["n02808304", "bath_towel"], "435": ["n02808440", "bathtub"], "436": ["n02814533", "beach_wagon"], "437": ["n02814860", "beacon"], "438": ["n02815834", "beaker"], "439": ["n02817516", "bearskin"], "440": ["n02823428", "beer_bottle"], "441": ["n02823750", "beer_glass"], "442": ["n02825657", "bell_cote"], "443": ["n02834397", "bib"], "444": ["n02835271", "bicycle-built-for-two"], "445": ["n02837789", "bikini"], "446": ["n02840245", "binder"], "447": ["n02841315", "binoculars"], "448": ["n02843684", "birdhouse"], "449": ["n02859443", "boathouse"], "450": ["n02860847", "bobsled"], "451": ["n02865351", "bolo_tie"], "452": ["n02869837", "bonnet"], "453": 
["n02870880", "bookcase"], "454": ["n02871525", "bookshop"], "455": ["n02877765", "bottlecap"], "456": ["n02879718", "bow"], "457": ["n02883205", "bow_tie"], "458": ["n02892201", "brass"], "459": ["n02892767", "brassiere"], "460": ["n02894605", "breakwater"], "461": ["n02895154", "breastplate"], "462": ["n02906734", "broom"], "463": ["n02909870", "bucket"], "464": ["n02910353", "buckle"], "465": ["n02916936", "bulletproof_vest"], "466": ["n02917067", "bullet_train"], "467": ["n02927161", "butcher_shop"], "468": ["n02930766", "cab"], "469": ["n02939185", "caldron"], "470": ["n02948072", "candle"], "471": ["n02950826", "cannon"], "472": ["n02951358", "canoe"], "473": ["n02951585", "can_opener"], "474": ["n02963159", "cardigan"], "475": ["n02965783", "car_mirror"], "476": ["n02966193", "carousel"], "477": ["n02966687", "carpenter's_kit"], "478": ["n02971356", "carton"], "479": ["n02974003", "car_wheel"], "480": ["n02977058", "cash_machine"], "481": ["n02978881", "cassette"], "482": ["n02979186", "cassette_player"], "483": ["n02980441", "castle"], "484": ["n02981792", "catamaran"], "485": ["n02988304", "CD_player"], "486": ["n02992211", "cello"], "487": ["n02992529", "cellular_telephone"], "488": ["n02999410", "chain"], "489": ["n03000134", "chainlink_fence"], "490": ["n03000247", "chain_mail"], "491": ["n03000684", "chain_saw"], "492": ["n03014705", "chest"], "493": ["n03016953", "chiffonier"], "494": ["n03017168", "chime"], "495": ["n03018349", "china_cabinet"], "496": ["n03026506", "Christmas_stocking"], "497": ["n03028079", "church"], "498": ["n03032252", "cinema"], "499": ["n03041632", "cleaver"], "500": ["n03042490", "cliff_dwelling"], "501": ["n03045698", "cloak"], "502": ["n03047690", "clog"], "503": ["n03062245", "cocktail_shaker"], "504": ["n03063599", "coffee_mug"], "505": ["n03063689", "coffeepot"], "506": ["n03065424", "coil"], "507": ["n03075370", "combination_lock"], "508": ["n03085013", "computer_keyboard"], "509": ["n03089624", "confectionery"], "510": 
["n03095699", "container_ship"], "511": ["n03100240", "convertible"], "512": ["n03109150", "corkscrew"], "513": ["n03110669", "cornet"], "514": ["n03124043", "cowboy_boot"], "515": ["n03124170", "cowboy_hat"], "516": ["n03125729", "cradle"], "517": ["n03126707", "crane"], "518": ["n03127747", "crash_helmet"], "519": ["n03127925", "crate"], "520": ["n03131574", "crib"], "521": ["n03133878", "Crock_Pot"], "522": ["n03134739", "croquet_ball"], "523": ["n03141823", "crutch"], "524": ["n03146219", "cuirass"], "525": ["n03160309", "dam"], "526": ["n03179701", "desk"], "527": ["n03180011", "desktop_computer"], "528": ["n03187595", "dial_telephone"], "529": ["n03188531", "diaper"], "530": ["n03196217", "digital_clock"], "531": ["n03197337", "digital_watch"], "532": ["n03201208", "dining_table"], "533": ["n03207743", "dishrag"], "534": ["n03207941", "dishwasher"], "535": ["n03208938", "disk_brake"], "536": ["n03216828", "dock"], "537": ["n03218198", "dogsled"], "538": ["n03220513", "dome"], "539": ["n03223299", "doormat"], "540": ["n03240683", "drilling_platform"], "541": ["n03249569", "drum"], "542": ["n03250847", "drumstick"], "543": ["n03255030", "dumbbell"], "544": ["n03259280", "Dutch_oven"], "545": ["n03271574", "electric_fan"], "546": ["n03272010", "electric_guitar"], "547": ["n03272562", "electric_locomotive"], "548": ["n03290653", "entertainment_center"], "549": ["n03291819", "envelope"], "550": ["n03297495", "espresso_maker"], "551": ["n03314780", "face_powder"], "552": ["n03325584", "feather_boa"], "553": ["n03337140", "file"], "554": ["n03344393", "fireboat"], "555": ["n03345487", "fire_engine"], "556": ["n03347037", "fire_screen"], "557": ["n03355925", "flagpole"], "558": ["n03372029", "flute"], "559": ["n03376595", "folding_chair"], "560": ["n03379051", "football_helmet"], "561": ["n03384352", "forklift"], "562": ["n03388043", "fountain"], "563": ["n03388183", "fountain_pen"], "564": ["n03388549", "four-poster"], "565": ["n03393912", "freight_car"], "566": 
["n03394916", "French_horn"], "567": ["n03400231", "frying_pan"], "568": ["n03404251", "fur_coat"], "569": ["n03417042", "garbage_truck"], "570": ["n03424325", "gasmask"], "571": ["n03425413", "gas_pump"], "572": ["n03443371", "goblet"], "573": ["n03444034", "go-kart"], "574": ["n03445777", "golf_ball"], "575": ["n03445924", "golfcart"], "576": ["n03447447", "gondola"], "577": ["n03447721", "gong"], "578": ["n03450230", "gown"], "579": ["n03452741", "grand_piano"], "580": ["n03457902", "greenhouse"], "581": ["n03459775", "grille"], "582": ["n03461385", "grocery_store"], "583": ["n03467068", "guillotine"], "584": ["n03476684", "hair_slide"], "585": ["n03476991", "hair_spray"], "586": ["n03478589", "half_track"], "587": ["n03481172", "hammer"], "588": ["n03482405", "hamper"], "589": ["n03483316", "hand_blower"], "590": ["n03485407", "hand-held_computer"], "591": ["n03485794", "handkerchief"], "592": ["n03492542", "hard_disc"], "593": ["n03494278", "harmonica"], "594": ["n03495258", "harp"], "595": ["n03496892", "harvester"], "596": ["n03498962", "hatchet"], "597": ["n03527444", "holster"], "598": ["n03529860", "home_theater"], "599": ["n03530642", "honeycomb"], "600": ["n03532672", "hook"], "601": ["n03534580", "hoopskirt"], "602": ["n03535780", "horizontal_bar"], "603": ["n03538406", "horse_cart"], "604": ["n03544143", "hourglass"], "605": ["n03584254", "iPod"], "606": ["n03584829", "iron"], "607": ["n03590841", "jack-o'-lantern"], "608": ["n03594734", "jean"], "609": ["n03594945", "jeep"], "610": ["n03595614", "jersey"], "611": ["n03598930", "jigsaw_puzzle"], "612": ["n03599486", "jinrikisha"], "613": ["n03602883", "joystick"], "614": ["n03617480", "kimono"], "615": ["n03623198", "knee_pad"], "616": ["n03627232", "knot"], "617": ["n03630383", "lab_coat"], "618": ["n03633091", "ladle"], "619": ["n03637318", "lampshade"], "620": ["n03642806", "laptop"], "621": ["n03649909", "lawn_mower"], "622": ["n03657121", "lens_cap"], "623": ["n03658185", "letter_opener"], "624": 
["n03661043", "library"], "625": ["n03662601", "lifeboat"], "626": ["n03666591", "lighter"], "627": ["n03670208", "limousine"], "628": ["n03673027", "liner"], "629": ["n03676483", "lipstick"], "630": ["n03680355", "Loafer"], "631": ["n03690938", "lotion"], "632": ["n03691459", "loudspeaker"], "633": ["n03692522", "loupe"], "634": ["n03697007", "lumbermill"], "635": ["n03706229", "magnetic_compass"], "636": ["n03709823", "mailbag"], "637": ["n03710193", "mailbox"], "638": ["n03710637", "maillot"], "639": ["n03710721", "maillot"], "640": ["n03717622", "manhole_cover"], "641": ["n03720891", "maraca"], "642": ["n03721384", "marimba"], "643": ["n03724870", "mask"], "644": ["n03729826", "matchstick"], "645": ["n03733131", "maypole"], "646": ["n03733281", "maze"], "647": ["n03733805", "measuring_cup"], "648": ["n03742115", "medicine_chest"], "649": ["n03743016", "megalith"], "650": ["n03759954", "microphone"], "651": ["n03761084", "microwave"], "652": ["n03763968", "military_uniform"], "653": ["n03764736", "milk_can"], "654": ["n03769881", "minibus"], "655": ["n03770439", "miniskirt"], "656": ["n03770679", "minivan"], "657": ["n03773504", "missile"], "658": ["n03775071", "mitten"], "659": ["n03775546", "mixing_bowl"], "660": ["n03776460", "mobile_home"], "661": ["n03777568", "Model_T"], "662": ["n03777754", "modem"], "663": ["n03781244", "monastery"], "664": ["n03782006", "monitor"], "665": ["n03785016", "moped"], "666": ["n03786901", "mortar"], "667": ["n03787032", "mortarboard"], "668": ["n03788195", "mosque"], "669": ["n03788365", "mosquito_net"], "670": ["n03791053", "motor_scooter"], "671": ["n03792782", "mountain_bike"], "672": ["n03792972", "mountain_tent"], "673": ["n03793489", "mouse"], "674": ["n03794056", "mousetrap"], "675": ["n03796401", "moving_van"], "676": ["n03803284", "muzzle"], "677": ["n03804744", "nail"], "678": ["n03814639", "neck_brace"], "679": ["n03814906", "necklace"], "680": ["n03825788", "nipple"], "681": ["n03832673", "notebook"], "682": 
["n03837869", "obelisk"], "683": ["n03838899", "oboe"], "684": ["n03840681", "ocarina"], "685": ["n03841143", "odometer"], "686": ["n03843555", "oil_filter"], "687": ["n03854065", "organ"], "688": ["n03857828", "oscilloscope"], "689": ["n03866082", "overskirt"], "690": ["n03868242", "oxcart"], "691": ["n03868863", "oxygen_mask"], "692": ["n03871628", "packet"], "693": ["n03873416", "paddle"], "694": ["n03874293", "paddlewheel"], "695": ["n03874599", "padlock"], "696": ["n03876231", "paintbrush"], "697": ["n03877472", "pajama"], "698": ["n03877845", "palace"], "699": ["n03884397", "panpipe"], "700": ["n03887697", "paper_towel"], "701": ["n03888257", "parachute"], "702": ["n03888605", "parallel_bars"], "703": ["n03891251", "park_bench"], "704": ["n03891332", "parking_meter"], "705": ["n03895866", "passenger_car"], "706": ["n03899768", "patio"], "707": ["n03902125", "pay-phone"], "708": ["n03903868", "pedestal"], "709": ["n03908618", "pencil_box"], "710": ["n03908714", "pencil_sharpener"], "711": ["n03916031", "perfume"], "712": ["n03920288", "Petri_dish"], "713": ["n03924679", "photocopier"], "714": ["n03929660", "pick"], "715": ["n03929855", "pickelhaube"], "716": ["n03930313", "picket_fence"], "717": ["n03930630", "pickup"], "718": ["n03933933", "pier"], "719": ["n03935335", "piggy_bank"], "720": ["n03937543", "pill_bottle"], "721": ["n03938244", "pillow"], "722": ["n03942813", "ping-pong_ball"], "723": ["n03944341", "pinwheel"], "724": ["n03947888", "pirate"], "725": ["n03950228", "pitcher"], "726": ["n03954731", "plane"], "727": ["n03956157", "planetarium"], "728": ["n03958227", "plastic_bag"], "729": ["n03961711", "plate_rack"], "730": ["n03967562", "plow"], "731": ["n03970156", "plunger"], "732": ["n03976467", "Polaroid_camera"], "733": ["n03976657", "pole"], "734": ["n03977966", "police_van"], "735": ["n03980874", "poncho"], "736": ["n03982430", "pool_table"], "737": ["n03983396", "pop_bottle"], "738": ["n03991062", "pot"], "739": ["n03992509", 
"potter's_wheel"], "740": ["n03995372", "power_drill"], "741": ["n03998194", "prayer_rug"], "742": ["n04004767", "printer"], "743": ["n04005630", "prison"], "744": ["n04008634", "projectile"], "745": ["n04009552", "projector"], "746": ["n04019541", "puck"], "747": ["n04023962", "punching_bag"], "748": ["n04026417", "purse"], "749": ["n04033901", "quill"], "750": ["n04033995", "quilt"], "751": ["n04037443", "racer"], "752": ["n04039381", "racket"], "753": ["n04040759", "radiator"], "754": ["n04041544", "radio"], "755": ["n04044716", "radio_telescope"], "756": ["n04049303", "rain_barrel"], "757": ["n04065272", "recreational_vehicle"], "758": ["n04067472", "reel"], "759": ["n04069434", "reflex_camera"], "760": ["n04070727", "refrigerator"], "761": ["n04074963", "remote_control"], "762": ["n04081281", "restaurant"], "763": ["n04086273", "revolver"], "764": ["n04090263", "rifle"], "765": ["n04099969", "rocking_chair"], "766": ["n04111531", "rotisserie"], "767": ["n04116512", "rubber_eraser"], "768": ["n04118538", "rugby_ball"], "769": ["n04118776", "rule"], "770": ["n04120489", "running_shoe"], "771": ["n04125021", "safe"], "772": ["n04127249", "safety_pin"], "773": ["n04131690", "saltshaker"], "774": ["n04133789", "sandal"], "775": ["n04136333", "sarong"], "776": ["n04141076", "sax"], "777": ["n04141327", "scabbard"], "778": ["n04141975", "scale"], "779": ["n04146614", "school_bus"], "780": ["n04147183", "schooner"], "781": ["n04149813", "scoreboard"], "782": ["n04152593", "screen"], "783": ["n04153751", "screw"], "784": ["n04154565", "screwdriver"], "785": ["n04162706", "seat_belt"], "786": ["n04179913", "sewing_machine"], "787": ["n04192698", "shield"], "788": ["n04200800", "shoe_shop"], "789": ["n04201297", "shoji"], "790": ["n04204238", "shopping_basket"], "791": ["n04204347", "shopping_cart"], "792": ["n04208210", "shovel"], "793": ["n04209133", "shower_cap"], "794": ["n04209239", "shower_curtain"], "795": ["n04228054", "ski"], "796": ["n04229816", "ski_mask"], 
"797": ["n04235860", "sleeping_bag"], "798": ["n04238763", "slide_rule"], "799": ["n04239074", "sliding_door"], "800": ["n04243546", "slot"], "801": ["n04251144", "snorkel"], "802": ["n04252077", "snowmobile"], "803": ["n04252225", "snowplow"], "804": ["n04254120", "soap_dispenser"], "805": ["n04254680", "soccer_ball"], "806": ["n04254777", "sock"], "807": ["n04258138", "solar_dish"], "808": ["n04259630", "sombrero"], "809": ["n04263257", "soup_bowl"], "810": ["n04264628", "space_bar"], "811": ["n04265275", "space_heater"], "812": ["n04266014", "space_shuttle"], "813": ["n04270147", "spatula"], "814": ["n04273569", "speedboat"], "815": ["n04275548", "spider_web"], "816": ["n04277352", "spindle"], "817": ["n04285008", "sports_car"], "818": ["n04286575", "spotlight"], "819": ["n04296562", "stage"], "820": ["n04310018", "steam_locomotive"], "821": ["n04311004", "steel_arch_bridge"], "822": ["n04311174", "steel_drum"], "823": ["n04317175", "stethoscope"], "824": ["n04325704", "stole"], "825": ["n04326547", "stone_wall"], "826": ["n04328186", "stopwatch"], "827": ["n04330267", "stove"], "828": ["n04332243", "strainer"], "829": ["n04335435", "streetcar"], "830": ["n04336792", "stretcher"], "831": ["n04344873", "studio_couch"], "832": ["n04346328", "stupa"], "833": ["n04347754", "submarine"], "834": ["n04350905", "suit"], "835": ["n04355338", "sundial"], "836": ["n04355933", "sunglass"], "837": ["n04356056", "sunglasses"], "838": ["n04357314", "sunscreen"], "839": ["n04366367", "suspension_bridge"], "840": ["n04367480", "swab"], "841": ["n04370456", "sweatshirt"], "842": ["n04371430", "swimming_trunks"], "843": ["n04371774", "swing"], "844": ["n04372370", "switch"], "845": ["n04376876", "syringe"], "846": ["n04380533", "table_lamp"], "847": ["n04389033", "tank"], "848": ["n04392985", "tape_player"], "849": ["n04398044", "teapot"], "850": ["n04399382", "teddy"], "851": ["n04404412", "television"], "852": ["n04409515", "tennis_ball"], "853": ["n04417672", "thatch"], "854": 
["n04418357", "theater_curtain"], "855": ["n04423845", "thimble"], "856": ["n04428191", "thresher"], "857": ["n04429376", "throne"], "858": ["n04435653", "tile_roof"], "859": ["n04442312", "toaster"], "860": ["n04443257", "tobacco_shop"], "861": ["n04447861", "toilet_seat"], "862": ["n04456115", "torch"], "863": ["n04458633", "totem_pole"], "864": ["n04461696", "tow_truck"], "865": ["n04462240", "toyshop"], "866": ["n04465501", "tractor"], "867": ["n04467665", "trailer_truck"], "868": ["n04476259", "tray"], "869": ["n04479046", "trench_coat"], "870": ["n04482393", "tricycle"], "871": ["n04483307", "trimaran"], "872": ["n04485082", "tripod"], "873": ["n04486054", "triumphal_arch"], "874": ["n04487081", "trolleybus"], "875": ["n04487394", "trombone"], "876": ["n04493381", "tub"], "877": ["n04501370", "turnstile"], "878": ["n04505470", "typewriter_keyboard"], "879": ["n04507155", "umbrella"], "880": ["n04509417", "unicycle"], "881": ["n04515003", "upright"], "882": ["n04517823", "vacuum"], "883": ["n04522168", "vase"], "884": ["n04523525", "vault"], "885": ["n04525038", "velvet"], "886": ["n04525305", "vending_machine"], "887": ["n04532106", "vestment"], "888": ["n04532670", "viaduct"], "889": ["n04536866", "violin"], "890": ["n04540053", "volleyball"], "891": ["n04542943", "waffle_iron"], "892": ["n04548280", "wall_clock"], "893": ["n04548362", "wallet"], "894": ["n04550184", "wardrobe"], "895": ["n04552348", "warplane"], "896": ["n04553703", "washbasin"], "897": ["n04554684", "washer"], "898": ["n04557648", "water_bottle"], "899": ["n04560804", "water_jug"], "900": ["n04562935", "water_tower"], "901": ["n04579145", "whiskey_jug"], "902": ["n04579432", "whistle"], "903": ["n04584207", "wig"], "904": ["n04589890", "window_screen"], "905": ["n04590129", "window_shade"], "906": ["n04591157", "Windsor_tie"], "907": ["n04591713", "wine_bottle"], "908": ["n04592741", "wing"], "909": ["n04596742", "wok"], "910": ["n04597913", "wooden_spoon"], "911": ["n04599235", "wool"], 
"912": ["n04604644", "worm_fence"], "913": ["n04606251", "wreck"], "914": ["n04612504", "yawl"], "915": ["n04613696", "yurt"], "916": ["n06359193", "web_site"], "917": ["n06596364", "comic_book"], "918": ["n06785654", "crossword_puzzle"], "919": ["n06794110", "street_sign"], "920": ["n06874185", "traffic_light"], "921": ["n07248320", "book_jacket"], "922": ["n07565083", "menu"], "923": ["n07579787", "plate"], "924": ["n07583066", "guacamole"], "925": ["n07584110", "consomme"], "926": ["n07590611", "hot_pot"], "927": ["n07613480", "trifle"], "928": ["n07614500", "ice_cream"], "929": ["n07615774", "ice_lolly"], "930": ["n07684084", "French_loaf"], "931": ["n07693725", "bagel"], "932": ["n07695742", "pretzel"], "933": ["n07697313", "cheeseburger"], "934": ["n07697537", "hotdog"], "935": ["n07711569", "mashed_potato"], "936": ["n07714571", "head_cabbage"], "937": ["n07714990", "broccoli"], "938": ["n07715103", "cauliflower"], "939": ["n07716358", "zucchini"], "940": ["n07716906", "spaghetti_squash"], "941": ["n07717410", "acorn_squash"], "942": ["n07717556", "butternut_squash"], "943": ["n07718472", "cucumber"], "944": ["n07718747", "artichoke"], "945": ["n07720875", "bell_pepper"], "946": ["n07730033", "cardoon"], "947": ["n07734744", "mushroom"], "948": ["n07742313", "Granny_Smith"], "949": ["n07745940", "strawberry"], "950": ["n07747607", "orange"], "951": ["n07749582", "lemon"], "952": ["n07753113", "fig"], "953": ["n07753275", "pineapple"], "954": ["n07753592", "banana"], "955": ["n07754684", "jackfruit"], "956": ["n07760859", "custard_apple"], "957": ["n07768694", "pomegranate"], "958": ["n07802026", "hay"], "959": ["n07831146", "carbonara"], "960": ["n07836838", "chocolate_sauce"], "961": ["n07860988", "dough"], "962": ["n07871810", "meat_loaf"], "963": ["n07873807", "pizza"], "964": ["n07875152", "potpie"], "965": ["n07880968", "burrito"], "966": ["n07892512", "red_wine"], "967": ["n07920052", "espresso"], "968": ["n07930864", "cup"], "969": ["n07932039", 
"eggnog"], "970": ["n09193705", "alp"], "971": ["n09229709", "bubble"], "972": ["n09246464", "cliff"], "973": ["n09256479", "coral_reef"], "974": ["n09288635", "geyser"], "975": ["n09332890", "lakeside"], "976": ["n09399592", "promontory"], "977": ["n09421951", "sandbar"], "978": ["n09428293", "seashore"], "979": ["n09468604", "valley"], "980": ["n09472597", "volcano"], "981": ["n09835506", "ballplayer"], "982": ["n10148035", "groom"], "983": ["n10565667", "scuba_diver"], "984": ["n11879895", "rapeseed"], "985": ["n11939491", "daisy"], "986": ["n12057211", "yellow_lady's_slipper"], "987": ["n12144580", "corn"], "988": ["n12267677", "acorn"], "989": ["n12620546", "hip"], "990": ["n12768682", "buckeye"], "991": ["n12985857", "coral_fungus"], "992": ["n12998815", "agaric"], "993": ["n13037406", "gyromitra"], "994": ["n13040303", "stinkhorn"], "995": ["n13044778", "earthstar"], "996": ["n13052670", "hen-of-the-woods"], "997": ["n13054560", "bolete"], "998": ["n13133613", "ear"], "999": ["n15075141", "toilet_tissue"]} ================================================ FILE: pytorch_classification/mini_imagenet/model.py ================================================ from typing import List, Callable import torch from torch import Tensor import torch.nn as nn def channel_shuffle(x: Tensor, groups: int) -> Tensor: batch_size, num_channels, height, width = x.size() channels_per_group = num_channels // groups # reshape # [batch_size, num_channels, height, width] -> [batch_size, groups, channels_per_group, height, width] x = x.view(batch_size, groups, channels_per_group, height, width) x = torch.transpose(x, 1, 2).contiguous() # flatten x = x.view(batch_size, -1, height, width) return x class InvertedResidual(nn.Module): def __init__(self, input_c: int, output_c: int, stride: int): super(InvertedResidual, self).__init__() if stride not in [1, 2]: raise ValueError("illegal stride value.") self.stride = stride assert output_c % 2 == 0 branch_features = output_c // 2 # 
class ShuffleNetV2(nn.Module):
    """ShuffleNet V2 classification network.

    Layout: conv1 (stem) -> max-pool -> stage2 -> stage3 -> stage4 -> conv5
    (1x1) -> spatial mean -> fc.

    :param stages_repeats: number of blocks in stage2, stage3 and stage4
        (must have length 3).
    :param stages_out_channels: output channels of conv1, stage2, stage3,
        stage4 and conv5 (must have length 5).
    :param num_classes: output size of the final linear layer.
    :param inverted_residual: block factory, called as
        ``inverted_residual(in_channels, out_channels, stride)``.
    :raises ValueError: if either list has the wrong length.
    """

    def __init__(self,
                 stages_repeats: List[int],
                 stages_out_channels: List[int],
                 num_classes: int = 1000,
                 inverted_residual: Callable[..., nn.Module] = InvertedResidual):
        super().__init__()

        if len(stages_repeats) != 3:
            raise ValueError("expected stages_repeats as list of 3 positive ints")
        if len(stages_out_channels) != 5:
            raise ValueError("expected stages_out_channels as list of 5 positive ints")
        self._stage_out_channels = stages_out_channels

        # Stem: 3-channel (RGB) input, stride-2 3x3 conv, then stride-2 max-pool.
        stem_c = self._stage_out_channels[0]
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, stem_c, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(stem_c),
            nn.ReLU(inplace=True),
        )
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Static annotations for mypy
        self.stage2: nn.Sequential
        self.stage3: nn.Sequential
        self.stage4: nn.Sequential

        # Each stage opens with one stride-2 block and continues with
        # (repeats - 1) stride-1 blocks that keep the channel count.
        in_c = stem_c
        stage_specs = zip(stages_repeats, self._stage_out_channels[1:])
        for stage_idx, (repeats, out_c) in enumerate(stage_specs, start=2):
            blocks = [inverted_residual(in_c, out_c, 2)]
            blocks += [inverted_residual(out_c, out_c, 1) for _ in range(repeats - 1)]
            setattr(self, "stage{}".format(stage_idx), nn.Sequential(*blocks))
            in_c = out_c

        head_c = self._stage_out_channels[-1]
        self.conv5 = nn.Sequential(
            nn.Conv2d(in_c, head_c, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(head_c),
            nn.ReLU(inplace=True),
        )

        self.fc = nn.Linear(head_c, num_classes)

    def _forward_impl(self, x: Tensor) -> Tensor:
        """Run the full network and return the output of the ``fc`` layer."""
        # See note [TorchScript super()]
        x = self.maxpool(self.conv1(x))
        for stage in (self.stage2, self.stage3, self.stage4):
            x = stage(x)
        x = self.conv5(x)
        x = x.mean([2, 3])  # global pool
        return self.fc(x)

    def forward(self, x: Tensor) -> Tensor:
        """Delegate to :meth:`_forward_impl`."""
        return self._forward_impl(x)
def init_distributed_mode(args):
    """Configure ``args`` for distributed training and join the process group.

    Rank information is taken from the ``RANK`` / ``WORLD_SIZE`` /
    ``LOCAL_RANK`` environment variables when the first two are present,
    otherwise from ``SLURM_PROCID``.  If neither source is available,
    ``args.distributed`` is set to ``False`` and nothing else is done.

    On the distributed path this sets ``args.rank``, ``args.gpu``,
    ``args.distributed`` and ``args.dist_backend`` (and ``args.world_size``
    on the ``RANK``/``WORLD_SIZE`` path), selects the CUDA device,
    initialises the process group and waits on a barrier.

    :param args: namespace-like object; ``args.dist_url`` is read, and
        ``args.world_size`` must already be set on the SLURM path.
    """
    env = os.environ
    launched_with_rank = 'RANK' in env and 'WORLD_SIZE' in env

    if not launched_with_rank and 'SLURM_PROCID' not in env:
        print('Not using distributed mode')
        args.distributed = False
        return

    if launched_with_rank:
        args.rank = int(env["RANK"])
        args.world_size = int(env['WORLD_SIZE'])
        args.gpu = int(env['LOCAL_RANK'])
    else:
        args.rank = int(env['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'  # communication backend; NCCL is recommended for NVIDIA GPUs
    print(f'| distributed init (rank {args.rank}): {args.dist_url}', flush=True)
    dist.init_process_group(
        backend=args.dist_backend,
        init_method=args.dist_url,
        world_size=args.world_size,
        rank=args.rank,
    )
    dist.barrier()
def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
    """Create a ``LambdaLR`` scheduler that linearly warms up the learning rate.

    The multiplier starts at ``warmup_factor`` for step 0, grows linearly
    and is 1 for every step ``>= warmup_iters``.

    :param optimizer: optimizer whose learning rate is scaled.
    :param warmup_iters: number of scheduler steps the warm-up lasts.
    :param warmup_factor: multiplier applied at step 0.
    :return: a ``torch.optim.lr_scheduler.LambdaLR`` instance.
    """

    def lr_multiplier(step):
        """Return the learning-rate multiplier for the given step count."""
        if step < warmup_iters:
            # linear interpolation: warmup_factor -> 1 over warmup_iters steps
            progress = float(step) / warmup_iters
            return warmup_factor * (1 - progress) + progress
        # warm-up finished: use the base learning rate unchanged
        return 1

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_multiplier)
class MyDataSet(Dataset):
    """Image-classification dataset described by a CSV file and a class JSON.

    Images are expected under ``<root_dir>/images``.  The CSV must provide
    ``filename`` and ``label`` columns.  The JSON maps each CSV label to a
    sequence whose first element is used as the numeric class index.

    :param root_dir: dataset root containing the ``images`` directory and the CSV.
    :param csv_name: CSV file name, relative to ``root_dir``.
    :param json_path: path of the class-index JSON file.
    :param transform: optional callable applied to each loaded PIL image.
    :raises AssertionError: if the images directory, JSON file or CSV file
        does not exist.
    """

    def __init__(self,
                 root_dir: str,
                 csv_name: str,
                 json_path: str,
                 transform=None):
        images_dir = os.path.join(root_dir, "images")
        assert os.path.exists(images_dir), "dir:'{}' not found.".format(images_dir)

        assert os.path.exists(json_path), "file:'{}' not found.".format(json_path)
        # Use a context manager so the JSON file handle is closed right away
        # instead of being left for the garbage collector.
        with open(json_path, "r") as json_file:
            self.label_dict = json.load(json_file)

        csv_path = os.path.join(root_dir, csv_name)
        assert os.path.exists(csv_path), "file:'{}' not found.".format(csv_path)
        csv_data = pd.read_csv(csv_path)
        self.total_num = csv_data.shape[0]
        self.img_paths = [os.path.join(images_dir, i) for i in csv_data["filename"].values]
        # numeric class index of every sample, looked up via its CSV label
        self.img_label = [self.label_dict[i][0] for i in csv_data["label"].values]
        # distinct CSV labels present in this split
        self.labels = set(csv_data["label"].values)

        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV."""
        return self.total_num

    def __getitem__(self, item):
        """Load sample ``item`` and return ``(image, label)``.

        :raises ValueError: if the image is not in RGB mode.
        """
        img = Image.open(self.img_paths[item])
        # 'RGB' is a colour image, 'L' is a greyscale image
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(self.img_paths[item]))
        label = self.img_label[item]

        if self.transform is not None:
            img = self.transform(img)

        return img, label

    @staticmethod
    def collate_fn(batch):
        """Stack a list of ``(image_tensor, label)`` pairs into batch tensors."""
        # The official default_collate implementation can be found at
        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        images, labels = tuple(zip(*batch))

        images = torch.stack(images, dim=0)
        labels = torch.as_tensor(labels)
        return images, labels
i.endswith(".jpg")] print("find {} images in dataset.".format(len(images_list))) train_data, train_label = read_csv_classes(path, "train.csv") val_data, val_label = read_csv_classes(path, "val.csv") test_data, test_label = read_csv_classes(path, "test.csv") # Union operation labels = (train_label | val_label | test_label) labels = list(labels) labels.sort() print("all classes: {}".format(len(labels))) # create classes_name.json classes_label = dict([(label, [index, label_dict[label]]) for index, label in enumerate(labels)]) json_str = json.dumps(classes_label, indent=4) with open('classes_name.json', 'w') as json_file: json_file.write(json_str) # concat csv data data = pd.concat([train_data, val_data, test_data], axis=0) print("total data shape: {}".format(data.shape)) # split data on every classes num_every_classes = [] split_train_data = [] split_val_data = [] for label in labels: class_data = data[data["label"] == label] num_every_classes.append(class_data.shape[0]) # shuffle shuffle_data = class_data.sample(frac=1, random_state=1) num_train_sample = int(class_data.shape[0] * (1 - rate)) split_train_data.append(shuffle_data[:num_train_sample]) split_val_data.append(shuffle_data[num_train_sample:]) # imshow imshow_flag = False if imshow_flag: img_name, img_label = shuffle_data.iloc[0].values img = Image.open(os.path.join(image_dir, img_name)) plt.imshow(img) plt.title("class: " + classes_label[img_label][1]) plt.show() # plot classes distribution plot_flag = False if plot_flag: plt.bar(range(1, 101), num_every_classes, align='center') plt.show() # concatenate data new_train_data = pd.concat(split_train_data, axis=0) new_val_data = pd.concat(split_val_data, axis=0) # save new csv data new_train_data.to_csv(os.path.join(path, "new_train.csv")) new_val_data.to_csv(os.path.join(path, "new_val.csv")) def main(): data_dir = "/data/mini-imagenet/" json_path = "./imagenet_class_index.json" # load imagenet labels label_dict = json.load(open(json_path, "r")) label_dict = 
def main(args):
    """Train ShuffleNetV2 x1.0 with DistributedDataParallel, one process per GPU.

    Reads ``device``, ``batch_size``, ``num_classes``, ``weights``, ``lr``,
    ``lrf``, ``epochs``, ``data_path``, ``freeze_layers``, ``syncBN`` and
    ``world_size`` from ``args``; ``rank`` and ``gpu`` are expected to be
    filled in by ``init_distributed_mode``.  ``args.lr`` is multiplied by
    ``args.world_size`` in place.  Rank 0 writes TensorBoard scalars and
    saves a checkpoint to ``./weights`` after every epoch.

    :param args: parsed command-line namespace.
    :raises EnvironmentError: if CUDA is not available.
    :raises ValueError: if ``args.num_classes`` differs from the number of
        classes found in the training CSV.
    """
    if torch.cuda.is_available() is False:
        raise EnvironmentError("not find GPU device for training.")

    # Initialise the distributed environment of this process
    init_distributed_mode(args=args)

    rank = args.rank
    device = torch.device(args.device)
    batch_size = args.batch_size
    num_classes = args.num_classes
    weights_path = args.weights
    args.lr *= args.world_size  # the learning rate is scaled by the number of parallel GPUs

    # Temporary file used to share freshly initialised weights between ranks.
    # Bound up front so the cleanup at the end of training never reads an
    # undefined name when pretrained weights were loaded instead.
    checkpoint_path = ""

    if rank == 0:  # print information and create the TensorBoard writer in the first process only
        print(args)
        print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
        tb_writer = SummaryWriter()
        if os.path.exists("./weights") is False:
            os.makedirs("./weights")

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    data_root = args.data_path
    json_path = "./classes_name.json"
    # Training dataset
    train_dataset = MyDataSet(root_dir=data_root,
                              csv_name="new_train.csv",
                              json_path=json_path,
                              transform=data_transform["train"])

    # check num_classes
    if args.num_classes != len(train_dataset.labels):
        raise ValueError("dataset have {} classes, but input {}".format(len(train_dataset.labels),
                                                                        args.num_classes))

    # Validation dataset
    val_dataset = MyDataSet(root_dir=data_root,
                            csv_name="new_val.csv",
                            json_path=json_path,
                            transform=data_transform["val"])

    # Assign each rank's process the sample indices it trains on
    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)

    # Group the sample indices into lists of batch_size elements
    train_batch_sampler = torch.utils.data.BatchSampler(
        train_sampler, batch_size, drop_last=True)

    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    if rank == 0:
        print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_sampler=train_batch_sampler,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_dataset.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             sampler=val_sampler,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_dataset.collate_fn)
    # Build the model
    model = shufflenet_v2_x1_0(num_classes=num_classes).to(device)

    # Load pretrained weights when the file exists
    if os.path.exists(weights_path):
        weights_dict = torch.load(weights_path, map_location=device)
        # Keep only tensors that exist in the model and match in element
        # count; keys unknown to the model are skipped instead of raising
        # KeyError.  The state dict is built once, not once per key.
        model_state = model.state_dict()
        load_weights_dict = {k: v for k, v in weights_dict.items()
                             if k in model_state and model_state[k].numel() == v.numel()}
        model.load_state_dict(load_weights_dict, strict=False)
    else:
        checkpoint_path = os.path.join(tempfile.gettempdir(), "initial_weights.pt")
        # Without pretrained weights, the first process saves its weights and
        # every process loads them so that all ranks start identically
        if rank == 0:
            torch.save(model.state_dict(), checkpoint_path)

        dist.barrier()
        # map_location must be given, otherwise the first GPU ends up using more resources
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))

    # Optionally freeze weights
    if args.freeze_layers:
        for name, para in model.named_parameters():
            # freeze everything except the final fully connected layer
            if "fc" not in name:
                para.requires_grad_(False)
    else:
        # SyncBatchNorm only makes sense when training a network that has BN layers
        if args.syncBN:
            # training takes longer with SyncBatchNorm
            model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)

    # Wrap as a DDP model
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])

    # optimizer
    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=4E-5)

    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    def lf(x):
        """Cosine learning-rate multiplier decaying from 1 towards args.lrf."""
        return ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf

    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    for epoch in range(args.epochs):
        train_sampler.set_epoch(epoch)

        mean_loss = train_one_epoch(model=model,
                                    optimizer=optimizer,
                                    data_loader=train_loader,
                                    device=device,
                                    epoch=epoch)

        scheduler.step()

        acc = evaluate(model=model,
                       data_loader=val_loader,
                       device=device)

        if rank == 0:
            print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3)))
            tags = ["loss", "accuracy", "learning_rate"]
            tb_writer.add_scalar(tags[0], mean_loss, epoch)
            tb_writer.add_scalar(tags[1], acc, epoch)
            tb_writer.add_scalar(tags[2], optimizer.param_groups[0]["lr"], epoch)

            torch.save(model.module.state_dict(), "./weights/model-{}.pth".format(epoch))

    # Remove the temporary weight file, if one was created
    if rank == 0 and checkpoint_path and os.path.exists(checkpoint_path):
        os.remove(checkpoint_path)

    cleanup()
args.batch_size nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]) # number of workers print('Using {} dataloader workers every process'.format(nw)) train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=nw, collate_fn=train_dataset.collate_fn) val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True, num_workers=nw, collate_fn=val_dataset.collate_fn) # create model model = shufflenet_v2_x1_0(num_classes=args.num_classes).to(device) # 如果存在预训练权重则载入 # if args.weights != "": # if os.path.exists(args.weights): # weights_dict = torch.load(args.weights, map_location=device) # load_weights_dict = {k: v for k, v in weights_dict.items() # if model.state_dict()[k].numel() == v.numel()} # print(model.load_state_dict(load_weights_dict, strict=False)) # else: # raise FileNotFoundError("not found weights file: {}".format(args.weights)) # 是否冻结权重 # if args.freeze_layers: # for name, para in model.named_parameters(): # # 除最后的全连接层外,其他权重全部冻结 # if "fc" not in name: # para.requires_grad_(False) pg = [p for p in model.parameters() if p.requires_grad] optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=4E-5) # Scheduler https://arxiv.org/pdf/1812.01187.pdf lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf # cosine scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) for epoch in range(args.epochs): # train mean_loss = train_one_epoch(model=model, optimizer=optimizer, data_loader=train_loader, device=device, epoch=epoch, warmup=True) scheduler.step() # validate acc = evaluate(model=model, data_loader=val_loader, device=device) print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3))) tags = ["loss", "accuracy", "learning_rate"] tb_writer.add_scalar(tags[0], mean_loss, epoch) tb_writer.add_scalar(tags[1], acc, epoch) tb_writer.add_scalar(tags[2], optimizer.param_groups[0]["lr"], epoch) 
def main():
    """Print complexity statistics of EfficientNetV2-S in two ways.

    Option 1 freezes every parameter whose name does not contain ``"head"``
    and prints the model's own ``complexity`` report as a table.  Option 2
    prints the fvcore FLOP total and parameter-count table.
    """
    net = efficientnetv2_s()

    # option1
    for param_name, param in net.named_parameters():
        if "head" in param_name:
            print("training {}".format(param_name))
            continue
        # freeze everything except the head
        param.requires_grad_(False)

    stats = net.complexity(224, 224, 3)
    n_params = stats["params"]
    n_frozen = stats["freeze"]

    report = PrettyTable()
    report.field_names = ["params", "freeze-params", "train-params", "FLOPs", "acts"]
    report.add_row([n_params, n_frozen, n_params - n_frozen, stats["flops"], stats["acts"]])
    print(report)

    # option2
    dummy_input = (torch.rand(1, 3, 224, 224),)
    flop_counter = FlopCountAnalysis(net, dummy_input)
    print(flop_counter.total())

    print(parameter_count_table(net))
def drop_path(x, drop_prob: float = 0., training: bool = False):
    """
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
    "Deep Networks with Stochastic Depth", https://arxiv.org/pdf/1603.09382.pdf

    This function is taken from the rwightman.
    It can be seen here:
    https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py#L140

    Args:
        x: input tensor; the first dimension is treated as the sample dimension.
        drop_prob: probability of zeroing a whole sample. ``None`` and ``0``
            both disable dropping.
        training: dropping is only applied when this is True.

    Returns:
        ``x`` itself when dropping is disabled, otherwise a new tensor in which
        each sample is either zeroed or scaled by ``1 / (1 - drop_prob)``.
    """
    # None is the default stored by DropPath(); treat it like "no dropping"
    # instead of failing on ``1 - None`` in training mode.
    if drop_prob is None or drop_prob == 0. or not training:
        return x
    keep_prob = 1 - drop_prob
    if keep_prob <= 0:
        # Every path is dropped. Dividing by keep_prob == 0 would yield NaN
        # (inf * 0), so return zeros explicitly.
        return torch.zeros_like(x)
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
    random_tensor.floor_()  # binarize: 1 with probability keep_prob, else 0
    output = x.div(keep_prob) * random_tensor
    return output
class SqueezeExcite(nn.Module):
    """Squeeze-and-Excitation gate applied to the expanded feature map.

    The squeezed width is ``int(input_c * se_ratio)``, i.e. it is derived from
    the block's input channels, while both 1x1 convolutions operate on the
    ``expand_c``-channel tensor.
    """

    def __init__(self,
                 input_c: int,   # block input channel
                 expand_c: int,  # block expand channel
                 se_ratio: float = 0.25):
        super(SqueezeExcite, self).__init__()
        reduced_c = int(input_c * se_ratio)
        self.conv_reduce = nn.Conv2d(expand_c, reduced_c, 1)
        self.act1 = nn.SiLU()  # alias Swish
        self.conv_expand = nn.Conv2d(reduced_c, expand_c, 1)
        self.act2 = nn.Sigmoid()

    def forward(self, x: Tensor) -> Tensor:
        """Return ``x`` rescaled channel-wise by the learned gate."""
        pooled = x.mean((2, 3), keepdim=True)  # global average pool over H, W
        squeezed = self.act1(self.conv_reduce(pooled))
        gate = self.act2(self.conv_expand(squeezed))
        return gate * x

    def complexity(self, cx):
        """Accumulate the cost of both 1x1 convs into ``cx`` and return it.

        The spatial size is collapsed to 1x1 for the gate branch and restored
        afterwards, because the block output keeps the input resolution.
        """
        spatial = (cx["h"], cx["w"])
        cx = gap2d_cx(cx)
        for conv in (self.conv_reduce, self.conv_expand):
            cx = conv2d_cx(cx,
                           in_c=conv.in_channels,
                           out_c=conv.out_channels,
                           k=1,
                           bias=True,
                           trainable=conv.weight.requires_grad)
        cx["h"], cx["w"] = spatial
        return cx
class FusedMBConv(nn.Module):
    """Fused-MBConv block: a regular conv replaces the expand + depth-wise pair.

    With ``expand_ratio != 1`` the block is "kxk expand conv -> 1x1 linear
    projection"; with ``expand_ratio == 1`` it is a single kxk conv followed by
    an activation. A residual connection (with optional DropPath) is used when
    the stride is 1 and the channel count is unchanged.
    """

    def __init__(self,
                 kernel_size: int,
                 input_c: int,
                 out_c: int,
                 expand_ratio: int,
                 stride: int,
                 se_ratio: float,
                 drop_rate: float,
                 norm_layer: Callable[..., nn.Module]):
        super(FusedMBConv, self).__init__()

        assert stride in [1, 2]
        assert se_ratio == 0

        swish = nn.SiLU  # alias Swish
        hidden_c = input_c * expand_ratio

        self.out_channels = out_c
        self.drop_rate = drop_rate
        self.has_expansion = expand_ratio != 1
        self.has_shortcut = stride == 1 and input_c == out_c

        if not self.has_expansion:
            # Only project_conv exists; note that it keeps its activation.
            self.project_conv = ConvBNAct(input_c,
                                          out_c,
                                          kernel_size=kernel_size,
                                          stride=stride,
                                          norm_layer=norm_layer,
                                          activation_layer=swish)
        else:
            # Expansion convolution (present only when expand_ratio != 1)
            self.expand_conv = ConvBNAct(input_c,
                                         hidden_c,
                                         kernel_size=kernel_size,
                                         stride=stride,
                                         norm_layer=norm_layer,
                                         activation_layer=swish)
            # Linear projection: Identity is passed so there is no activation.
            self.project_conv = ConvBNAct(hidden_c,
                                          out_c,
                                          kernel_size=1,
                                          norm_layer=norm_layer,
                                          activation_layer=nn.Identity)

        # The DropPath layer is only created when the shortcut is used.
        if self.has_shortcut and drop_rate > 0:
            self.dropout = DropPath(drop_rate)

    def forward(self, x: Tensor) -> Tensor:
        out = self.expand_conv(x) if self.has_expansion else x
        out = self.project_conv(out)

        if not self.has_shortcut:
            return out

        if self.drop_rate > 0:
            out = self.dropout(out)
        out += x
        return out

    def complexity(self, cx):
        """Accumulate the cost of the block's convolutions into ``cx``."""
        if self.has_expansion:
            cx = self.expand_conv.complexity(cx)
        return self.project_conv.complexity(cx)
def efficientnetv2_s(num_classes: int = 1000):
    """
    Build the EfficientNetV2-S variant.

    EfficientNetV2
    https://arxiv.org/abs/2104.00298

    Reference image sizes: train_size 300, eval_size 384.
    """
    # Each row: repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio
    # operator 0 -> FusedMBConv stages (no squeeze-excite)
    fused_stages = [[2, 3, 1, 1, 24, 24, 0, 0],
                    [4, 3, 2, 4, 24, 48, 0, 0],
                    [4, 3, 2, 4, 48, 64, 0, 0]]
    # operator 1 -> MBConv stages with squeeze-excite
    mbconv_stages = [[6, 3, 2, 4, 64, 128, 1, 0.25],
                     [9, 3, 1, 6, 128, 160, 1, 0.25],
                     [15, 3, 2, 6, 160, 256, 1, 0.25]]

    return EfficientNetV2(model_cnf=fused_stages + mbconv_stages,
                          num_classes=num_classes,
                          dropout_rate=0.2)
def conv2d_cx(cx, in_c, out_c, k, *, stride=1, groups=1, bias=False, trainable=True):
    """Accumulate the complexity of a conv2d layer into the dict ``cx``.

    ``cx`` holds the running state under the keys "h", "w", "c", "flops",
    "params", "acts" and "freeze". It is updated in place and returned.
    """
    assert k % 2 == 1, "Only odd size kernels supported to avoid padding issues."
    assert cx["c"] == in_c

    out_h = (cx["h"] - 1) // stride + 1
    out_w = (cx["w"] - 1) // stride + 1
    bias_count = out_c if bias else 0
    param_count = k * k * in_c * out_c // groups + bias_count

    cx.update(h=out_h, w=out_w, c=out_c)
    # The floor division is applied to the whole product, not per position.
    cx["flops"] += k * k * in_c * out_c * out_h * out_w // groups + bias_count
    cx["params"] += param_count
    cx["acts"] += out_c * out_h * out_w
    if trainable is False:
        cx["freeze"] += param_count
    return cx
def norm2d_cx(cx, in_c, trainable=True):
    """Accumulate the complexity of a 2D normalisation layer into the dict ``cx``.

    Args:
        cx: running complexity dict; the keys "c", "params" and "freeze" are
            read and "params"/"freeze" are updated in place.
        in_c: number of channels the layer expects; must equal ``cx["c"]``.
        trainable: pass ``False`` when the affine parameters are frozen.

    Returns:
        The same ``cx`` object.
    """
    c = cx["c"]
    assert c == in_c
    # Four values per channel are counted: two running statistics and the two
    # affine parameters.
    cx["params"] += 4 * c
    cx["freeze"] += 2 * c  # moving_mean, variance: counted as frozen in every case
    if trainable is False:
        cx["freeze"] += 2 * c  # beta, gamma
    return cx
class ConfusionMatrix(object):
    """
    Accumulates a confusion matrix indexed as ``matrix[predicted, true]``.

    Note: if the plotted figure is cut off, it is a matplotlib version issue;
    this example renders correctly with matplotlib-3.2.1 (windows and ubuntu).
    The prettytable package has to be installed separately.
    """

    def __init__(self, num_classes: int, labels: list):
        self.matrix = np.zeros((num_classes, num_classes))
        self.num_classes = num_classes
        self.labels = labels

    def update(self, preds, labels):
        """Count one sample per (prediction, ground-truth) pair."""
        for pred_idx, true_idx in zip(preds, labels):
            self.matrix[pred_idx, true_idx] += 1

    def summary(self):
        """Print overall accuracy and a per-class precision/recall/specificity table."""
        total = np.sum(self.matrix)

        # calculate accuracy
        correct = sum(self.matrix[i, i] for i in range(self.num_classes))
        acc = correct / total
        print("the model accuracy is ", acc)

        def _ratio(numerator, denominator):
            # A metric with an empty denominator is reported as 0.
            return round(numerator / denominator, 3) if denominator != 0 else 0.

        # precision, recall, specificity
        table = PrettyTable()
        table.field_names = ["", "Precision", "Recall", "Specificity"]
        for cls in range(self.num_classes):
            TP = self.matrix[cls, cls]
            FP = np.sum(self.matrix[cls, :]) - TP  # predicted as cls, but another class
            FN = np.sum(self.matrix[:, cls]) - TP  # truly cls, but predicted otherwise
            TN = total - TP - FP - FN
            table.add_row([self.labels[cls],
                           _ratio(TP, TP + FP),
                           _ratio(TP, TP + FN),
                           _ratio(TN, TN + FP)])
        print(table)

    def plot(self):
        """Print the matrix and show it as an annotated heat map."""
        counts = self.matrix
        ticks = range(self.num_classes)
        print(counts)
        plt.imshow(counts, cmap=plt.cm.Blues)

        # x-axis tick labels
        plt.xticks(ticks, self.labels, rotation=45)
        # y-axis tick labels
        plt.yticks(ticks, self.labels)
        # show the colorbar
        plt.colorbar()
        plt.xlabel('True Labels')
        plt.ylabel('Predicted Labels')
        plt.title('Confusion matrix')

        # annotate every cell with its count
        half_max = counts.max() / 2
        for col in range(self.num_classes):
            for row in range(self.num_classes):
                # note the index order: counts[row, col], not counts[col, row]
                cell = int(counts[row, col])
                text_color = "white" if cell > half_max else "black"
                plt.text(col, row, cell,
                         verticalalignment='center',
                         horizontalalignment='center',
                         color=text_color)
        plt.tight_layout()
        plt.show()
# Script entry point: parse the command-line options and build the confusion
# matrix for the validation split.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--batch-size', type=int, default=2)

    # Root directory of the dataset
    # http://download.tensorflow.org/example_images/flower_photos.tgz
    parser.add_argument('--data-path', type=str,
                        default="/data/flower_photos")

    # Path of the trained weights to evaluate
    parser.add_argument('--weights', type=str, default='./weights/model-19.pth',
                        help='initial weights path')
    # Device to run on; main() falls back to "cpu" when CUDA is unavailable
    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')

    opt = parser.parse_args()

    main(opt)
def window_partition(x, window_size: int):
    """
    Split a feature map into non-overlapping windows of side ``window_size``.

    Args:
        x: (B, H, W, C)
        window_size (int): window size(M)

    Returns:
        windows: (num_windows*B, window_size, window_size, C)
    """
    batch, height, width, channels = x.shape
    rows, cols = height // window_size, width // window_size
    # [B, H, W, C] -> [B, H//M, M, W//M, M, C]
    grid = x.view(batch, rows, window_size, cols, window_size, channels)
    # bring the two window-grid axes together: [B, H//M, W//M, M, M, C]
    grid = grid.permute(0, 1, 3, 2, 4, 5).contiguous()
    # merge batch and window-grid axes: [B*num_windows, M, M, C]
    return grid.view(-1, window_size, window_size, channels)
class PatchEmbed(nn.Module):
    """
    2D Image to Patch Embedding

    Splits an image into non-overlapping ``patch_size`` x ``patch_size``
    patches with a strided convolution and flattens them into a token
    sequence.

    Args:
        patch_size (int): side length of a square patch. Default: 4
        in_c (int): number of input image channels. Default: 3
        embed_dim (int): number of output channels per patch. Default: 96
        norm_layer (callable, optional): called with ``embed_dim`` to build the
            normalisation applied to the tokens; ``None`` means no norm.
    """
    def __init__(self, patch_size=4, in_c=3, embed_dim=96, norm_layer=None):
        super().__init__()
        patch_size = (patch_size, patch_size)
        self.patch_size = patch_size
        self.in_chans = in_c
        self.embed_dim = embed_dim
        self.proj = nn.Conv2d(in_c, embed_dim, kernel_size=patch_size, stride=patch_size)
        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()

    def forward(self, x):
        """Embed ``x`` of shape [B, C, H, W].

        Returns:
            (tokens, H, W): tokens has shape [B, H*W, embed_dim], where H and W
            are the patch-grid height and width after the projection.
        """
        _, _, H, W = x.shape

        # padding
        # If H or W is not a multiple of patch_size, pad on the bottom/right.
        # The trailing "% patch" keeps an already-aligned dimension at zero
        # padding; without it that dimension would grow by a whole patch
        # whenever only the other dimension needed padding.
        pad_h = (self.patch_size[0] - H % self.patch_size[0]) % self.patch_size[0]
        pad_w = (self.patch_size[1] - W % self.patch_size[1]) % self.patch_size[1]
        if pad_h > 0 or pad_w > 0:
            # to pad the last 3 dimensions,
            # (W_left, W_right, H_top, H_bottom, C_front, C_back)
            x = F.pad(x, (0, pad_w,
                          0, pad_h,
                          0, 0))

        # downsample by a factor of patch_size
        x = self.proj(x)
        _, _, H, W = x.shape
        # flatten: [B, C, H, W] -> [B, C, HW]
        # transpose: [B, C, HW] -> [B, HW, C]
        x = x.flatten(2).transpose(1, 2)
        x = self.norm(x)
        return x, H, W
class Mlp(nn.Module):
    """ Two-layer feed-forward network: Linear -> activation -> Dropout -> Linear -> Dropout.

    ``hidden_features`` and ``out_features`` fall back to ``in_features`` when
    they are left unset (or are otherwise falsy).
    """
    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        hidden_dim = hidden_features or in_features
        out_dim = out_features or in_features

        self.fc1 = nn.Linear(in_features, hidden_dim)
        self.act = act_layer()
        self.drop1 = nn.Dropout(drop)
        self.fc2 = nn.Linear(hidden_dim, out_dim)
        self.drop2 = nn.Dropout(drop)

    def forward(self, x):
        hidden = self.drop1(self.act(self.fc1(x)))
        return self.drop2(self.fc2(hidden))
Default: 0.0 """ def __init__(self, dim, window_size, num_heads, qkv_bias=True, attn_drop=0., proj_drop=0.): super().__init__() self.dim = dim self.window_size = window_size # [Mh, Mw] self.num_heads = num_heads head_dim = dim // num_heads self.scale = head_dim ** -0.5 # define a parameter table of relative position bias self.relative_position_bias_table = nn.Parameter( torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads)) # [2*Mh-1 * 2*Mw-1, nH] # get pair-wise relative position index for each token inside the window coords_h = torch.arange(self.window_size[0]) coords_w = torch.arange(self.window_size[1]) coords = torch.stack(torch.meshgrid([coords_h, coords_w], indexing="ij")) # [2, Mh, Mw] coords_flatten = torch.flatten(coords, 1) # [2, Mh*Mw] # [2, Mh*Mw, 1] - [2, 1, Mh*Mw] relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # [2, Mh*Mw, Mh*Mw] relative_coords = relative_coords.permute(1, 2, 0).contiguous() # [Mh*Mw, Mh*Mw, 2] relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 relative_coords[:, :, 1] += self.window_size[1] - 1 relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 relative_position_index = relative_coords.sum(-1) # [Mh*Mw, Mh*Mw] self.register_buffer("relative_position_index", relative_position_index) self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) self.attn_drop = nn.Dropout(attn_drop) self.proj = nn.Linear(dim, dim) self.proj_drop = nn.Dropout(proj_drop) nn.init.trunc_normal_(self.relative_position_bias_table, std=.02) self.softmax = nn.Softmax(dim=-1) def forward(self, x, mask: Optional[torch.Tensor] = None): """ Args: x: input features with shape of (num_windows*B, Mh*Mw, C) mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None """ # [batch_size*num_windows, Mh*Mw, total_embed_dim] B_, N, C = x.shape # qkv(): -> [batch_size*num_windows, Mh*Mw, 3 * total_embed_dim] # reshape: -> [batch_size*num_windows, Mh*Mw, 3, num_heads, embed_dim_per_head] # 
class SwinTransformerBlock(nn.Module):
    r""" Swin Transformer Block.

    One block applies window attention (shifted when ``shift_size > 0``)
    followed by an MLP, each wrapped in a residual connection with optional
    stochastic depth.

    Args:
        dim (int): Number of input channels.
        num_heads (int): Number of attention heads.
        window_size (int): Window size.
        shift_size (int): Shift size for SW-MSA.
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
        drop (float, optional): Dropout rate. Default: 0.0
        attn_drop (float, optional): Attention dropout rate. Default: 0.0
        drop_path (float, optional): Stochastic depth rate. Default: 0.0
        act_layer (nn.Module, optional): Activation layer. Default: nn.GELU
        norm_layer (nn.Module, optional): Normalization layer.  Default: nn.LayerNorm
    """

    def __init__(self, dim, num_heads, window_size=7, shift_size=0,
                 mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0., drop_path=0.,
                 act_layer=nn.GELU, norm_layer=nn.LayerNorm):
        super().__init__()
        self.dim = dim
        self.num_heads = num_heads
        self.window_size = window_size
        self.shift_size = shift_size
        self.mlp_ratio = mlp_ratio
        assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size"

        self.norm1 = norm_layer(dim)
        self.attn = WindowAttention(
            dim, window_size=(self.window_size, self.window_size), num_heads=num_heads, qkv_bias=qkv_bias,
            attn_drop=attn_drop, proj_drop=drop)

        # Stochastic depth is only instantiated for a positive rate.
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)

    def forward(self, x, attn_mask):
        """Run the block on a token sequence.

        Args:
            x: tensor of shape [B, L, C] with L == H * W.
            attn_mask: mask passed to the attention module when
                ``shift_size > 0``; it is ignored (replaced by None) otherwise.

        Returns:
            Tensor of shape [B, H * W, C].

        Note: ``self.H`` and ``self.W`` are not set in ``__init__``; the caller
        must assign them on the block before calling it.
        """
        H, W = self.H, self.W
        B, L, C = x.shape
        assert L == H * W, "input feature has wrong size"

        shortcut = x
        x = self.norm1(x)
        x = x.view(B, H, W, C)

        # pad feature maps to multiples of window size
        # (only the right and bottom edges are padded)
        pad_l = pad_t = 0
        pad_r = (self.window_size - W % self.window_size) % self.window_size
        pad_b = (self.window_size - H % self.window_size) % self.window_size
        x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b))
        _, Hp, Wp, _ = x.shape

        # cyclic shift
        if self.shift_size > 0:
            shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2))
        else:
            shifted_x = x
            # without a shift no mask is passed to the attention module
            attn_mask = None

        # partition windows
        x_windows = window_partition(shifted_x, self.window_size)  # [nW*B, Mh, Mw, C]
        x_windows = x_windows.view(-1, self.window_size * self.window_size, C)  # [nW*B, Mh*Mw, C]

        # W-MSA/SW-MSA
        attn_windows = self.attn(x_windows, mask=attn_mask)  # [nW*B, Mh*Mw, C]

        # merge windows
        attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C)  # [nW*B, Mh, Mw, C]
        shifted_x = window_reverse(attn_windows, self.window_size, Hp, Wp)  # [B, H', W', C]

        # reverse cyclic shift
        if self.shift_size > 0:
            x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2))
        else:
            x = shifted_x

        if pad_r > 0 or pad_b > 0:
            # remove the rows/columns that were added by the padding above
            x = x[:, :H, :W, :].contiguous()

        x = x.view(B, H * W, C)

        # FFN
        x = shortcut + self.drop_path(x)
        x = x + self.drop_path(self.mlp(self.norm2(x)))

        return x
qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True drop (float, optional): Dropout rate. Default: 0.0 attn_drop (float, optional): Attention dropout rate. Default: 0.0 drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. """ def __init__(self, dim, depth, num_heads, window_size, mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0., drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False): super().__init__() self.dim = dim self.depth = depth self.window_size = window_size self.use_checkpoint = use_checkpoint self.shift_size = window_size // 2 # build blocks self.blocks = nn.ModuleList([ SwinTransformerBlock( dim=dim, num_heads=num_heads, window_size=window_size, shift_size=0 if (i % 2 == 0) else self.shift_size, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, drop=drop, attn_drop=attn_drop, drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path, norm_layer=norm_layer) for i in range(depth)]) # patch merging layer if downsample is not None: self.downsample = downsample(dim=dim, norm_layer=norm_layer) else: self.downsample = None def create_mask(self, x, H, W): # calculate attention mask for SW-MSA # 保证Hp和Wp是window_size的整数倍 Hp = int(np.ceil(H / self.window_size)) * self.window_size Wp = int(np.ceil(W / self.window_size)) * self.window_size # 拥有和feature map一样的通道排列顺序,方便后续window_partition img_mask = torch.zeros((1, Hp, Wp, 1), device=x.device) # [1, Hp, Wp, 1] h_slices = (slice(0, -self.window_size), slice(-self.window_size, -self.shift_size), slice(-self.shift_size, None)) w_slices = (slice(0, -self.window_size), slice(-self.window_size, -self.shift_size), slice(-self.shift_size, None)) cnt = 0 for h in h_slices: for w in 
w_slices: img_mask[:, h, w, :] = cnt cnt += 1 mask_windows = window_partition(img_mask, self.window_size) # [nW, Mh, Mw, 1] mask_windows = mask_windows.view(-1, self.window_size * self.window_size) # [nW, Mh*Mw] attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) # [nW, 1, Mh*Mw] - [nW, Mh*Mw, 1] # [nW, Mh*Mw, Mh*Mw] attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) return attn_mask def forward(self, x, H, W): attn_mask = self.create_mask(x, H, W) # [nW, Mh*Mw, Mh*Mw] for blk in self.blocks: blk.H, blk.W = H, W if not torch.jit.is_scripting() and self.use_checkpoint: x = checkpoint.checkpoint(blk, x, attn_mask) else: x = blk(x, attn_mask) if self.downsample is not None: x = self.downsample(x, H, W) H, W = (H + 1) // 2, (W + 1) // 2 return x, H, W class SwinTransformer(nn.Module): r""" Swin Transformer A PyTorch impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows` - https://arxiv.org/pdf/2103.14030 Args: patch_size (int | tuple(int)): Patch size. Default: 4 in_chans (int): Number of input image channels. Default: 3 num_classes (int): Number of classes for classification head. Default: 1000 embed_dim (int): Patch embedding dimension. Default: 96 depths (tuple(int)): Depth of each Swin Transformer layer. num_heads (tuple(int)): Number of attention heads in different layers. window_size (int): Window size. Default: 7 mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4 qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True drop_rate (float): Dropout rate. Default: 0 attn_drop_rate (float): Attention dropout rate. Default: 0 drop_path_rate (float): Stochastic depth rate. Default: 0.1 norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm. patch_norm (bool): If True, add normalization after patch embedding. Default: True use_checkpoint (bool): Whether to use checkpointing to save memory. 
Default: False """ def __init__(self, patch_size=4, in_chans=3, num_classes=1000, embed_dim=96, depths=(2, 2, 6, 2), num_heads=(3, 6, 12, 24), window_size=7, mlp_ratio=4., qkv_bias=True, drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1, norm_layer=nn.LayerNorm, patch_norm=True, use_checkpoint=False, **kwargs): super().__init__() self.num_classes = num_classes self.num_layers = len(depths) self.embed_dim = embed_dim self.patch_norm = patch_norm # stage4输出特征矩阵的channels self.num_features = int(embed_dim * 2 ** (self.num_layers - 1)) self.mlp_ratio = mlp_ratio # split image into non-overlapping patches self.patch_embed = PatchEmbed( patch_size=patch_size, in_c=in_chans, embed_dim=embed_dim, norm_layer=norm_layer if self.patch_norm else None) self.pos_drop = nn.Dropout(p=drop_rate) # stochastic depth dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule # build layers self.layers = nn.ModuleList() for i_layer in range(self.num_layers): # 注意这里构建的stage和论文图中有些差异 # 这里的stage不包含该stage的patch_merging层,包含的是下个stage的 layers = BasicLayer(dim=int(embed_dim * 2 ** i_layer), depth=depths[i_layer], num_heads=num_heads[i_layer], window_size=window_size, mlp_ratio=self.mlp_ratio, qkv_bias=qkv_bias, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], norm_layer=norm_layer, downsample=PatchMerging if (i_layer < self.num_layers - 1) else None, use_checkpoint=use_checkpoint) self.layers.append(layers) self.norm = norm_layer(self.num_features) self.avgpool = nn.AdaptiveAvgPool1d(1) self.head = nn.Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity() self.apply(self._init_weights) def _init_weights(self, m): if isinstance(m, nn.Linear): nn.init.trunc_normal_(m.weight, std=.02) if isinstance(m, nn.Linear) and m.bias is not None: nn.init.constant_(m.bias, 0) elif isinstance(m, nn.LayerNorm): nn.init.constant_(m.bias, 0) nn.init.constant_(m.weight, 1.0) def 
forward(self, x): # x: [B, L, C] x, H, W = self.patch_embed(x) x = self.pos_drop(x) for layer in self.layers: x, H, W = layer(x, H, W) x = self.norm(x) # [B, L, C] x = self.avgpool(x.transpose(1, 2)) # [B, C, 1] x = torch.flatten(x, 1) x = self.head(x) return x def swin_tiny_patch4_window7_224(num_classes: int = 1000, **kwargs): # trained ImageNet-1K # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth model = SwinTransformer(in_chans=3, patch_size=4, window_size=7, embed_dim=96, depths=(2, 2, 6, 2), num_heads=(3, 6, 12, 24), num_classes=num_classes, **kwargs) return model def swin_small_patch4_window7_224(num_classes: int = 1000, **kwargs): # trained ImageNet-1K # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth model = SwinTransformer(in_chans=3, patch_size=4, window_size=7, embed_dim=96, depths=(2, 2, 18, 2), num_heads=(3, 6, 12, 24), num_classes=num_classes, **kwargs) return model def swin_base_patch4_window7_224(num_classes: int = 1000, **kwargs): # trained ImageNet-1K # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224.pth model = SwinTransformer(in_chans=3, patch_size=4, window_size=7, embed_dim=128, depths=(2, 2, 18, 2), num_heads=(4, 8, 16, 32), num_classes=num_classes, **kwargs) return model def swin_base_patch4_window12_384(num_classes: int = 1000, **kwargs): # trained ImageNet-1K # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384.pth model = SwinTransformer(in_chans=3, patch_size=4, window_size=12, embed_dim=128, depths=(2, 2, 18, 2), num_heads=(4, 8, 16, 32), num_classes=num_classes, **kwargs) return model def swin_base_patch4_window7_224_in22k(num_classes: int = 21841, **kwargs): # trained ImageNet-22K # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224_22k.pth model = SwinTransformer(in_chans=3, patch_size=4, 
class MyDataSet(Dataset):
    """Custom dataset pairing image file paths with class labels."""

    def __init__(self, images_path: list, images_class: list, transform=None):
        self.images_path, self.images_class = images_path, images_class
        self.transform = transform

    def __len__(self):
        return len(self.images_path)

    def __getitem__(self, item):
        """Open the image at index ``item`` and return ``(image, label)``.

        Raises:
            ValueError: If the opened image is not in 'RGB' mode.
        """
        path = self.images_path[item]
        img = Image.open(path)
        # 'RGB' is a colour image, 'L' a greyscale one; only RGB is accepted
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(path))
        label = self.images_class[item]

        if self.transform is None:
            return img, label
        return self.transform(img), label

    @staticmethod
    def collate_fn(batch):
        """Stack a list of ``(image, label)`` samples into batch tensors."""
        # The official default_collate implementation can be found at
        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        images, labels = zip(*batch)
        return torch.stack(images, dim=0), torch.as_tensor(labels)
def main(args):
    """Record every misclassified validation image in ``record.txt``.

    Args:
        args: Parsed command-line options; this function reads ``device``,
            ``data_path``, ``batch_size``, ``num_classes`` and ``weights``.

    Raises:
        AssertionError: If the weights file or ``./class_indices.json`` is
            missing.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")

    _, _, val_images_path, val_images_label = read_split_data(args.data_path)

    img_size = 384
    data_transform = {
        "val": transforms.Compose([transforms.Resize(int(img_size * 1.143)),
                                   transforms.CenterCrop(img_size),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # build the validation dataset
    val_dataset = MyDataSet(images_path=val_images_path,
                            images_class=val_images_label,
                            transform=data_transform["val"])

    batch_size = args.batch_size
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_dataset.collate_fn)

    model = create_model(num_classes=args.num_classes).to(device)

    assert os.path.exists(args.weights), "weights file: '{}' not exist.".format(args.weights)
    model.load_state_dict(torch.load(args.weights, map_location=device))

    # read class_indict; the context manager closes the file once it is parsed
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)
    with open(json_path, "r") as json_file:
        class_indict = json.load(json_file)

    model.eval()
    with torch.no_grad():
        with open("record.txt", "w") as f:
            # validate
            data_loader = tqdm(val_loader, file=sys.stdout)
            for step, data in enumerate(data_loader):
                images, labels = data
                pred = model(images.to(device))
                pred_classes = torch.max(pred, dim=1)[1]
                contrast = torch.eq(pred_classes, labels.to(device)).tolist()
                labels = labels.tolist()
                pred_classes = pred_classes.tolist()
                for i, flag in enumerate(contrast):
                    if flag is False:
                        # the loader uses shuffle=False, so the position in the
                        # batch maps straight back to the path list
                        file_name = val_images_path[batch_size * step + i]
                        true_label = class_indict[str(labels[i])]
                        false_label = class_indict[str(pred_classes[i])]
                        f.write(f"{file_name}  TrueLabel:{true_label}  PredictLabel:{false_label}\n")
0 or 0,1 or cpu)') opt = parser.parse_args() main(opt) ================================================ FILE: pytorch_classification/swin_transformer/train.py ================================================ import os import argparse import torch import torch.optim as optim from torch.utils.tensorboard import SummaryWriter from torchvision import transforms from my_dataset import MyDataSet from model import swin_tiny_patch4_window7_224 as create_model from utils import read_split_data, train_one_epoch, evaluate def main(args): device = torch.device(args.device if torch.cuda.is_available() else "cpu") if os.path.exists("./weights") is False: os.makedirs("./weights") tb_writer = SummaryWriter() train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path) img_size = 224 data_transform = { "train": transforms.Compose([transforms.RandomResizedCrop(img_size), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]), "val": transforms.Compose([transforms.Resize(int(img_size * 1.143)), transforms.CenterCrop(img_size), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])} # 实例化训练数据集 train_dataset = MyDataSet(images_path=train_images_path, images_class=train_images_label, transform=data_transform["train"]) # 实例化验证数据集 val_dataset = MyDataSet(images_path=val_images_path, images_class=val_images_label, transform=data_transform["val"]) batch_size = args.batch_size nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]) # number of workers print('Using {} dataloader workers every process'.format(nw)) train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=nw, collate_fn=train_dataset.collate_fn) val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True, num_workers=nw, 
def str2bool(value):
    """Convert a command-line flag value into a bool.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is True,
    so every non-empty string would enable the flag.

    Args:
        value: A bool (returned unchanged) or a string such as "true"/"false".

    Returns:
        The parsed boolean.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ("true", "t", "yes", "y", "1"):
        return True
    if text in ("false", "f", "no", "n", "0"):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--batch-size', type=int, default=8)
    parser.add_argument('--lr', type=float, default=0.0001)

    # root directory of the dataset
    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
    parser.add_argument('--data-path', type=str,
                        default="/data/flower_photos")

    # path of the pre-trained weights; pass an empty string to skip loading
    parser.add_argument('--weights', type=str, default='./swin_tiny_patch4_window7_224.pth',
                        help='initial weights path')
    # whether to freeze the weights; parsed explicitly so that
    # "--freeze-layers False" really disables freezing
    parser.add_argument('--freeze-layers', type=str2bool, default=False)
    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')

    opt = parser.parse_args()

    main(opt)
def plot_data_loader_image(data_loader):
    """Show up to four images of every batch, labelled with their class names.

    Class names are read from ``./class_indices.json``.

    Args:
        data_loader: Iterable with a ``batch_size`` attribute that yields
            ``(images, labels)`` batches.

    Raises:
        AssertionError: If ``./class_indices.json`` does not exist.
    """
    batch_size = data_loader.batch_size
    plot_num = min(batch_size, 4)

    json_path = './class_indices.json'
    assert os.path.exists(json_path), json_path + " does not exist."
    # close the file as soon as the mapping has been parsed
    with open(json_path, 'r') as json_file:
        class_indices = json.load(json_file)

    for data in data_loader:
        images, labels = data
        # a final, smaller batch may hold fewer than plot_num samples
        num_shown = min(plot_num, len(images))
        for i in range(num_shown):
            # [C, H, W] -> [H, W, C]
            img = images[i].numpy().transpose(1, 2, 0)
            # undo the Normalize transform
            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255
            label = labels[i].item()
            plt.subplot(1, num_shown, i+1)
            plt.xlabel(class_indices[str(label)])
            plt.xticks([])  # hide the x-axis ticks
            plt.yticks([])  # hide the y-axis ticks
            plt.imshow(img.astype('uint8'))
        plt.show()
def read_split_data(root: str, val_rate: float = 0.2):
    """Split an image-folder dataset into training and validation lists.

    Every sub-directory of ``root`` is one class. The index-to-name mapping
    is written to ``class_indices.json`` in the current working directory.

    Args:
        root: Dataset root directory.
        val_rate: Fraction of each class that is sampled for validation.

    Returns:
        Tuple ``(train_images_path, train_images_label, val_images_path,
        val_images_label)``.

    Raises:
        AssertionError: If ``root`` does not exist.
    """
    random.seed(0)  # make the random split reproducible
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # walk the directories; one directory corresponds to one class
    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]
    # sort so that the order is consistent
    flower_class.sort()
    # map each class name to a numeric index
    class_indices = {name: index for index, name in enumerate(flower_class)}
    json_str = json.dumps({index: name for name, index in class_indices.items()}, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of every training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of every validation image
    every_class_num = []  # number of samples per class
    supported = [".jpg", ".JPG", ".png", ".PNG"]  # supported file suffixes
    # walk the files of every class directory
    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # collect the paths of all supported files
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        # os.listdir returns entries in arbitrary order; sort so that the
        # seeded sample below selects the same files on every platform
        images.sort()
        # index of this class
        image_class = class_indices[cla]
        # record the number of samples of this class
        every_class_num.append(len(images))
        # sample the validation images proportionally; a set gives O(1)
        # membership tests in the loop below
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:  # sampled paths go to the validation set
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:  # everything else goes to the training set
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.".format(sum(every_class_num)))
    print("{} images for training.".format(len(train_images_path)))
    print("{} images for validation.".format(len(val_images_path)))

    # switch to True to display the class distribution
    plot_image = False
    if plot_image:
        # bar chart of the number of images per class
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # replace the x ticks 0,1,2,3,4 with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # add the value labels above the bars
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        # x-axis label
        plt.xlabel('image class')
        # y-axis label
        plt.ylabel('number of images')
        # chart title
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label
def plot_class_preds(net, images_dir: str, transform, num_plot: int = 5, device="cpu"):
    """Plot predictions of ``net`` for the images listed in ``label.txt``.

    ``images_dir`` must contain ``label.txt`` with one ``file_name class_name``
    pair per line. Class names are resolved through ``./class_indices.json``.

    Args:
        net: Model called on the stacked, transformed images.
        images_dir: Directory holding the images and ``label.txt``.
        transform: Callable applied to every RGB image before batching.
        num_plot: Maximum number of images to plot.
        device: Device the image batch is moved to.

    Returns:
        The matplotlib figure, or None when the directory, ``label.txt`` or
        all usable label entries are missing.

    Raises:
        AssertionError: If ``./class_indices.json`` is missing or a label
            line does not consist of exactly two fields.
    """
    if not os.path.exists(images_dir):
        print("not found {} path, ignore add figure.".format(images_dir))
        return None

    label_path = os.path.join(images_dir, "label.txt")
    if not os.path.exists(label_path):
        print("not found {} file, ignore add figure".format(label_path))
        return None

    # read class_indict; the context manager closes the file after parsing
    json_label_path = './class_indices.json'
    assert os.path.exists(json_label_path), "not found {}".format(json_label_path)
    with open(json_label_path, 'r') as json_file:
        # {"0": "daisy"}
        flower_class = json.load(json_file)
    # {"daisy": "0"}
    class_indices = dict((v, k) for k, v in flower_class.items())

    # reading label.txt file
    label_info = []
    with open(label_path, "r") as rd:
        for line in rd.readlines():
            line = line.strip()
            if len(line) > 0:
                split_info = [i for i in line.split(" ") if len(i) > 0]
                assert len(split_info) == 2, "label format error, expect file_name and class_name"
                image_name, class_name = split_info
                image_path = os.path.join(images_dir, image_name)
                # skip entries whose file does not exist
                if not os.path.exists(image_path):
                    print("not found {}, skip.".format(image_path))
                    continue
                # skip entries whose class is not one of the known classes
                if class_name not in class_indices.keys():
                    print("unrecognized category {}, skip".format(class_name))
                    continue
                label_info.append([image_path, class_name])

    if len(label_info) == 0:
        return None

    # get first num_plot info
    if len(label_info) > num_plot:
        label_info = label_info[:num_plot]

    num_imgs = len(label_info)
    images = []
    labels = []
    for img_path, class_name in label_info:
        # read img; convert() returns a new image, so the source file can be
        # closed right away
        with Image.open(img_path) as raw_img:
            img = raw_img.convert("RGB")
        label_index = int(class_indices[class_name])

        # preprocessing
        img = transform(img)
        images.append(img)
        labels.append(label_index)

    # batching images
    images = torch.stack(images, dim=0).to(device)

    # inference
    with torch.no_grad():
        output = net(images)
        probs, preds = torch.max(torch.softmax(output, dim=1), dim=1)
        probs = probs.cpu().numpy()
        preds = preds.cpu().numpy()

    # width, height
    fig = plt.figure(figsize=(num_imgs * 2.5, 3), dpi=100)
    for i in range(num_imgs):
        # one row, num_imgs columns, currently drawing subplot i+1
        ax = fig.add_subplot(1, num_imgs, i+1, xticks=[], yticks=[])

        # CHW -> HWC
        npimg = images[i].cpu().numpy().transpose(1, 2, 0)

        # restore the image to its state before normalization
        # mean:[0.485, 0.456, 0.406], std:[0.229, 0.224, 0.225]
        npimg = (npimg * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255
        plt.imshow(npimg.astype('uint8'))

        title = "{}, {:.2f}%\n(label: {})".format(
            flower_class[str(preds[i])],  # predict class
            probs[i] * 100,  # predict probability
            flower_class[str(labels[i])]  # true class
        )
        ax.set_title(title, color=("green" if preds[i] == labels[i] else "red"))

    return fig
class Bottleneck(nn.Module):
    """Residual block made of 1x1, 3x3 and 1x1 convolutions.

    The last convolution outputs ``out_channel * expansion`` channels. Only
    the 3x3 convolution uses ``stride``. ``downsample``, when given, is
    applied to the input before it is added back as the shortcut.
    """
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        wide_channel = out_channel * self.expansion

        # 1x1: squeeze channels
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,
                               kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        # 3x3: the only convolution that uses the stride
        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,
                               kernel_size=3, stride=stride, bias=False, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channel)
        # 1x1: unsqueeze channels
        self.conv3 = nn.Conv2d(in_channels=out_channel, out_channels=wide_channel,
                               kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(wide_channel)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        # shortcut branch: the raw input, or its downsampled version
        identity = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out += identity
        return self.relu(out)
self._make_layer(block, 128, blocks_num[1], stride=2) self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2) self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2) if self.include_top: self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) # output size = (1, 1) self.fc = nn.Linear(512 * block.expansion, num_classes) for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') def _make_layer(self, block, channel, block_num, stride=1): downsample = None if stride != 1 or self.in_channel != channel * block.expansion: downsample = nn.Sequential( nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False), nn.BatchNorm2d(channel * block.expansion)) layers = [] layers.append(block(self.in_channel, channel, downsample=downsample, stride=stride)) self.in_channel = channel * block.expansion for _ in range(1, block_num): layers.append(block(self.in_channel, channel)) return nn.Sequential(*layers) def forward(self, x): x = self.conv1(x) x = self.bn1(x) x = self.relu(x) x = self.maxpool(x) x = self.layer1(x) x = self.layer2(x) x = self.layer3(x) x = self.layer4(x) if self.include_top: x = self.avgpool(x) x = torch.flatten(x, 1) x = self.fc(x) return x def resnet34(num_classes=1000, include_top=True): return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top) def resnet101(num_classes=1000, include_top=True): return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, include_top=include_top) ================================================ FILE: pytorch_classification/tensorboard_test/my_dataset.py ================================================ from tqdm import tqdm from PIL import Image import torch from torch.utils.data import Dataset class MyDataSet(Dataset): """自定义数据集""" def __init__(self, images_path: list, images_class: list, transform=None): self.images_path = images_path self.images_class = images_class 
def main(args):
    """Train ResNet-34 on an image dataset and log the run to TensorBoard.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``device``, ``data_path``, ``batch_size``, ``num_classes``,
            ``weights``, ``freeze_layers``, ``lr``, ``lrf`` and ``epochs``.

    Side effects:
        Writes TensorBoard events to ``runs/flower_experiment`` and saves one
        checkpoint per epoch as ``./weights/model-<epoch>.pth``.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")

    print(args)
    print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
    # Create the SummaryWriter that receives every scalar/figure/histogram below.
    tb_writer = SummaryWriter(log_dir="runs/flower_experiment")
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs("./weights", exist_ok=True)

    # Split the data into a training set and a validation set.
    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)

    # Pre-processing used for training and for validation/prediction.
    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # Training dataset.
    train_data_set = MyDataSet(images_path=train_images_path,
                               images_class=train_images_label,
                               transform=data_transform["train"])

    # Validation dataset.
    val_data_set = MyDataSet(images_path=val_images_path,
                             images_class=val_images_label,
                             transform=data_transform["val"])

    batch_size = args.batch_size
    # Number of dataloader workers. os.cpu_count() may return None when the
    # count cannot be determined, which would make min() raise TypeError.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_data_set,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_data_set.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_data_set,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_data_set.collate_fn)

    # Build the model.
    model = resnet34(num_classes=args.num_classes).to(device)

    # Write the model graph to TensorBoard using a dummy input.
    init_img = torch.zeros((1, 3, 224, 224), device=device)
    tb_writer.add_graph(model, init_img)

    # Load pretrained weights when the file exists.
    if os.path.exists(args.weights):
        weights_dict = torch.load(args.weights, map_location=device)
        # Build the model's state dict once (not once per checkpoint key) and
        # skip checkpoint keys the model does not have instead of raising
        # KeyError; tensors whose element count differs are skipped as before.
        model_state = model.state_dict()
        load_weights_dict = {k: v for k, v in weights_dict.items()
                             if k in model_state and model_state[k].numel() == v.numel()}
        model.load_state_dict(load_weights_dict, strict=False)
    else:
        print("not using pretrain-weights.")

    # Optionally freeze everything except the final fully connected layer.
    if args.freeze_layers:
        print("freeze layers except fc layer.")
        for name, para in model.named_parameters():
            if "fc" not in name:
                para.requires_grad_(False)

    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=0.005)
    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    # Cosine decay of the LR multiplier from 1 towards args.lrf over args.epochs.
    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    for epoch in range(args.epochs):
        # train
        mean_loss = train_one_epoch(model=model,
                                    optimizer=optimizer,
                                    data_loader=train_loader,
                                    device=device,
                                    epoch=epoch)
        # update learning rate
        scheduler.step()

        # validate
        acc = evaluate(model=model,
                       data_loader=val_loader,
                       device=device)

        # add loss, acc and lr into tensorboard
        print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3)))
        tags = ["train_loss", "accuracy", "learning_rate"]
        tb_writer.add_scalar(tags[0], mean_loss, epoch)
        tb_writer.add_scalar(tags[1], acc, epoch)
        tb_writer.add_scalar(tags[2], optimizer.param_groups[0]["lr"], epoch)

        # add figure into tensorboard
        fig = plot_class_preds(net=model,
                               images_dir="./plot_img",
                               transform=data_transform["val"],
                               num_plot=5,
                               device=device)
        if fig is not None:
            tb_writer.add_figure("predictions vs. actuals",
                                 figure=fig,
                                 global_step=epoch)

        # add conv1 weights into tensorboard
        tb_writer.add_histogram(tag="conv1",
                                values=model.conv1.weight,
                                global_step=epoch)
        tb_writer.add_histogram(tag="layer1/block0/conv1",
                                values=model.layer1[0].conv1.weight,
                                global_step=epoch)

        # save weights
        torch.save(model.state_dict(), "./weights/model-{}.pth".format(epoch))

    # Flush pending events and release the event file.
    tb_writer.close()
def train_one_epoch(model, optimizer, data_loader, device, epoch):
    """Run one training epoch and return the running mean of the batch losses.

    Args:
        model: Network to train; switched to training mode here.
        optimizer: Optimizer stepped once per batch.
        data_loader: Iterable yielding ``(images, labels)`` batches.
        device: Device the batches and the loss accumulator are moved to.
        epoch: Epoch index, used only in the progress-bar description.

    Returns:
        The mean cross-entropy loss over the epoch as a Python float.

    The process exits with status 1 as soon as a non-finite loss is seen.
    """
    model.train()
    criterion = torch.nn.CrossEntropyLoss()
    mean_loss = torch.zeros(1).to(device)
    optimizer.zero_grad()

    progress = tqdm(data_loader, file=sys.stdout)
    for step, (images, labels) in enumerate(progress):
        logits = model(images.to(device))
        loss = criterion(logits, labels.to(device))
        loss.backward()

        # Incremental update of the mean loss over the batches seen so far.
        mean_loss = (mean_loss * step + loss.detach()) / (step + 1)

        # Show the running mean loss on the progress bar.
        progress.desc = "[epoch {}] mean loss {}".format(epoch, round(mean_loss.item(), 3))

        if not torch.isfinite(loss):
            print('WARNING: non-finite loss, ending training ', loss)
            sys.exit(1)

        optimizer.step()
        optimizer.zero_grad()

    return mean_loss.item()
class Bottleneck(nn.Module):
    """Residual block made of 1x1 -> 3x3 -> 1x1 convolutions plus a shortcut.

    The final 1x1 convolution outputs ``out_channel * expansion`` channels.

    Args:
        in_channel: Number of input channels.
        out_channel: Channel width of the first two convolutions.
        stride: Stride of the 3x3 convolution.
        downsample: Optional module applied to the input to produce the
            shortcut branch; when ``None`` the input itself is added.
    """
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        super().__init__()
        widened = out_channel * self.expansion

        # 1x1 convolution: squeeze channels.
        self.conv1 = nn.Conv2d(in_channel, out_channel,
                               kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        # 3x3 convolution: the only one that may be strided.
        self.conv2 = nn.Conv2d(out_channel, out_channel,
                               kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        # 1x1 convolution: unsqueeze channels.
        self.conv3 = nn.Conv2d(out_channel, widened,
                               kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(widened)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Apply the three conv/BN stages, add the shortcut, then ReLU."""
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out += shortcut
        return self.relu(out)
def init_distributed_mode(args):
    """Fill in the distributed settings on ``args`` and start the process group.

    Rank information is read from the ``RANK``/``WORLD_SIZE``/``LOCAL_RANK``
    environment variables when the first two are present, otherwise from
    ``SLURM_PROCID``. When neither source is available ``args.distributed`` is
    set to ``False`` and the function returns without touching anything else.

    Args:
        args: Namespace that is updated in place. ``args.dist_url`` is read,
            and in the ``SLURM_PROCID`` case ``args.world_size`` must already
            be set because it is not derived here.
    """
    env = os.environ
    has_rank_env = 'RANK' in env and 'WORLD_SIZE' in env

    # Guard clause: no rank information at all means single-process mode.
    if not has_rank_env and 'SLURM_PROCID' not in env:
        print('Not using distributed mode')
        args.distributed = False
        return

    if has_rank_env:
        args.rank = int(env["RANK"])
        args.world_size = int(env['WORLD_SIZE'])
        args.gpu = int(env['LOCAL_RANK'])
    else:
        args.rank = int(env['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'  # communication backend; NCCL is recommended for NVIDIA GPUs
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    dist.init_process_group(backend=args.dist_backend,
                            init_method=args.dist_url,
                            world_size=args.world_size,
                            rank=args.rank)
    # Wait until every process has joined the group.
    dist.barrier()
@torch.no_grad()
def evaluate(model, data_loader, device):
    """Count correct top-1 predictions over ``data_loader``.

    Args:
        model: Network to evaluate; switched to eval mode here.
        data_loader: Iterable yielding ``(images, labels)`` batches.
        device: Device the batches and the counter are moved to.

    Returns:
        The number of correct predictions as a Python number, after the
        per-process counter has been passed through
        ``reduce_value(..., average=False)``.
    """
    model.eval()

    # Accumulates the number of correctly predicted samples.
    correct = torch.zeros(1).to(device)

    # Only the main process shows a progress bar.
    batches = data_loader
    if is_main_process():
        batches = tqdm(batches, file=sys.stdout)

    for images, labels in batches:
        logits = model(images.to(device))
        predicted = torch.max(logits, dim=1)[1]
        correct += torch.eq(predicted, labels.to(device)).sum()

    # Wait for outstanding GPU work before combining the counters.
    if device != torch.device("cpu"):
        torch.cuda.synchronize(device)

    correct = reduce_value(correct, average=False)

    return correct.item()
class MyDataSet(Dataset):
    """Dataset backed by parallel lists of image paths and class labels."""

    def __init__(self, images_path: list, images_class: list, transform=None):
        # The lists are stored as given (no copy); index i of one matches
        # index i of the other.
        self.images_path = images_path
        self.images_class = images_class
        self.transform = transform

    def __len__(self):
        """Return the number of image paths."""
        return len(self.images_path)

    def __getitem__(self, item):
        """Open image ``item`` and return ``(image, label)``.

        Raises:
            ValueError: If the opened image is not in RGB mode.
        """
        path = self.images_path[item]
        img = Image.open(path)
        # 'RGB' is a colour image, 'L' a greyscale one; only RGB is accepted.
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(path))
        label = self.images_class[item]

        if self.transform is None:
            return img, label
        return self.transform(img), label

    @staticmethod
    def collate_fn(batch):
        """Stack the images of a batch and convert its labels to a tensor.

        The official default_collate implementation can be found at
        https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        """
        images, labels = zip(*batch)
        return torch.stack(images, dim=0), torch.as_tensor(labels)
SyncBatchNorm") plt.plot(x, SyncBatchNorm, label="SyncBatchNorm") plt.xlabel('Training epochs') plt.ylabel('Accuracy') plt.legend() plt.show() plt.close() x = list(range(30)) single_gpu = [0.569, 0.576, 0.654, 0.648, 0.609, 0.637, 0.699, 0.709, 0.715, 0.715, 0.717, 0.724, 0.722, 0.731, 0.721, 0.774, 0.751, 0.787, 0.78, 0.77, 0.763, 0.803, 0.754, 0.796, 0.799, 0.815, 0.793, 0.808, 0.811, 0.806] plt.plot(x, single_gpu, color="black", label="Single GPU") plt.plot(x, no_SyncBatchNorm, label="No SyncBatchNorm") plt.plot(x, SyncBatchNorm, label="SyncBatchNorm") plt.xlabel('Training epochs') plt.ylabel('Accuracy') plt.legend() plt.show() plt.close() # epochs = 30 # lrf = 0.1 # lf0 = lambda x: math.cos(x * math.pi / epochs) # lf1 = lambda x: 1 + math.cos(x * math.pi / epochs) # lf2 = lambda x: (1 + math.cos(x * math.pi / epochs)) / 2 # lf3 = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - lrf) + lrf # x = range(epochs) # y0 = [lf0(epoch) for epoch in x] # y1 = [lf1(epoch) for epoch in x] # y2 = [lf2(epoch) for epoch in x] # y3 = [lf3(epoch) for epoch in x] # plt.subplot(2, 2, 1) # plt.plot(x, y0) # plt.hlines(1, 0, epochs-1, colors="r", linestyles="dashed") # plt.hlines(-1, 0, epochs-1, colors="r", linestyles="dashed") # plt.xlim((0, epochs-1)) # # plt.subplot(2, 2, 2) # plt.plot(x, y1) # plt.hlines(2, 0, epochs-1, colors="r", linestyles="dashed") # plt.hlines(0, 0, epochs-1, colors="r", linestyles="dashed") # plt.xlim((0, epochs-1)) # # plt.subplot(2, 2, 3) # plt.plot(x, y2) # plt.hlines(1, 0, epochs-1, colors="r", linestyles="dashed") # plt.hlines(0, 0, epochs-1, colors="r", linestyles="dashed") # plt.xlim((0, epochs-1)) # # plt.subplot(2, 2, 4) # plt.plot(x, y3) # plt.hlines(1, 0, epochs-1, colors="r", linestyles="dashed") # plt.hlines(lrf, 0, epochs-1, colors="r", linestyles="dashed") # plt.text(epochs-1, y3[-1], "{}".format(round(y3[-1], 1))) # plt.xlim((0, epochs-1)) # # plt.show() # plt.close() ================================================ FILE: 
def main(args):
    """Train ResNet-34 with DistributedDataParallel, one process per GPU.

    Args:
        args: Parsed command-line namespace. ``init_distributed_mode`` fills in
            the distributed fields; this function additionally reads
            ``device``, ``batch_size``, ``weights``, ``lr``, ``lrf``,
            ``epochs``, ``data_path``, ``num_classes``, ``freeze_layers`` and
            ``syncBN``. ``args.lr`` is multiplied by ``args.world_size``.

    Raises:
        EnvironmentError: If CUDA is not available.
        AssertionError: If ``args.num_classes`` differs from the class count
            reported by ``read_split_data``.

    Side effects:
        Rank 0 writes TensorBoard events and saves one checkpoint per epoch to
        ``./weights/model-<epoch>.pth``.
    """
    if torch.cuda.is_available() is False:
        raise EnvironmentError("not find GPU device for training.")

    # Initialise the per-process distributed environment.
    init_distributed_mode(args=args)

    rank = args.rank
    device = torch.device(args.device)
    batch_size = args.batch_size
    weights_path = args.weights
    args.lr *= args.world_size  # scale the learning rate by the number of parallel GPUs
    checkpoint_path = ""

    if rank == 0:  # print info and create the TensorBoard writer in the first process only
        print(args)
        print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
        tb_writer = SummaryWriter()
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs("./weights", exist_ok=True)

    train_info, val_info, num_classes = read_split_data(args.data_path)
    train_images_path, train_images_label = train_info
    val_images_path, val_images_label = val_info

    # check num_classes (the message reports the dataset value first, then the input)
    assert args.num_classes == num_classes, "dataset num_classes: {}, input {}".format(num_classes,
                                                                                       args.num_classes)

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # Training dataset.
    train_data_set = MyDataSet(images_path=train_images_path,
                               images_class=train_images_label,
                               transform=data_transform["train"])

    # Validation dataset.
    val_data_set = MyDataSet(images_path=val_images_path,
                             images_class=val_images_label,
                             transform=data_transform["val"])

    # Give each rank's process its own share of the sample indices.
    train_sampler = torch.utils.data.distributed.DistributedSampler(train_data_set)
    val_sampler = torch.utils.data.distributed.DistributedSampler(val_data_set)

    # Group the sampled indices into lists of batch_size elements.
    train_batch_sampler = torch.utils.data.BatchSampler(
        train_sampler, batch_size, drop_last=True)

    # Number of dataloader workers. os.cpu_count() may return None when the
    # count cannot be determined, which would make min() raise TypeError.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])
    if rank == 0:
        print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_data_set,
                                               batch_sampler=train_batch_sampler,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_data_set.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_data_set,
                                             batch_size=batch_size,
                                             sampler=val_sampler,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_data_set.collate_fn)
    # Build the model.
    model = resnet34(num_classes=num_classes).to(device)

    # Load pretrained weights when the file exists.
    if os.path.exists(weights_path):
        weights_dict = torch.load(weights_path, map_location=device)
        # Build the model's state dict once (not once per checkpoint key) and
        # skip checkpoint keys the model does not have instead of raising
        # KeyError; tensors whose element count differs are skipped as before.
        model_state = model.state_dict()
        load_weights_dict = {k: v for k, v in weights_dict.items()
                             if k in model_state and model_state[k].numel() == v.numel()}
        model.load_state_dict(load_weights_dict, strict=False)
    else:
        checkpoint_path = os.path.join(tempfile.gettempdir(), "initial_weights.pt")
        # Without pretrained weights, the first process saves its initial
        # weights and every process loads them so all replicas start identical.
        if rank == 0:
            torch.save(model.state_dict(), checkpoint_path)

        dist.barrier()
        # map_location must be given here, otherwise the first GPU ends up
        # using more memory than the others.
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))

    # Optionally freeze everything except the final fully connected layer.
    if args.freeze_layers:
        for name, para in model.named_parameters():
            if "fc" not in name:
                para.requires_grad_(False)
    else:
        # SyncBatchNorm only makes sense when the BN layers are being trained.
        if args.syncBN:
            # Training is slower with SyncBatchNorm.
            model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)

    # Wrap the model for distributed data-parallel training.
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])

    # optimizer
    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=0.005)
    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    # Cosine decay of the LR multiplier from 1 towards args.lrf over args.epochs.
    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    for epoch in range(args.epochs):
        # Passes the epoch index to the sampler before each epoch.
        train_sampler.set_epoch(epoch)

        mean_loss = train_one_epoch(model=model,
                                    optimizer=optimizer,
                                    data_loader=train_loader,
                                    device=device,
                                    epoch=epoch)

        scheduler.step()

        sum_num = evaluate(model=model,
                           data_loader=val_loader,
                           device=device)
        acc = sum_num / val_sampler.total_size

        if rank == 0:
            print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3)))
            tags = ["loss", "accuracy", "learning_rate"]
            tb_writer.add_scalar(tags[0], mean_loss, epoch)
            tb_writer.add_scalar(tags[1], acc, epoch)
            tb_writer.add_scalar(tags[2], optimizer.param_groups[0]["lr"], epoch)

            torch.save(model.module.state_dict(), "./weights/model-{}.pth".format(epoch))

    if rank == 0:
        # Flush pending events and release the event file.
        tb_writer.close()
        # Remove the temporary initial-weights file, if one was written.
        if os.path.exists(checkpoint_path):
            os.remove(checkpoint_path)

    cleanup()
https://download.pytorch.org/models/resnet34-333f7ec4.pth parser.add_argument('--weights', type=str, default='resNet34.pth', help='initial weights path') parser.add_argument('--freeze-layers', type=bool, default=False) # 不要改该参数,系统会自动分配 parser.add_argument('--device', default='cuda', help='device id (i.e. 0 or 0,1 or cpu)') # 开启的进程数(注意不是线程),不用设置该参数,会根据nproc_per_node自动设置 parser.add_argument('--world-size', default=4, type=int, help='number of distributed processes') parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training') opt = parser.parse_args() main(opt) ================================================ FILE: pytorch_classification/train_multi_GPU/train_multi_gpu_using_spawn.py ================================================ import os import math import tempfile import argparse import torch import torch.multiprocessing as mp from torch.multiprocessing import Process import torch.optim as optim import torch.optim.lr_scheduler as lr_scheduler from torch.utils.tensorboard import SummaryWriter from torchvision import transforms from model import resnet34 from my_dataset import MyDataSet from utils import read_split_data, plot_data_loader_image from multi_train_utils.distributed_utils import dist, cleanup from multi_train_utils.train_eval_utils import train_one_epoch, evaluate def main_fun(rank, world_size, args): if torch.cuda.is_available() is False: raise EnvironmentError("not find GPU device for training.") # 初始化各进程环境 start os.environ["MASTER_ADDR"] = "localhost" os.environ["MASTER_PORT"] = "12355" args.rank = rank args.world_size = world_size args.gpu = rank args.distributed = True torch.cuda.set_device(args.gpu) args.dist_backend = 'nccl' print('| distributed init (rank {}): {}'.format( args.rank, args.dist_url), flush=True) dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank) dist.barrier() # 初始化各进程环境 end rank = args.rank device = 
torch.device(args.device) batch_size = args.batch_size weights_path = args.weights args.lr *= args.world_size # 学习率要根据并行GPU的数量进行倍增 checkpoint_path = "" if rank == 0: # 在第一个进程中打印信息,并实例化tensorboard print(args) print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/') tb_writer = SummaryWriter() if os.path.exists("./weights") is False: os.makedirs("./weights") train_info, val_info, num_classes = read_split_data(args.data_path) train_images_path, train_images_label = train_info val_images_path, val_images_label = val_info # check num_classes assert args.num_classes == num_classes, "dataset num_classes: {}, input {}".format(args.num_classes, num_classes) data_transform = { "train": transforms.Compose([transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]), "val": transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])} # 实例化训练数据集 train_data_set = MyDataSet(images_path=train_images_path, images_class=train_images_label, transform=data_transform["train"]) # 实例化验证数据集 val_data_set = MyDataSet(images_path=val_images_path, images_class=val_images_label, transform=data_transform["val"]) # 给每个rank对应的进程分配训练的样本索引 train_sampler = torch.utils.data.distributed.DistributedSampler(train_data_set) val_sampler = torch.utils.data.distributed.DistributedSampler(val_data_set) # 将样本索引每batch_size个元素组成一个list train_batch_sampler = torch.utils.data.BatchSampler( train_sampler, batch_size, drop_last=True) nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]) # number of workers if rank == 0: print('Using {} dataloader workers every process'.format(nw)) train_loader = torch.utils.data.DataLoader(train_data_set, batch_sampler=train_batch_sampler, pin_memory=True, num_workers=nw, collate_fn=train_data_set.collate_fn) val_loader = 
torch.utils.data.DataLoader(val_data_set, batch_size=batch_size, sampler=val_sampler, pin_memory=True, num_workers=nw, collate_fn=val_data_set.collate_fn) # 实例化模型 model = resnet34(num_classes=num_classes).to(device) # 如果存在预训练权重则载入 if os.path.exists(weights_path): weights_dict = torch.load(weights_path, map_location=device) load_weights_dict = {k: v for k, v in weights_dict.items() if model.state_dict()[k].numel() == v.numel()} model.load_state_dict(load_weights_dict, strict=False) else: checkpoint_path = os.path.join(tempfile.gettempdir(), "initial_weights.pt") # 如果不存在预训练权重,需要将第一个进程中的权重保存,然后其他进程载入,保持初始化权重一致 if rank == 0: torch.save(model.state_dict(), checkpoint_path) dist.barrier() # 这里注意,一定要指定map_location参数,否则会导致第一块GPU占用更多资源 model.load_state_dict(torch.load(checkpoint_path, map_location=device)) # 是否冻结权重 if args.freeze_layers: for name, para in model.named_parameters(): # 除最后的全连接层外,其他权重全部冻结 if "fc" not in name: para.requires_grad_(False) else: # 只有训练带有BN结构的网络时使用SyncBatchNorm采用意义 if args.syncBN: # 使用SyncBatchNorm后训练会更耗时 model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) # 转为DDP模型 model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) # optimizer pg = [p for p in model.parameters() if p.requires_grad] optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=0.005) # Scheduler https://arxiv.org/pdf/1812.01187.pdf lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf # cosine scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) for epoch in range(args.epochs): train_sampler.set_epoch(epoch) mean_loss = train_one_epoch(model=model, optimizer=optimizer, data_loader=train_loader, device=device, epoch=epoch) scheduler.step() sum_num = evaluate(model=model, data_loader=val_loader, device=device) acc = sum_num / val_sampler.total_size if rank == 0: print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3))) tags = ["loss", "accuracy", "learning_rate"] 
if __name__ == '__main__':
    def _str2bool(value):
        """Convert a command-line token (``true``/``false``/``1``/``0``...) to a bool.

        argparse's ``type=bool`` calls ``bool("False")``, which is ``True`` because
        every non-empty string is truthy; with it a flag could never be switched
        off from the command line. The empty string is still accepted as False so
        that the one spelling that used to disable a flag keeps working.
        """
        if isinstance(value, bool):
            return value
        token = value.strip().lower()
        if token in ('true', 't', 'yes', 'y', '1'):
            return True
        if token in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--epochs', type=int, default=30)
    parser.add_argument('--batch-size', type=int, default=16)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--lrf', type=float, default=0.1)
    # Whether to enable SyncBatchNorm
    parser.add_argument('--syncBN', type=_str2bool, default=True)

    # Root directory of the dataset
    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
    parser.add_argument('--data-path', type=str, default="/home/wz/data_set/flower_data/flower_photos")

    # Download address of the official resnet34 weights
    # https://download.pytorch.org/models/resnet34-333f7ec4.pth
    parser.add_argument('--weights', type=str, default='resNet34.pth',
                        help='initial weights path')
    parser.add_argument('--freeze-layers', type=_str2bool, default=False)
    # Do not change this argument; it is assigned automatically
    parser.add_argument('--device', default='cuda', help='device id (i.e. 0 or 0,1 or cpu)')
    # Number of processes (not threads) to start; on a single machine this is the number of GPUs used
    parser.add_argument('--world-size', default=4, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
    opt = parser.parse_args()

    # NOTE: processes are started by hand instead of through mp.spawn. With
    # mp.spawn and more than one dataloader worker, every epoch of training and
    # validation waited about 10 seconds before starting.
    world_size = opt.world_size
    processes = []
    for rank in range(world_size):
        p = Process(target=main_fun, args=(rank, world_size, opt))
        p.start()
        processes.append(p)
    # Wait for every worker process to finish
    for p in processes:
        p.join()
0.456, 0.406], [0.229, 0.224, 0.225])]), "val": transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])} # 实例化训练数据集 train_data_set = MyDataSet(images_path=train_images_path, images_class=train_images_label, transform=data_transform["train"]) # 实例化验证数据集 val_data_set = MyDataSet(images_path=val_images_path, images_class=val_images_label, transform=data_transform["val"]) batch_size = args.batch_size nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]) # number of workers print('Using {} dataloader workers every process'.format(nw)) train_loader = torch.utils.data.DataLoader(train_data_set, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=nw, collate_fn=train_data_set.collate_fn) val_loader = torch.utils.data.DataLoader(val_data_set, batch_size=batch_size, shuffle=False, pin_memory=True, num_workers=nw, collate_fn=val_data_set.collate_fn) # 如果存在预训练权重则载入 model = resnet34(num_classes=args.num_classes).to(device) if args.weights != "": if os.path.exists(args.weights): weights_dict = torch.load(args.weights, map_location=device) load_weights_dict = {k: v for k, v in weights_dict.items() if model.state_dict()[k].numel() == v.numel()} print(model.load_state_dict(load_weights_dict, strict=False)) else: raise FileNotFoundError("not found weights file: {}".format(args.weights)) # 是否冻结权重 if args.freeze_layers: for name, para in model.named_parameters(): # 除最后的全连接层外,其他权重全部冻结 if "fc" not in name: para.requires_grad_(False) pg = [p for p in model.parameters() if p.requires_grad] optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=0.005) # Scheduler https://arxiv.org/pdf/1812.01187.pdf lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf # cosine scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) for epoch in range(args.epochs): # train mean_loss = train_one_epoch(model=model, 
optimizer=optimizer, data_loader=train_loader, device=device, epoch=epoch) scheduler.step() # validate sum_num = evaluate(model=model, data_loader=val_loader, device=device) acc = sum_num / len(val_data_set) print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3))) tags = ["loss", "accuracy", "learning_rate"] tb_writer.add_scalar(tags[0], mean_loss, epoch) tb_writer.add_scalar(tags[1], acc, epoch) tb_writer.add_scalar(tags[2], optimizer.param_groups[0]["lr"], epoch) torch.save(model.state_dict(), "./weights/model-{}.pth".format(epoch)) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--num_classes', type=int, default=5) parser.add_argument('--epochs', type=int, default=30) parser.add_argument('--batch-size', type=int, default=16) parser.add_argument('--lr', type=float, default=0.001) parser.add_argument('--lrf', type=float, default=0.1) # 数据集所在根目录 # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz parser.add_argument('--data-path', type=str, default="/home/w180662/my_project/my_github/data_set/flower_data/flower_photos") # resnet34 官方权重下载地址 # https://download.pytorch.org/models/resnet34-333f7ec4.pth parser.add_argument('--weights', type=str, default='resNet34.pth', help='initial weights path') parser.add_argument('--freeze-layers', type=bool, default=False) parser.add_argument('--device', default='cuda', help='device id (i.e. 
def read_split_data(root: str, val_rate: float = 0.2):
    """Scan a one-folder-per-class image dataset and split it into train/val.

    Every sub-directory of ``root`` is one class. Classes are indexed in sorted
    order and the index -> class-name mapping is written to
    ``class_indices.json`` in the current working directory. Within each class,
    ``int(len(images) * val_rate)`` images are sampled for validation; the RNG
    is seeded with 0 so the split is reproducible.

    Args:
        root: dataset root directory.
        val_rate: fraction of each class assigned to the validation set.

    Returns:
        ``[train_paths, train_labels], [val_paths, val_labels], num_classes``.

    Raises:
        AssertionError: if ``root`` does not exist or either split is empty.
    """
    random.seed(0)  # keep the random split reproducible
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # One folder per class; sort so the order is identical on every platform
    class_names = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]
    class_names.sort()
    # class name -> numeric index
    class_indices = {name: index for index, name in enumerate(class_names)}
    # Persist the inverse mapping (index -> class name)
    json_str = json.dumps({index: name for name, index in class_indices.items()}, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of every training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of every validation image
    every_class_num = []  # number of samples found per class
    supported = [".jpg", ".JPG", ".png", ".PNG"]  # accepted file suffixes
    for cla in class_names:
        cla_path = os.path.join(root, cla)
        # Collect every file with a supported suffix; sort for a platform-independent order
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        images.sort()
        image_class = class_indices[cla]
        every_class_num.append(len(images))
        # Sample from the list (same draw as before) but test membership on a
        # set, so the partition below is O(n) instead of O(n^2) per class.
        val_selected = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_selected:
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.".format(sum(every_class_num)))
    print("{} images for training.".format(len(train_images_path)))
    print("{} images for validation.".format(len(val_images_path)))
    assert len(train_images_path) > 0, "number of training images must greater than 0."
    assert len(val_images_path) > 0, "number of validation images must greater than 0."

    # Manual switch: set to True to show a bar chart of the class distribution
    plot_image = False
    if plot_image:
        plt.bar(range(len(class_names)), every_class_num, align='center')
        # Replace the x ticks 0,1,2,... with the class names
        plt.xticks(range(len(class_names)), class_names)
        # Put the sample count above every bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        plt.xlabel('image class')
        plt.ylabel('number of images')
        plt.title('flower class distribution')
        plt.show()

    return [train_images_path, train_images_label], [val_images_path, val_images_label], len(class_names)
class MyDataSet(Dataset):
    """Custom dataset backed by parallel lists of image paths and class labels."""

    def __init__(self, images_path: list, images_class: list, transform=None):
        # Optional callable applied to every opened image
        self.transform = transform
        # images_path[i] is labelled with images_class[i]
        self.images_path, self.images_class = images_path, images_class

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.images_path)

    def __getitem__(self, item):
        """Open image ``item`` and return ``(image, label)``.

        Raises:
            ValueError: if the opened image is not in RGB mode.
        """
        path = self.images_path[item]
        image = Image.open(path)
        # 'RGB' is a colour image, 'L' is a grayscale image; only RGB is accepted
        if image.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(path))

        target = self.images_class[item]
        if self.transform is None:
            return image, target
        return self.transform(image), target

    @staticmethod
    def collate_fn(batch):
        """Stack a list of ``(image, label)`` samples into two batch tensors."""
        # The official default_collate implementation can be found at
        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        image_seq, label_seq = tuple(zip(*batch))
        return torch.stack(image_seq, dim=0), torch.as_tensor(label_seq)
def main(args):
    """Fine-tune a ViT classifier on a folder-per-class dataset.

    Builds the train/val datasets and loaders, optionally loads pretrained
    weights and freezes the backbone, then trains for ``args.epochs`` epochs
    with SGD and a cosine learning-rate schedule. Metrics are logged to
    TensorBoard and a checkpoint is written to ``./weights`` after every epoch.

    Args:
        args: parsed command-line namespace; reads ``device``, ``data_path``,
            ``batch_size``, ``num_classes``, ``weights``, ``freeze_layers``,
            ``lr``, ``lrf`` and ``epochs``.

    Raises:
        AssertionError: if ``args.weights`` is non-empty but the file is missing.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")

    os.makedirs("./weights", exist_ok=True)

    tb_writer = SummaryWriter()

    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])}

    # Training dataset
    train_dataset = MyDataSet(images_path=train_images_path,
                              images_class=train_images_label,
                              transform=data_transform["train"])

    # Validation dataset
    val_dataset = MyDataSet(images_path=val_images_path,
                            images_class=val_images_label,
                            transform=data_transform["val"])

    batch_size = args.batch_size
    # os.cpu_count() may return None when the count cannot be determined;
    # fall back to 1 so min() does not compare None with an int.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_dataset.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_dataset.collate_fn)

    model = create_model(num_classes=args.num_classes, has_logits=False).to(device)

    if args.weights != "":
        assert os.path.exists(args.weights), "weights file: '{}' not exist.".format(args.weights)
        weights_dict = torch.load(args.weights, map_location=device)
        # Drop the weights that must not be loaded (classifier head, and the
        # pre-logits layer when the model has none)
        del_keys = ['head.weight', 'head.bias'] if model.has_logits \
            else ['pre_logits.fc.weight', 'pre_logits.fc.bias', 'head.weight', 'head.bias']
        for k in del_keys:
            # pop() instead of del: a checkpoint that lacks one of these keys
            # must not abort the run with a KeyError
            weights_dict.pop(k, None)
        print(model.load_state_dict(weights_dict, strict=False))

    if args.freeze_layers:
        for name, para in model.named_parameters():
            # Freeze everything except head and pre_logits
            if "head" not in name and "pre_logits" not in name:
                para.requires_grad_(False)
            else:
                print("training {}".format(name))

    # Only parameters that still require gradients are optimised
    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=5E-5)
    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    for epoch in range(args.epochs):
        # train
        train_loss, train_acc = train_one_epoch(model=model,
                                                optimizer=optimizer,
                                                data_loader=train_loader,
                                                device=device,
                                                epoch=epoch)

        scheduler.step()

        # validate
        val_loss, val_acc = evaluate(model=model,
                                     data_loader=val_loader,
                                     device=device,
                                     epoch=epoch)

        tags = ["train_loss", "train_acc", "val_loss", "val_acc", "learning_rate"]
        tb_writer.add_scalar(tags[0], train_loss, epoch)
        tb_writer.add_scalar(tags[1], train_acc, epoch)
        tb_writer.add_scalar(tags[2], val_loss, epoch)
        tb_writer.add_scalar(tags[3], val_acc, epoch)
        tb_writer.add_scalar(tags[4], optimizer.param_groups[0]["lr"], epoch)

        torch.save(model.state_dict(), "./weights/model-{}.pth".format(epoch))

    # Flush pending events and release the event file
    tb_writer.close()
def read_split_data(root: str, val_rate: float = 0.2):
    """Scan a one-folder-per-class image dataset and split it into train/val.

    Every sub-directory of ``root`` is one class. Classes are indexed in sorted
    order and the index -> class-name mapping is written to
    ``class_indices.json`` in the current working directory. Within each class,
    ``int(len(images) * val_rate)`` images are sampled for validation; the RNG
    is seeded with 0 so the split is reproducible.

    Args:
        root: dataset root directory.
        val_rate: fraction of each class assigned to the validation set.

    Returns:
        ``train_images_path, train_images_label, val_images_path, val_images_label``.

    Raises:
        AssertionError: if ``root`` does not exist or either split is empty.
    """
    random.seed(0)  # keep the random split reproducible
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # One folder per class; sort so the order is identical on every platform
    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]
    flower_class.sort()
    # class name -> numeric index
    class_indices = {name: index for index, name in enumerate(flower_class)}
    # Persist the inverse mapping (index -> class name)
    json_str = json.dumps({index: name for name, index in class_indices.items()}, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of every training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of every validation image
    every_class_num = []  # number of samples found per class
    supported = [".jpg", ".JPG", ".png", ".PNG"]  # accepted file suffixes
    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # Collect every file with a supported suffix; sort for a platform-independent order
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        images.sort()
        image_class = class_indices[cla]
        every_class_num.append(len(images))
        # Sample from the list (same draw as before) but test membership on a
        # set, so the partition below is O(n) instead of O(n^2) per class.
        val_selected = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_selected:
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.".format(sum(every_class_num)))
    print("{} images for training.".format(len(train_images_path)))
    print("{} images for validation.".format(len(val_images_path)))
    assert len(train_images_path) > 0, "number of training images must greater than 0."
    assert len(val_images_path) > 0, "number of validation images must greater than 0."

    # Manual switch: set to True to show a bar chart of the class distribution
    plot_image = False
    if plot_image:
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # Replace the x ticks 0,1,2,... with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # Put the sample count above every bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        plt.xlabel('image class')
        plt.ylabel('number of images')
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label
json_file = open(json_path, 'r') class_indices = json.load(json_file) for data in data_loader: images, labels = data for i in range(plot_num): # [C, H, W] -> [H, W, C] img = images[i].numpy().transpose(1, 2, 0) # 反Normalize操作 img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255 label = labels[i].item() plt.subplot(1, plot_num, i+1) plt.xlabel(class_indices[str(label)]) plt.xticks([]) # 去掉x轴的刻度 plt.yticks([]) # 去掉y轴的刻度 plt.imshow(img.astype('uint8')) plt.show() def write_pickle(list_info: list, file_name: str): with open(file_name, 'wb') as f: pickle.dump(list_info, f) def read_pickle(file_name: str) -> list: with open(file_name, 'rb') as f: info_list = pickle.load(f) return info_list def train_one_epoch(model, optimizer, data_loader, device, epoch): model.train() loss_function = torch.nn.CrossEntropyLoss() accu_loss = torch.zeros(1).to(device) # 累计损失 accu_num = torch.zeros(1).to(device) # 累计预测正确的样本数 optimizer.zero_grad() sample_num = 0 data_loader = tqdm(data_loader, file=sys.stdout) for step, data in enumerate(data_loader): images, labels = data sample_num += images.shape[0] pred = model(images.to(device)) pred_classes = torch.max(pred, dim=1)[1] accu_num += torch.eq(pred_classes, labels.to(device)).sum() loss = loss_function(pred, labels.to(device)) loss.backward() accu_loss += loss.detach() data_loader.desc = "[train epoch {}] loss: {:.3f}, acc: {:.3f}".format(epoch, accu_loss.item() / (step + 1), accu_num.item() / sample_num) if not torch.isfinite(loss): print('WARNING: non-finite loss, ending training ', loss) sys.exit(1) optimizer.step() optimizer.zero_grad() return accu_loss.item() / (step + 1), accu_num.item() / sample_num @torch.no_grad() def evaluate(model, data_loader, device, epoch): loss_function = torch.nn.CrossEntropyLoss() model.eval() accu_num = torch.zeros(1).to(device) # 累计预测正确的样本数 accu_loss = torch.zeros(1).to(device) # 累计损失 sample_num = 0 data_loader = tqdm(data_loader, file=sys.stdout) for step, data in enumerate(data_loader): 
images, labels = data sample_num += images.shape[0] pred = model(images.to(device)) pred_classes = torch.max(pred, dim=1)[1] accu_num += torch.eq(pred_classes, labels.to(device)).sum() loss = loss_function(pred, labels.to(device)) accu_loss += loss data_loader.desc = "[valid epoch {}] loss: {:.3f}, acc: {:.3f}".format(epoch, accu_loss.item() / (step + 1), accu_num.item() / sample_num) return accu_loss.item() / (step + 1), accu_num.item() / sample_num ================================================ FILE: pytorch_classification/vision_transformer/vit_model.py ================================================ """ original code from rwightman: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py """ from functools import partial from collections import OrderedDict import torch import torch.nn as nn def drop_path(x, drop_prob: float = 0., training: bool = False): """ Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). This is the same as the DropConnect impl I created for EfficientNet, etc networks, however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the argument. """ if drop_prob == 0. or not training: return x keep_prob = 1 - drop_prob shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device) random_tensor.floor_() # binarize output = x.div(keep_prob) * random_tensor return output class DropPath(nn.Module): """ Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 
""" def __init__(self, drop_prob=None): super(DropPath, self).__init__() self.drop_prob = drop_prob def forward(self, x): return drop_path(x, self.drop_prob, self.training) class PatchEmbed(nn.Module): """ 2D Image to Patch Embedding """ def __init__(self, img_size=224, patch_size=16, in_c=3, embed_dim=768, norm_layer=None): super().__init__() img_size = (img_size, img_size) patch_size = (patch_size, patch_size) self.img_size = img_size self.patch_size = patch_size self.grid_size = (img_size[0] // patch_size[0], img_size[1] // patch_size[1]) self.num_patches = self.grid_size[0] * self.grid_size[1] self.proj = nn.Conv2d(in_c, embed_dim, kernel_size=patch_size, stride=patch_size) self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity() def forward(self, x): B, C, H, W = x.shape assert H == self.img_size[0] and W == self.img_size[1], \ f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." # flatten: [B, C, H, W] -> [B, C, HW] # transpose: [B, C, HW] -> [B, HW, C] x = self.proj(x).flatten(2).transpose(1, 2) x = self.norm(x) return x class Attention(nn.Module): def __init__(self, dim, # 输入token的dim num_heads=8, qkv_bias=False, qk_scale=None, attn_drop_ratio=0., proj_drop_ratio=0.): super(Attention, self).__init__() self.num_heads = num_heads head_dim = dim // num_heads self.scale = qk_scale or head_dim ** -0.5 self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) self.attn_drop = nn.Dropout(attn_drop_ratio) self.proj = nn.Linear(dim, dim) self.proj_drop = nn.Dropout(proj_drop_ratio) def forward(self, x): # [batch_size, num_patches + 1, total_embed_dim] B, N, C = x.shape # qkv(): -> [batch_size, num_patches + 1, 3 * total_embed_dim] # reshape: -> [batch_size, num_patches + 1, 3, num_heads, embed_dim_per_head] # permute: -> [3, batch_size, num_heads, num_patches + 1, embed_dim_per_head] qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) # [batch_size, num_heads, num_patches + 1, 
embed_dim_per_head] q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) # transpose: -> [batch_size, num_heads, embed_dim_per_head, num_patches + 1] # @: multiply -> [batch_size, num_heads, num_patches + 1, num_patches + 1] attn = (q @ k.transpose(-2, -1)) * self.scale attn = attn.softmax(dim=-1) attn = self.attn_drop(attn) # @: multiply -> [batch_size, num_heads, num_patches + 1, embed_dim_per_head] # transpose: -> [batch_size, num_patches + 1, num_heads, embed_dim_per_head] # reshape: -> [batch_size, num_patches + 1, total_embed_dim] x = (attn @ v).transpose(1, 2).reshape(B, N, C) x = self.proj(x) x = self.proj_drop(x) return x class Mlp(nn.Module): """ MLP as used in Vision Transformer, MLP-Mixer and related networks """ def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): super().__init__() out_features = out_features or in_features hidden_features = hidden_features or in_features self.fc1 = nn.Linear(in_features, hidden_features) self.act = act_layer() self.fc2 = nn.Linear(hidden_features, out_features) self.drop = nn.Dropout(drop) def forward(self, x): x = self.fc1(x) x = self.act(x) x = self.drop(x) x = self.fc2(x) x = self.drop(x) return x class Block(nn.Module): def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop_ratio=0., attn_drop_ratio=0., drop_path_ratio=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm): super(Block, self).__init__() self.norm1 = norm_layer(dim) self.attn = Attention(dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop_ratio=attn_drop_ratio, proj_drop_ratio=drop_ratio) # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here self.drop_path = DropPath(drop_path_ratio) if drop_path_ratio > 0. 
class VisionTransformer(nn.Module):
    """
    Vision Transformer classifier.

    The image is split into patch embeddings, a class token (and, when
    ``distilled`` is set, a distillation token) is prepended, learnable
    position embeddings are added, and the sequence is passed through a
    stack of ``Block`` layers followed by a final normalization layer and
    the classification head(s).
    """

    def __init__(self, img_size=224, patch_size=16, in_c=3, num_classes=1000,
                 embed_dim=768, depth=12, num_heads=12, mlp_ratio=4.0, qkv_bias=True,
                 qk_scale=None, representation_size=None, distilled=False, drop_ratio=0.,
                 attn_drop_ratio=0., drop_path_ratio=0., embed_layer=PatchEmbed, norm_layer=None,
                 act_layer=None):
        """
        Args:
            img_size (int, tuple): input image size
            patch_size (int, tuple): patch size
            in_c (int): number of input channels
            num_classes (int): number of classes for classification head
            embed_dim (int): embedding dimension
            depth (int): depth of transformer
            num_heads (int): number of attention heads
            mlp_ratio (int): ratio of mlp hidden dim to embedding dim
            qkv_bias (bool): enable bias for qkv if True
            qk_scale (float): override default qk scale of head_dim ** -0.5 if set
            representation_size (Optional[int]): enable and set representation layer (pre-logits) to this value if set
            distilled (bool): model includes a distillation token and head as in DeiT models
            drop_ratio (float): dropout rate
            attn_drop_ratio (float): attention dropout rate
            drop_path_ratio (float): stochastic depth rate
            embed_layer (nn.Module): patch embedding layer
            norm_layer: (nn.Module): normalization layer
            act_layer: activation layer class, nn.GELU when not given
        """
        super().__init__()
        self.num_classes = num_classes
        # num_features starts equal to embed_dim (kept for consistency with other
        # models); it is overridden below when a representation layer is enabled.
        self.embed_dim = embed_dim
        self.num_features = embed_dim
        self.num_tokens = 2 if distilled else 1

        if not norm_layer:
            norm_layer = partial(nn.LayerNorm, eps=1e-6)
        if not act_layer:
            act_layer = nn.GELU

        self.patch_embed = embed_layer(img_size=img_size, patch_size=patch_size,
                                       in_c=in_c, embed_dim=embed_dim)
        num_patches = self.patch_embed.num_patches

        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.dist_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) if distilled else None
        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + self.num_tokens, embed_dim))
        self.pos_drop = nn.Dropout(p=drop_ratio)

        # stochastic depth decay rule: drop-path rate grows linearly with block index
        drop_path_rates = torch.linspace(0, drop_path_ratio, depth).tolist()
        layers = []
        for rate in drop_path_rates:
            layers.append(
                Block(dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio,
                      qkv_bias=qkv_bias, qk_scale=qk_scale, drop_ratio=drop_ratio,
                      attn_drop_ratio=attn_drop_ratio, drop_path_ratio=rate,
                      norm_layer=norm_layer, act_layer=act_layer)
            )
        self.blocks = nn.Sequential(*layers)
        self.norm = norm_layer(embed_dim)

        # Representation layer (only used for the non-distilled model)
        if distilled or not representation_size:
            self.has_logits = False
            self.pre_logits = nn.Identity()
        else:
            self.has_logits = True
            self.num_features = representation_size
            self.pre_logits = nn.Sequential(OrderedDict([
                ("fc", nn.Linear(embed_dim, representation_size)),
                ("act", nn.Tanh())
            ]))

        # Classifier head(s)
        if num_classes > 0:
            self.head = nn.Linear(self.num_features, num_classes)
        else:
            self.head = nn.Identity()

        self.head_dist = None
        if distilled:
            if num_classes > 0:
                self.head_dist = nn.Linear(self.embed_dim, self.num_classes)
            else:
                self.head_dist = nn.Identity()

        # Weight init (order kept so that seeded initialisation is reproducible)
        nn.init.trunc_normal_(self.pos_embed, std=0.02)
        if self.dist_token is not None:
            nn.init.trunc_normal_(self.dist_token, std=0.02)
        nn.init.trunc_normal_(self.cls_token, std=0.02)
        self.apply(_init_vit_weights)

    def forward_features(self, x):
        """
        Embed the input, prepend the special token(s) and run the transformer.

        Returns the pre-logits of the class token, or the tuple
        (class token, distillation token) features when the model is distilled.
        """
        # [B, C, H, W] -> [B, num_patches, embed_dim]
        x = self.patch_embed(x)
        batch_size = x.shape[0]

        # each special token: [1, 1, embed_dim] -> [B, 1, embed_dim]
        sequence = [self.cls_token.expand(batch_size, -1, -1)]
        if self.dist_token is not None:
            sequence.append(self.dist_token.expand(batch_size, -1, -1))
        sequence.append(x)
        x = torch.cat(sequence, dim=1)

        x = self.pos_drop(x + self.pos_embed)
        x = self.norm(self.blocks(x))

        if self.dist_token is not None:
            return x[:, 0], x[:, 1]
        return self.pre_logits(x[:, 0])

    def forward(self, x):
        """
        Compute classification logits.

        For a distilled model both head outputs are returned as a tuple while
        training (and not scripting); otherwise their average is returned.
        """
        features = self.forward_features(x)
        if self.head_dist is None:
            return self.head(features)

        logits = self.head(features[0])
        logits_dist = self.head_dist(features[1])
        if self.training and not torch.jit.is_scripting():
            return logits, logits_dist
        # during inference, return the average of both classifier predictions
        return (logits + logits_dist) / 2
def vit_large_patch16_224(num_classes: int = 1000):
    """
    Build the ViT-Large model (ViT-L/16) from the original paper
    (https://arxiv.org/abs/2010.11929).

    ImageNet-1k weights @ 224x224, source https://github.com/google-research/vision_transformer.
    Weights ported from the official Google JAX implementation:
    link: https://pan.baidu.com/s/1cxBgZJJ6qUWPSBNcE4TdRQ  password: qqt8

    Args:
        num_classes: size of the classification head.
    """
    config = dict(img_size=224,
                  patch_size=16,
                  embed_dim=1024,
                  depth=24,
                  num_heads=16,
                  representation_size=None,
                  num_classes=num_classes)
    return VisionTransformer(**config)
def vit_huge_patch14_224_in21k(num_classes: int = 21843, has_logits: bool = True):
    """
    Build the ViT-Huge model (ViT-H/14) from the original paper
    (https://arxiv.org/abs/2010.11929).

    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
    NOTE: converted weights not currently available, too large for github release hosting.

    Args:
        num_classes: size of the classification head.
        has_logits: when True a 1280-wide representation (pre-logits) layer is enabled.
    """
    if has_logits:
        representation_size = 1280
    else:
        representation_size = None

    return VisionTransformer(img_size=224,
                             patch_size=14,
                             embed_dim=1280,
                             depth=32,
                             num_heads=16,
                             representation_size=representation_size,
                             num_classes=num_classes)
class WFLWDataset(data.Dataset):
    """
    WFLW facial-landmark dataset (98 keypoints per face).
    https://wywu.github.io/projects/LAB/WFLW.html

    Expected directory structure under ``root``:
    ├── WFLW_annotations
    │   ├── list_98pt_rect_attr_train_test
    │   └── list_98pt_test
    └── WFLW_images
        ├── 0--Parade
        ├── 1--Handshaking
        ├── 10--People_Marching
        ├── 11--Meeting
        ├── 12--Group
        └── ......
    """

    def __init__(self,
                 root: str,
                 train: bool = True,
                 transforms=None):
        """
        Args:
            root: dataset root containing ``WFLW_images`` and ``WFLW_annotations``.
            train: read the train annotation list if True, else the test list.
            transforms: optional callable applied as ``transforms(img, target)``.
        """
        super().__init__()
        self.img_root = os.path.join(root, "WFLW_images")
        assert os.path.exists(self.img_root), "path '{}' does not exist.".format(self.img_root)

        ana_txt_name = "list_98pt_rect_attr_train.txt" if train else "list_98pt_rect_attr_test.txt"
        self.anno_path = os.path.join(root, "WFLW_annotations", "list_98pt_rect_attr_train_test", ana_txt_name)
        assert os.path.exists(self.anno_path), "file '{}' does not exist.".format(self.anno_path)

        self.transforms = transforms
        self.keypoints: List[np.ndarray] = []
        self.face_rects: List[List[int]] = []
        self.img_paths: List[str] = []
        with open(self.anno_path, "rt") as f:
            # iterate the file lazily instead of materialising every line first
            for line in f:
                line = line.strip()
                if not line:
                    continue

                # layout per line: 196 landmark coordinates, 4 rect values, ..., image name
                split_list = line.split(" ")
                keypoint_ = self.get_98_points(split_list)
                keypoint = np.array(keypoint_, dtype=np.float32).reshape((-1, 2))
                face_rect = list(map(int, split_list[196: 196 + 4]))  # xmin, ymin, xmax, ymax
                img_name = split_list[-1]

                self.keypoints.append(keypoint)
                self.face_rects.append(face_rect)
                self.img_paths.append(os.path.join(self.img_root, img_name))

    @staticmethod
    def get_5_points(keypoints: List[str]) -> List[float]:
        """Return the flat (x, y) coordinates of landmarks 76, 82, 54, 96 and 97."""
        five_num = [76, 82, 54, 96, 97]
        five_keypoint = []
        for i in five_num:
            five_keypoint.append(keypoints[i * 2])
            five_keypoint.append(keypoints[i * 2 + 1])
        return list(map(float, five_keypoint))

    @staticmethod
    def get_98_points(keypoints: List[str]) -> List[float]:
        """Return the first 196 annotation fields (98 x/y pairs) as floats."""
        return list(map(float, keypoints[:196]))

    @staticmethod
    def collate_fn(batch_infos: List[Tuple[torch.Tensor, dict]]):
        """
        Stack a list of ``(img, target)`` samples into batched tensors.

        Each target must provide tensors under "ori_keypoint", "keypoint" and
        "m_inv"; the result is ``(imgs, targets)`` where targets holds the
        stacked "ori_keypoints", "keypoints" and "m_invs".
        """
        imgs, ori_keypoints, keypoints, m_invs = [], [], [], []
        for info in batch_infos:
            imgs.append(info[0])
            ori_keypoints.append(info[1]["ori_keypoint"])
            keypoints.append(info[1]["keypoint"])
            m_invs.append(info[1]["m_inv"])

        imgs_tensor = torch.stack(imgs)
        keypoints_tensor = torch.stack(keypoints)
        ori_keypoints_tensor = torch.stack(ori_keypoints)
        m_invs_tensor = torch.stack(m_invs)

        targets = {"ori_keypoints": ori_keypoints_tensor,
                   "keypoints": keypoints_tensor,
                   "m_invs": m_invs_tensor}
        return imgs_tensor, targets

    def __getitem__(self, idx: int):
        """
        Load sample ``idx`` as an RGB image plus its target dict.

        Raises:
            FileNotFoundError: if the image cannot be read from disk.
        """
        img_path = self.img_paths[idx]
        img_bgr = cv2.imread(img_path, flags=cv2.IMREAD_COLOR)
        if img_bgr is None:
            # cv2.imread signals failure by returning None rather than raising
            raise FileNotFoundError("failed to read image '{}'.".format(img_path))
        img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

        # Hand out copies: "ori_keypoint" and "keypoint" must be independent of
        # each other and of the cached annotations, otherwise an in-place edit
        # by a transform or caller would silently corrupt the dataset.
        target = {
            "box": list(self.face_rects[idx]),
            "ori_keypoint": self.keypoints[idx].copy(),
            "keypoint": self.keypoints[idx].copy()
        }

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Number of annotated faces."""
        return len(self.keypoints)
def main():
    """
    Predict the 98 facial keypoints of a single face image.

    Loads ``./test_img.jpg`` and the checkpoint under ``./weights``, runs the
    DeepPose model on the whole image (the box is the full image) and writes
    the visualisation to ``predict.jpg``.
    """
    img_hw = [256, 256]
    num_keypoints = 98
    img_path = "./test_img.jpg"
    weights_path = "./weights/model_weights_209.pth"
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    transform = transforms.Compose([
        transforms.AffineTransform(scale_prob=0., rotate_prob=0., shift_prob=0., fixed_size=img_hw),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # load image
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    # force 3-channel RGB: grayscale or RGBA files would otherwise break the
    # (h, w, c) unpacking below and the 3-channel normalisation
    img = np.array(Image.open(img_path).convert("RGB"))
    h, w, c = img.shape
    target = {"box": [0, 0, w, h]}
    img_tensor, target = transform(img, target=target)
    # expand batch dimension
    img_tensor = img_tensor.unsqueeze(0)

    # create model
    model = create_deep_pose_model(num_keypoints=num_keypoints)

    # load model weights
    assert os.path.exists(weights_path), "file: '{}' dose not exist.".format(weights_path)
    model.load_state_dict(torch.load(weights_path, map_location="cpu")["model"])
    model.to(device)

    # prediction
    model.eval()
    with torch.inference_mode():
        with torch.autocast(device_type=device.type):
            pred = model(img_tensor.to(device))
        # autocast may yield a reduced-precision output (bfloat16 on CPU), which
        # numpy cannot represent, so cast back to float32 before converting
        pred = torch.squeeze(pred).reshape([-1, 2]).float().cpu().numpy()

    wh_tensor = np.array(img_hw[::-1], dtype=np.float32).reshape([1, 2])
    pred = pred * wh_tensor  # rel coord to abs coord
    # map points from the network input frame back to the original image
    pred = transforms.affine_points_np(pred, target["m_inv"].numpy())
    draw_keypoints(img, coordinate=pred, save_path="predict.jpg", radius=2)
def main(args):
    """
    Single-device training entry point for DeepPose on WFLW.

    Builds the model, datasets, optimizer and a warmup + multi-step learning
    rate schedule (stepped per iteration), optionally resumes from a
    checkpoint, then alternates training, periodic evaluation and periodic
    checkpointing.

    Args:
        args: namespace produced by ``get_args_parser().parse_args()``.
    """
    torch.manual_seed(1234)
    dataset_dir = args.dataset_dir
    save_weights_dir = args.save_weights_dir
    save_freq = args.save_freq
    eval_freq = args.eval_freq
    num_keypoints = args.num_keypoints
    num_workers = args.num_workers
    epochs = args.epochs
    bs = args.batch_size
    start_epoch = 0
    img_hw = args.img_hw
    os.makedirs(save_weights_dir, exist_ok=True)

    # fall back to CPU when a CUDA device is requested but unavailable
    if "cuda" in args.device and not torch.cuda.is_available():
        device = torch.device("cpu")
    else:
        device = torch.device(args.device)
    print(f"using device: {device} for training.")

    # tensorboard writer
    tb_writer = SummaryWriter()

    # create model
    model = create_deep_pose_model(num_keypoints)
    model.to(device)

    # config dataset and dataloader
    data_transform = {
        "train": transforms.Compose([
            transforms.AffineTransform(scale_factor=(0.65, 1.35), rotate=45, shift_factor=0.15, fixed_size=img_hw),
            transforms.RandomHorizontalFlip(0.5),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ]),
        "val": transforms.Compose([
            transforms.AffineTransform(scale_prob=0., rotate_prob=0., shift_prob=0., fixed_size=img_hw),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
    }
    train_dataset = WFLWDataset(root=dataset_dir,
                                train=True,
                                transforms=data_transform["train"])
    val_dataset = WFLWDataset(root=dataset_dir,
                              train=False,
                              transforms=data_transform["val"])

    # DataLoader rejects persistent_workers=True when num_workers == 0, so only
    # enable it when worker processes actually exist (keeps --num_workers 0 usable)
    persistent_workers = num_workers > 0
    train_loader = DataLoader(train_dataset,
                              batch_size=bs,
                              shuffle=True,
                              pin_memory=True,
                              num_workers=num_workers,
                              collate_fn=WFLWDataset.collate_fn,
                              persistent_workers=persistent_workers)
    val_loader = DataLoader(val_dataset,
                            batch_size=bs,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=num_workers,
                            collate_fn=WFLWDataset.collate_fn,
                            persistent_workers=persistent_workers)

    # define optimizers
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # define learning rate scheduler; both schedules count iterations, not
    # epochs, hence the multiplication by len(train_loader)
    warmup_scheduler = torch.optim.lr_scheduler.LinearLR(
        optimizer=optimizer,
        start_factor=0.01,
        end_factor=1.0,
        total_iters=len(train_loader) * args.warmup_epoch
    )
    multi_step_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer=optimizer,
        milestones=[len(train_loader) * i for i in args.lr_steps],
        gamma=0.1
    )
    lr_scheduler = torch.optim.lr_scheduler.ChainedScheduler([warmup_scheduler, multi_step_scheduler])

    if args.resume:
        assert os.path.exists(args.resume)
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        start_epoch = checkpoint['epoch'] + 1
        print("the training process from epoch{}...".format(start_epoch))

    if args.test_only:
        evaluate(model=model,
                 epoch=start_epoch,
                 val_loader=val_loader,
                 device=device,
                 tb_writer=tb_writer,
                 affine_points_torch_func=transforms.affine_points_torch,
                 num_keypoints=num_keypoints,
                 img_hw=img_hw)
        # flush pending events before leaving
        tb_writer.close()
        return

    for epoch in range(start_epoch, epochs):
        # train
        train_one_epoch(model=model,
                        epoch=epoch,
                        train_loader=train_loader,
                        device=device,
                        optimizer=optimizer,
                        lr_scheduler=lr_scheduler,
                        tb_writer=tb_writer,
                        num_keypoints=num_keypoints,
                        img_hw=img_hw)

        # eval
        if epoch % eval_freq == 0 or epoch == args.epochs - 1:
            evaluate(model=model,
                     epoch=epoch,
                     val_loader=val_loader,
                     device=device,
                     tb_writer=tb_writer,
                     affine_points_torch_func=transforms.affine_points_torch,
                     num_keypoints=num_keypoints,
                     img_hw=img_hw)

        # save weights
        if epoch % save_freq == 0 or epoch == args.epochs - 1:
            save_files = {
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'epoch': epoch
            }
            torch.save(save_files, os.path.join(save_weights_dir, f"model_weights_{epoch}.pth"))

    # flush pending events once training has finished
    tb_writer.close()
def main(args):
    """
    Multi-GPU (DistributedDataParallel) training entry point for DeepPose on WFLW.

    Initialises the process group, scales the learning rate by the world size,
    builds the DDP-wrapped model, distributed samplers/loaders and the
    warmup + multi-step schedule (stepped per iteration), optionally resumes
    from a checkpoint, then trains, periodically evaluates, and saves
    checkpoints from the main process only.

    Args:
        args: namespace produced by ``get_args_parser().parse_args()``.

    Raises:
        EnvironmentError: if distributed mode could not be initialised.
    """
    torch.manual_seed(1234)
    init_distributed_mode(args)
    if not args.distributed:
        raise EnvironmentError("not support distributed training.")

    dataset_dir = args.dataset_dir
    save_weights_dir = args.save_weights_dir
    save_freq = args.save_freq
    eval_freq = args.eval_freq
    num_keypoints = args.num_keypoints
    num_workers = args.num_workers
    epochs = args.epochs
    bs = args.batch_size
    start_epoch = 0
    img_hw = args.img_hw
    device = torch.device(args.device)
    os.makedirs(save_weights_dir, exist_ok=True)

    # adjust learning rate
    args.lr = args.lr * args.world_size

    # only the main process writes tensorboard logs
    tb_writer = None
    if is_main_process():
        # tensorboard writer
        tb_writer = SummaryWriter()

    # create model
    model = create_deep_pose_model(num_keypoints)
    model.to(device)
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])

    # config dataset and dataloader
    data_transform = {
        "train": transforms.Compose([
            transforms.AffineTransform(scale_factor=(0.65, 1.35), rotate=45, shift_factor=0.15, fixed_size=img_hw),
            transforms.RandomHorizontalFlip(0.5),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ]),
        "val": transforms.Compose([
            transforms.AffineTransform(scale_prob=0., rotate_prob=0., shift_prob=0., fixed_size=img_hw),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
    }
    train_dataset = WFLWDataset(root=dataset_dir,
                                train=True,
                                transforms=data_transform["train"])
    val_dataset = WFLWDataset(root=dataset_dir,
                              train=False,
                              transforms=data_transform["val"])

    train_sampler = DistributedSampler(train_dataset)
    val_sampler = DistributedSampler(val_dataset)
    train_batch_sampler = BatchSampler(train_sampler, args.batch_size, drop_last=True)

    # DataLoader rejects persistent_workers=True when num_workers == 0, so only
    # enable it when worker processes actually exist (keeps --num_workers 0 usable)
    persistent_workers = num_workers > 0
    train_loader = DataLoader(train_dataset,
                              batch_sampler=train_batch_sampler,
                              pin_memory=True,
                              num_workers=num_workers,
                              collate_fn=WFLWDataset.collate_fn,
                              persistent_workers=persistent_workers)
    val_loader = DataLoader(val_dataset,
                            batch_size=bs,
                            sampler=val_sampler,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=num_workers,
                            collate_fn=WFLWDataset.collate_fn,
                            persistent_workers=persistent_workers)

    # define optimizers
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # define learning rate scheduler; both schedules count iterations, not
    # epochs, hence the multiplication by len(train_loader)
    warmup_scheduler = torch.optim.lr_scheduler.LinearLR(
        optimizer=optimizer,
        start_factor=0.01,
        end_factor=1.0,
        total_iters=len(train_loader) * args.warmup_epoch
    )
    multi_step_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer=optimizer,
        milestones=[len(train_loader) * i for i in args.lr_steps],
        gamma=0.1
    )
    lr_scheduler = torch.optim.lr_scheduler.ChainedScheduler([warmup_scheduler, multi_step_scheduler])

    if args.resume:
        assert os.path.exists(args.resume)
        checkpoint = torch.load(args.resume, map_location='cpu')
        # checkpoints store the unwrapped model, so load into model.module
        model.module.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        start_epoch = checkpoint['epoch'] + 1
        print("the training process from epoch{}...".format(start_epoch))

    if args.test_only:
        evaluate(model=model,
                 epoch=start_epoch,
                 val_loader=val_loader,
                 device=device,
                 tb_writer=tb_writer,
                 affine_points_torch_func=transforms.affine_points_torch,
                 num_keypoints=num_keypoints,
                 img_hw=img_hw)
        if tb_writer is not None:
            # flush pending events before leaving
            tb_writer.close()
        return

    for epoch in range(start_epoch, epochs):
        # train
        train_sampler.set_epoch(epoch)  # shuffle training data
        train_one_epoch(model=model,
                        epoch=epoch,
                        train_loader=train_loader,
                        device=device,
                        optimizer=optimizer,
                        lr_scheduler=lr_scheduler,
                        tb_writer=tb_writer,
                        num_keypoints=num_keypoints,
                        img_hw=img_hw)

        # eval
        if epoch % eval_freq == 0 or epoch == args.epochs - 1:
            evaluate(model=model,
                     epoch=epoch,
                     val_loader=val_loader,
                     device=device,
                     tb_writer=tb_writer,
                     affine_points_torch_func=transforms.affine_points_torch,
                     num_keypoints=num_keypoints,
                     img_hw=img_hw)

        # save weights
        if is_main_process() and (epoch % save_freq == 0 or epoch == args.epochs - 1):
            save_files = {
                'model': model.module.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'epoch': epoch
            }
            torch.save(save_files, os.path.join(save_weights_dir, f"model_weights_{epoch}.pth"))

    if tb_writer is not None:
        # flush pending events once training has finished
        tb_writer.close()
def init_distributed_mode(args):
    """
    Initialise torch.distributed from the launcher's environment variables.

    Sets ``args.distributed`` and, when distributed mode is enabled, also
    ``args.rank``, ``args.world_size``, ``args.gpu`` and ``args.dist_backend``.
    ``args.dist_url`` is read as the init method. Distributed mode is disabled
    (``args.distributed = False``) when CUDA is unavailable or when neither
    the ``RANK``/``WORLD_SIZE`` pair nor ``SLURM_PROCID`` is present.

    Args:
        args: mutable namespace (e.g. the argparse result) updated in place.
    """
    if not torch.cuda.is_available():
        print('No available device')
        args.distributed = False
        return

    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
        args.rank = int(os.environ["RANK"])
        args.world_size = int(os.environ['WORLD_SIZE'])
        args.gpu = int(os.environ['LOCAL_RANK'])
    elif 'SLURM_PROCID' in os.environ:
        args.rank = int(os.environ['SLURM_PROCID'])
        # world_size is read by init_process_group below but was never set on
        # this path; take the task count from SLURM, falling back to a value
        # already present on args (or 1) when SLURM_NTASKS is not exported.
        args.world_size = int(os.environ.get('SLURM_NTASKS', getattr(args, 'world_size', 1)))
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print('Not using distributed mode')
        args.distributed = False
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print(f'| distributed init (rank {args.rank}): {args.dist_url}', flush=True)
    torch.distributed.init_process_group(backend=args.dist_backend,
                                         init_method=args.dist_url,
                                         world_size=args.world_size,
                                         rank=args.rank)
    torch.distributed.barrier()
    # silence print() on every process except rank 0
    setup_for_distributed(args.rank == 0)
class L1Loss(nn.Module):
    """L1 keypoint regression loss, summed per sample and averaged over the batch."""

    def forward(self, pred: torch.Tensor, label: torch.Tensor, mask: torch.Tensor = None) -> torch.Tensor:
        """
        Args:
            pred [N, K, 2]
            label [N, K, 2]
            mask [N, K], optional per-keypoint weight (0 removes a keypoint)

        Returns:
            scalar tensor: mean over N of the per-sample sum of absolute errors.
        """
        losses = F.l1_loss(pred, label, reduction="none")
        if mask is not None:
            # filter invalid keypoints(e.g. out of range)
            losses = losses * mask.unsqueeze(2)

        return torch.mean(torch.sum(losses, dim=(1, 2)), dim=0)
class WingLoss(nn.Module):
    """
    Wing loss for keypoint regression.

    refer https://github.com/TropComplique/wing-loss/blob/master/loss.py
    """

    def __init__(self, w: float = 10.0, epsilon: float = 2.0) -> None:
        """
        Args:
            w: error magnitude below which the logarithmic branch is used.
            epsilon: curvature parameter of the logarithmic branch.
        """
        super().__init__()
        self.w = w
        self.epsilon = epsilon
        # offset subtracted on the linear branch
        self.C = w * (1.0 - math.log(1.0 + w / epsilon))

    def forward(self,
                pred: torch.Tensor,
                label: torch.Tensor,
                wh_tensor: torch.Tensor,
                mask: torch.Tensor = None) -> torch.Tensor:
        """
        Args:
            pred [N, K, 2]
            wh_tensor [1, 1, 2]
            label [N, K, 2]
            mask [N, K], optional per-keypoint weight (0 removes a keypoint)

        Returns:
            scalar tensor: mean over N of the per-sample summed loss.
        """
        delta = (pred - label).abs() * wh_tensor  # rel to abs
        log_branch = self.w * torch.log(1.0 + delta / self.epsilon)
        linear_branch = delta - self.C
        losses = torch.where(self.w > delta, log_branch, linear_branch)

        if mask is not None:
            # filter invalid keypoints(e.g. out of range)
            losses = losses * mask.unsqueeze(2)

        return torch.mean(torch.sum(losses, dim=(1, 2)), dim=0)
class NMEMetric:
    """
    Accumulates a normalised mean error over batches.

    For each sample the mean L2 distance between predicted and ground-truth
    keypoints is divided by the distance between ground-truth keypoints 60
    and 72; samples where that distance is zero are skipped.
    """

    def __init__(self, device: torch.device = None) -> None:
        """
        Args:
            device: device used for the temporary tensors created when
                synchronising across processes; defaults to CPU.
        """
        # keypoint indices of the outer corners of the two eyes
        self.keypoint_idxs = [60, 72]
        self.nme_accumulator: float = 0.
        self.counter: float = 0.
        self.device = device if device is not None else torch.device("cpu")

    def update(self, pred: torch.Tensor, gt: torch.Tensor, mask: torch.Tensor = None):
        """
        Args:
            pred (shape [N, K, 2]): pred keypoints
            gt (shape [N, K, 2]): gt keypoints
            mask (shape [N, K]): valid keypoints mask
        """
        # ion: inter-ocular distance normalized error
        ion = torch.linalg.norm(gt[:, self.keypoint_idxs[0]] - gt[:, self.keypoint_idxs[1]], dim=1)

        valid_ion_mask = ion > 0
        if mask is None:
            mask = valid_ion_mask
        else:
            # keep a sample when it has a non-zero normaliser and at least one
            # keypoint flagged valid; the result is a per-sample [N] mask
            mask = torch.logical_and(mask, valid_ion_mask.unsqueeze(1)).sum(dim=1) > 0
        num_valid = mask.sum().item()

        # equal: (pred - gt).pow(2).sum(dim=2).pow(0.5).mean(dim=1)
        l2_dis = torch.linalg.norm(pred - gt, dim=2)[mask].mean(dim=1)  # [N]

        # avoid divide by zero
        ion = ion[mask]  # [N]

        self.nme_accumulator += l2_dis.div(ion).sum().item()
        self.counter += num_valid

    def evaluate(self):
        """
        Return the accumulated error divided by the number of counted samples.

        Raises:
            ZeroDivisionError: if no valid sample has been accumulated yet.
        """
        return self.nme_accumulator / self.counter

    def synchronize_results(self):
        """Sum the accumulator and the counter across processes when distributed."""
        if is_dist_avail_and_initialized():
            self.nme_accumulator = reduce_value(
                torch.as_tensor(self.nme_accumulator, device=self.device),
                average=False
            ).item()

            # convert back to a Python number so evaluate() keeps returning a
            # float instead of a tensor after synchronisation
            self.counter = reduce_value(
                torch.as_tensor(self.counter, device=self.device),
                average=False
            ).item()
def train_one_epoch(model: torch.nn.Module,
                    epoch: int,
                    train_loader: DataLoader,
                    device: torch.device,
                    optimizer: torch.optim.Optimizer,
                    lr_scheduler: torch.optim.lr_scheduler.LRScheduler,
                    tb_writer: SummaryWriter,
                    num_keypoints: int,
                    img_hw: List[int]) -> None:
    """Run one training epoch with WingLoss under autocast.

    The scheduler is stepped after every optimizer step. On the main
    process the progress bar text, the training loss and the learning rate
    of the first parameter group are reported for every step. The process
    exits with status 1 as soon as the reduced loss is not finite.
    """
    criterion = WingLoss()
    # img_hw is (h, w); the loss expects the scale as (w, h), broadcastable to [N, K, 2].
    scale_wh = torch.as_tensor(img_hw[::-1], dtype=torch.float32, device=device).reshape([1, 1, 2])

    model.train()
    # Only the main process wraps the loader in a progress bar.
    batches = tqdm(train_loader, file=sys.stdout) if is_main_process() else train_loader

    for step, (imgs, targets) in enumerate(batches):
        imgs = imgs.to(device)
        gt_keypoints = targets["keypoints"].to(device)

        optimizer.zero_grad()

        # use mixed precision to speed up training
        with torch.autocast(device_type=device.type):
            outputs: torch.Tensor = model(imgs)
            loss: torch.Tensor = criterion(outputs.reshape((-1, num_keypoints, 2)), gt_keypoints, scale_wh)

        # reduce_value is presumably a cross-process reduction; confirm in distributed_utils.
        loss_value = reduce_value(loss).item()
        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            sys.exit(1)

        loss.backward()
        optimizer.step()
        lr_scheduler.step()

        if not is_main_process():
            continue

        batches.desc = f"train epoch[{epoch}] loss:{loss_value:.3f}"

        global_step = epoch * len(train_loader) + step
        tb_writer.add_scalar("train loss", loss.item(), global_step=global_step)
        tb_writer.add_scalar("learning rate", optimizer.param_groups[0]["lr"], global_step=global_step)
def adjust_box(xmin: int, ymin: int, xmax: int, ymax: int, fixed_size: Tuple[int, int]):
    """Pad a box symmetrically in w or h so that h / w equals the target ratio.

    Args:
        xmin, ymin, xmax, ymax: box corners
        fixed_size: target size as (h, w); only its ratio h / w is used

    Returns:
        (xmin, ymin, xmax, ymax) of the padded box. The padded pair of
        coordinates becomes float; the box is only ever enlarged.
    """
    w = xmax - xmin
    h = ymax - ymin

    hw_ratio = fixed_size[0] / fixed_size[1]
    # A zero-width box with positive height is "too tall" by definition;
    # testing w first avoids the ZeroDivisionError of h / w.
    too_tall = (h / w > hw_ratio) if w != 0 else (h > 0)
    if too_tall:
        # pad along w
        wi = h / hw_ratio
        pad_w = (wi - w) / 2
        xmin = xmin - pad_w
        xmax = xmax + pad_w
    else:
        # pad along h
        hi = w * hw_ratio
        pad_h = (hi - h) / 2
        ymin = ymin - pad_h
        ymax = ymax + pad_h

    return xmin, ymin, xmax, ymax


def affine_points_np(keypoint: np.ndarray, m: np.ndarray) -> np.ndarray:
    """Apply a 2x3 affine matrix to a set of 2-D points.

    Args:
        keypoint [k, 2]
        m [2, 3]

    Returns:
        Transformed points, shape [k, 2].
    """
    # Homogeneous coordinates: append a column of ones -> [k, 3]
    ones = np.ones((keypoint.shape[0], 1), dtype=np.float32)
    keypoint = np.concatenate([keypoint, ones], axis=1)
    new_keypoint = np.matmul(keypoint, m.T)
    return new_keypoint
class Compose:
    """Chain several (image, target) transforms and apply them in order."""

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        for transform in self.transforms:
            image, target = transform(image, target)
        return image, target


class Resize:
    """Resize the image to a fixed (h, w) with bilinear interpolation."""

    def __init__(self, h: int, w: int):
        self.h = h
        self.w = w

    def __call__(self, image: np.ndarray, target):
        # cv2 expects dsize as (width, height)
        resized = cv2.resize(image,
                             dsize=(self.w, self.h),
                             fx=0,
                             fy=0,
                             interpolation=cv2.INTER_LINEAR)
        return resized, target


class ToTensor:
    """Convert an HWC OpenCV image to a CHW float32 tensor scaled to 0~1."""

    def __call__(self, image, target):
        chw = torch.from_numpy(image).permute((2, 0, 1))
        image = chw.to(torch.float32) / 255.

        # Target arrays are converted only when both keypoint entries exist.
        if "ori_keypoint" in target and "keypoint" in target:
            for key in ("ori_keypoint", "keypoint", "m_inv"):
                target[key] = torch.from_numpy(target[key])

        return image, target


class Normalize:
    """Standardize the image in place and rescale keypoints to relative coords."""

    def __init__(self, mean=None, std=None):
        as_column = (3, 1, 1)  # one value per channel, broadcast over H and W
        self.mean = torch.as_tensor(mean, dtype=torch.float32).reshape(as_column)
        self.std = torch.as_tensor(std, dtype=torch.float32).reshape(as_column)

    def __call__(self, image: torch.Tensor, target: dict):
        # In place: the tensor passed in is the tensor returned.
        image.sub_(self.mean)
        image.div_(self.std)

        if "keypoint" not in target:
            return image, target

        _, h, w = image.shape
        points = target["keypoint"]
        points[:, 0] /= w  # x -> x / width
        points[:, 1] /= h  # y -> y / height
        target["keypoint"] = points
        return image, target
class AffineTransform(object):
    """Random shift + scale + rotation, then warp the box region to a fixed size.

    The source region is described by three points (box center, top-middle,
    right-middle). They are mapped onto the matching three points of the
    output image, which defines the affine matrix used to warp the image
    and the keypoints.
    """
    def __init__(self,
                 scale_factor: Tuple[float, float] = (0.65, 1.35),
                 scale_prob: float = 1.,
                 rotate: int = 45,
                 rotate_prob: float = 0.6,
                 shift_factor: float = 0.15,
                 shift_prob: float = 0.3,
                 fixed_size: Tuple[int, int] = (256, 256)):
        # scale_factor: (low, high) range the box size is multiplied by
        # rotate: maximum absolute rotation, drawn as an integer number of degrees
        # shift_factor: maximum shift as a fraction of the box width / height
        # *_prob: probability of applying the corresponding augmentation
        self.scale_factor = scale_factor
        self.scale_prob = scale_prob
        self.rotate = rotate
        self.rotate_prob = rotate_prob
        self.shift_factor = shift_factor
        self.shift_prob = shift_prob
        self.fixed_size = fixed_size  # (h, w)

    def __call__(self, img: np.ndarray, target: dict):
        """Warp ``img`` and update ``target``.

        Reads ``target["box"]`` (unpacked into adjust_box as xmin, ymin,
        xmax, ymax) and, if present, ``target["keypoint"]``. Writes
        ``target["keypoint"]`` (transformed), ``target["m"]`` and
        ``target["m_inv"]``. Returns (warped image, target).
        """
        # Pad the box so its aspect ratio matches fixed_size before sampling.
        src_xmin, src_ymin, src_xmax, src_ymax = adjust_box(*target["box"], fixed_size=self.fixed_size)
        src_w = src_xmax - src_xmin
        src_h = src_ymax - src_ymin

        # NOTE: the order of the random calls below determines the result
        # under a fixed seed; do not reorder them.
        if random.random() < self.shift_prob:
            shift_w_factor = random.uniform(-self.shift_factor, self.shift_factor)
            shift_h_factor = random.uniform(-self.shift_factor, self.shift_factor)
            src_xmin -= int(src_w * shift_w_factor)
            src_xmax -= int(src_w * shift_w_factor)
            src_ymin -= int(src_h * shift_h_factor)
            src_ymax -= int(src_h * shift_h_factor)

        src_center = np.array([(src_xmin + src_xmax) / 2, (src_ymin + src_ymax) / 2], dtype=np.float32)
        src_p2 = src_center + np.array([0, -src_h / 2], dtype=np.float32)  # top middle
        src_p3 = src_center + np.array([src_w / 2, 0], dtype=np.float32)  # right middle

        dst_center = np.array([(self.fixed_size[1] - 1) / 2, (self.fixed_size[0] - 1) / 2], dtype=np.float32)
        dst_p2 = np.array([(self.fixed_size[1] - 1) / 2, 0], dtype=np.float32)  # top middle
        dst_p3 = np.array([self.fixed_size[1] - 1, (self.fixed_size[0] - 1) / 2], dtype=np.float32)  # right middle

        if random.random() < self.scale_prob:
            scale = random.uniform(*self.scale_factor)
            src_w = src_w * scale
            src_h = src_h * scale
            src_p2 = src_center + np.array([0, -src_h / 2], dtype=np.float32)  # top middle
            src_p3 = src_center + np.array([src_w / 2, 0], dtype=np.float32)  # right middle

        if random.random() < self.rotate_prob:
            angle = random.randint(-self.rotate, self.rotate)  # degrees
            angle = angle / 180 * math.pi  # radians
            # Rotate the top-middle and right-middle points around the center.
            src_p2 = src_center + np.array([src_h / 2 * math.sin(angle), -src_h / 2 * math.cos(angle)], dtype=np.float32)
            src_p3 = src_center + np.array([src_w / 2 * math.cos(angle), src_w / 2 * math.sin(angle)], dtype=np.float32)

        src = np.stack([src_center, src_p2, src_p3])
        dst = np.stack([dst_center, dst_p2, dst_p3])

        m = cv2.getAffineTransform(src, dst).astype(np.float32)  # forward affine matrix (source -> output)
        m_inv = cv2.getAffineTransform(dst, src).astype(np.float32)  # inverse affine matrix, kept to map results back later

        # Warp the image with the forward matrix
        warp_img = cv2.warpAffine(src=img,
                                  M=m,
                                  dsize=tuple(self.fixed_size[::-1]),  # [w, h]
                                  borderMode=cv2.BORDER_CONSTANT,
                                  borderValue=(0, 0, 0),
                                  flags=cv2.INTER_LINEAR)

        if "keypoint" in target:
            keypoint = target["keypoint"]
            keypoint = affine_points_np(keypoint, m)
            target["keypoint"] = keypoint

        # from utils import draw_keypoints
        # keypoint[:, 0] /= self.fixed_size[1]
        # keypoint[:, 1] /= self.fixed_size[0]
        # draw_keypoints(warp_img, keypoint, "affine.jpg", 2, is_rel=True)

        target["m"] = m
        target["m_inv"] = m_inv
        return warp_img, target
9: 23, 10: 22, 11: 21, 12: 20, 13: 19, 14: 18, 15: 17, 16: 16, 17: 15, 18: 14, 19: 13, 20: 12, 21: 11, 22: 10, 23: 9, 24: 8, 25: 7, 26: 6, 27: 5, 28: 4, 29: 3, 30: 2, 31: 1, 32: 0, 33: 46, 34: 45, 35: 44, 36: 43, 37: 42, 38: 50, 39: 49, 40: 48, 41: 47, 42: 37, 43: 36, 44: 35, 45: 34, 46: 33, 47: 41, 48: 40, 49: 39, 50: 38, 51: 51, 52: 52, 53: 53, 54: 54, 55: 59, 56: 58, 57: 57, 58: 56, 59: 55, 60: 72, 61: 71, 62: 70, 63: 69, 64: 68, 65: 75, 66: 74, 67: 73, 68: 64, 69: 63, 70: 62, 71: 61, 72: 60, 73: 67, 74: 66, 75: 65, 76: 82, 77: 81, 78: 80, 79: 79, 80: 78, 81: 77, 82: 76, 83: 87, 84: 86, 85: 85, 86: 84, 87: 83, 88: 92, 89: 91, 90: 90, 91: 89, 92: 88, 93: 95, 94: 94, 95: 93, 96: 97, 97: 96, } ================================================ FILE: pytorch_keypoint/HRNet/README.md ================================================ # HRNet ## 该项目主要参考以下仓库 * https://github.com/leoxiaobin/deep-high-resolution-net.pytorch * https://github.com/stefanopini/simple-HRNet ## 环境配置: * Python3.6/3.7/3.8 * Pytorch1.10或以上 * pycocotools(Linux:`pip install pycocotools`; Windows:`pip install pycocotools-windows`(不需要额外安装vs)) * Ubuntu或Centos(不建议Windows) * 最好使用GPU训练 * 详细环境配置见`requirements.txt` ## 文件结构: ``` ├── model: 搭建HRNet相关代码 ├── train_utils: 训练验证相关模块(包括coco验证相关) ├── my_dataset_coco.py: 自定义dataset用于读取COCO2017数据集 ├── person_keypoints.json: COCO数据集中人体关键点相关信息 ├── train.py: 单GPU/CPU训练脚本 ├── train_multi_GPU.py: 针对使用多GPU的用户使用 ├── predict.py: 简易的预测脚本,使用训练好的权重进行预测 ├── validation.py: 利用训练好的权重验证/测试数据的COCO指标,并生成record_mAP.txt文件 └── transforms.py: 数据增强相关 ``` ## 预训练权重下载地址(下载后放入当前文件夹中): 由于原作者提供的预训练权重(Imagenet和COCO)是放在GoogleDrive和OneDrive上的,国内无法正常访问。所有我提前将权重文件全部下载并放在百度网盘中, 需要的可以自行下载,链接:https://pan.baidu.com/s/1Lu6mMAWfm_8GGykttFMpVw 提取码:f43o 下载后的目录结构如下: ``` ├── pytorch ├── pose_mpii ├── pose_coco │ ├── pose_resnet_50_384x288.pth │ ├── pose_resnet_50_256x192.pth │ ├── pose_resnet_101_384x288.pth │ ├── pose_resnet_101_256x192.pth │ ├── pose_hrnet_w32_384x288.pth │ └── pose_hrnet_w32_256x192.pth └── 
imagenet ├── resnet50-19c8e357.pth ├── resnet152-b121ed2d.pth ├── resnet101-5d3b4d8f.pth └── hrnet_w32-36af842e.pth ``` 如果要直接使用在COCO数据集上预训练好的权重进行预测,下载pose_coco下的`pose_hrnet_w32_256x192.pth`使用即可。 如果要从头训练网络,下载imagenet下的`hrnet_w32-36af842e.pth`文件,并重命名为`hrnet_w32.pth`即可。 除此之外,还有一个`person_detection_results`文件,存储的是论文中提到的人体检测器的检测结果,如果需要使用可以下载,但个人建议直接使用COCO val中GT信息即可。 链接: https://pan.baidu.com/s/19Z4mmNHUD934GQ9QYcF5iw 密码: i08q ## 数据集,本例程使用的是COCO2017数据集 * COCO官网地址:https://cocodataset.org/ * 对数据集不了解的可以看下我写的博文:https://blog.csdn.net/qq_37541097/article/details/113247318 * 这里以下载coco2017数据集为例,主要下载三个文件: * `2017 Train images [118K/18GB]`:训练过程中使用到的所有图像文件 * `2017 Val images [5K/1GB]`:验证过程中使用到的所有图像文件 * `2017 Train/Val annotations [241MB]`:对应训练集和验证集的标注json文件 * 都解压到`coco2017`文件夹下,可得到如下文件夹结构: ``` ├── coco2017: 数据集根目录 ├── train2017: 所有训练图像文件夹(118287张) ├── val2017: 所有验证图像文件夹(5000张) └── annotations: 对应标注文件夹 ├── instances_train2017.json: 对应目标检测、分割任务的训练集标注文件 ├── instances_val2017.json: 对应目标检测、分割任务的验证集标注文件 ├── captions_train2017.json: 对应图像描述的训练集标注文件 ├── captions_val2017.json: 对应图像描述的验证集标注文件 ├── person_keypoints_train2017.json: 对应人体关键点检测的训练集标注文件 └── person_keypoints_val2017.json: 对应人体关键点检测的验证集标注文件 ``` ## 训练方法 * 注:该项目从头训练HRNet在MS COCO2017的val上的mAP[@0.50:0.95]为76.1,利用原作者提供的权重在val上的mAP[@0.50:0.95]为76.6,相差0.5个点, 暂时没有找到原因。由于训练该网络需要迭代210个epoch(按照论文中的数据),训练时间很长,建议直接使用原作者提供训练好的权重。另外,在训练过程中发现GPU的利用率 并不高(在20%~60%之间浮动),暂时猜测是网络结构的原因。 * 确保提前准备好数据集 * 确保提前下载好对应预训练模型权重 * 确保设置好`--num-joints`(对于人体检测的关键点个数,COCO是17个点)、`--fixed-size`(输入目标图像的高宽,默认[256, 192])和`--data-path`(指向`coco2017`目录) * 若要使用单GPU训练直接使用train.py训练脚本 * 若要使用多GPU训练,使用`torchrun --nproc_per_node=8 train_multi_GPU.py`指令,`nproc_per_node`参数为使用GPU数量 * 如果想指定使用哪些GPU设备可在指令前加上`CUDA_VISIBLE_DEVICES=0,3`(例如我只要使用设备中的第1块和第4块GPU设备) * `CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py` ## 注意事项 1. 
在使用训练脚本时,注意要将`--data-path`设置为自己存放数据集的**根目录**: 假设要使用COCO数据集,启用自定义数据集读取CocoDetection并将数据集解压到成/data/coco2017目录下 ``` python train.py --data-path /data/coco2017 ``` 2. 训练过程中保存的`results.txt`是每个epoch在验证集上的COCO指标,前10个值是COCO指标,后面两个值是训练平均损失以及学习率 3. 在使用预测脚本时,如果要读取自己训练好的权重要将`weights_path`设置为你自己生成的权重路径。 ## 如果对HRNet网络不是很理解可参考我的bilibili https://www.bilibili.com/video/BV1bB4y1y7qP ## 进一步了解该项目,以及对HRNet代码的分析可参考我的bilibili https://www.bilibili.com/video/BV1ar4y157JM ## HRNet网络结构图 ![HRNet.png](HRNet.png) ================================================ FILE: pytorch_keypoint/HRNet/draw_utils.py ================================================ import numpy as np from numpy import ndarray import PIL from PIL import ImageDraw, ImageFont from PIL.Image import Image # COCO 17 points point_name = ["nose", "left_eye", "right_eye", "left_ear", "right_ear", "left_shoulder", "right_shoulder", "left_elbow", "right_elbow", "left_wrist", "right_wrist", "left_hip", "right_hip", "left_knee", "right_knee", "left_ankle", "right_ankle"] point_color = [(240, 2, 127), (240, 2, 127), (240, 2, 127), (240, 2, 127), (240, 2, 127), (255, 255, 51), (255, 255, 51), (254, 153, 41), (44, 127, 184), (217, 95, 14), (0, 0, 255), (255, 255, 51), (255, 255, 51), (228, 26, 28), (49, 163, 84), (252, 176, 243), (0, 176, 240), (255, 255, 0), (169, 209, 142), (255, 255, 0), (169, 209, 142), (255, 255, 0), (169, 209, 142)] def draw_keypoints(img: Image, keypoints: ndarray, scores: ndarray = None, thresh: float = 0.2, r: int = 2, draw_text: bool = False, font: str = 'arial.ttf', font_size: int = 10): if isinstance(img, ndarray): img = PIL.Image.fromarray(img) if scores is None: scores = np.ones(keypoints.shape[0]) if draw_text: try: font = ImageFont.truetype(font, font_size) except IOError: font = ImageFont.load_default() draw = ImageDraw.Draw(img) for i, (point, score) in enumerate(zip(keypoints, scores)): if score > thresh and np.max(point) > 0: draw.ellipse([point[0] - r, point[1] - r, point[0] + r, point[1] + r], 
BN_MOMENTUM = 0.1


class BasicBlock(nn.Module):
    """Residual block of two 3x3 conv + BN layers.

    Only the first convolution applies ``stride``; the second always uses
    stride 1, so the main path is downsampled exactly once and matches a
    ``downsample`` shortcut built with the same stride.

    Args:
        inplanes: number of input channels
        planes: number of output channels
        stride: stride of the first 3x3 convolution
        downsample: optional module applied to the input to produce the
            residual when its shape differs from the main path output
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        # stride is fixed to 1 here: reusing `stride` would downsample twice
        # and break the residual addition for stride != 1. Parameter names
        # and shapes are unchanged, so existing checkpoints still load.
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out
class StageModule(nn.Module):
    # NOTE: sub-module names and nesting (branches.*, fuse_layers.i.j.*) form
    # the state-dict keys; keep the construction order as is.
    def __init__(self, input_branches, output_branches, c):
        """
        Build one stage: every branch runs its own blocks, then the outputs
        of all branches are fused across scales.
        :param input_branches: number of input branches, one per scale
        :param output_branches: number of output branches
        :param c: number of channels of the first (index 0) input branch
        """
        super().__init__()
        self.input_branches = input_branches
        self.output_branches = output_branches

        self.branches = nn.ModuleList()
        for i in range(self.input_branches):  # every branch first goes through 4 BasicBlocks
            w = c * (2 ** i)  # number of channels of the i-th branch
            branch = nn.Sequential(
                BasicBlock(w, w),
                BasicBlock(w, w),
                BasicBlock(w, w),
                BasicBlock(w, w)
            )
            self.branches.append(branch)

        self.fuse_layers = nn.ModuleList()  # fuse the outputs of all branches
        for i in range(self.output_branches):
            self.fuse_layers.append(nn.ModuleList())
            for j in range(self.input_branches):
                if i == j:
                    # same input and output branch: pass through unchanged
                    self.fuse_layers[-1].append(nn.Identity())
                elif i < j:
                    # input branch j is deeper than output branch i (its
                    # downsampling rate is larger): adjust the channels with
                    # a 1x1 conv and upsample so the tensors can be added
                    self.fuse_layers[-1].append(
                        nn.Sequential(
                            nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=1, stride=1, bias=False),
                            nn.BatchNorm2d(c * (2 ** i), momentum=BN_MOMENTUM),
                            nn.Upsample(scale_factor=2.0 ** (j - i), mode='nearest')
                        )
                    )
                else:  # i > j
                    # input branch j is shallower than output branch i (its
                    # downsampling rate is smaller): adjust the channels and
                    # downsample so the tensors can be added.
                    # Each 2x downsampling is one stride-2 3x3 conv: two for
                    # 4x, three for 8x, i - j in total.
                    ops = []
                    # the first i-j-1 convs keep the channel count and only downsample
                    for k in range(i - j - 1):
                        ops.append(
                            nn.Sequential(
                                nn.Conv2d(c * (2 ** j), c * (2 ** j), kernel_size=3, stride=2, padding=1, bias=False),
                                nn.BatchNorm2d(c * (2 ** j), momentum=BN_MOMENTUM),
                                nn.ReLU(inplace=True)
                            )
                        )
                    # the last conv both changes the channel count and downsamples
                    ops.append(
                        nn.Sequential(
                            nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=3, stride=2, padding=1, bias=False),
                            nn.BatchNorm2d(c * (2 ** i), momentum=BN_MOMENTUM)
                        )
                    )
                    self.fuse_layers[-1].append(nn.Sequential(*ops))

        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Take a list with one tensor per input branch; return one fused tensor per output branch."""
        # run every branch through its own blocks
        x = [branch(xi) for branch, xi in zip(self.branches, x)]

        # then fuse the information of the different scales:
        # output i is relu(sum over all input branches j of fuse_layers[i][j](x[j]))
        x_fused = []
        for i in range(len(self.fuse_layers)):
            x_fused.append(
                self.relu(
                    sum([self.fuse_layers[i][j](x[j]) for j in range(len(self.branches))])
                )
            )

        return x_fused
class CocoKeypoint(data.Dataset):
    """Per-person COCO keypoint dataset: one item per valid person box.

    Args:
        root: dataset root containing ``{dataset}{years}`` images and
            ``annotations/person_keypoints_{dataset}{years}.json``
        dataset: "train" or "val"
        years: year suffix used in folder and annotation file names
        transforms: optional callable ``(image, target) -> (image, target)``
        det_json_path: optional detection-result json; when given, boxes come
            from it (loaded through ``COCO.loadRes``) and no keypoint
            information is attached to the items
        fixed_size: stored on the instance as ``self.fixed_size``
    """

    def __init__(self,
                 root,
                 dataset="train",
                 years="2017",
                 transforms=None,
                 det_json_path=None,
                 fixed_size=(256, 192)):
        super().__init__()
        # NOTE: asserts are kept so callers still see AssertionError on bad paths.
        assert dataset in ["train", "val"], 'dataset must be in ["train", "val"]'
        anno_file = f"person_keypoints_{dataset}{years}.json"
        assert os.path.exists(root), "file '{}' does not exist.".format(root)
        self.img_root = os.path.join(root, f"{dataset}{years}")
        assert os.path.exists(self.img_root), "path '{}' does not exist.".format(self.img_root)
        self.anno_path = os.path.join(root, "annotations", anno_file)
        assert os.path.exists(self.anno_path), "file '{}' does not exist.".format(self.anno_path)

        self.fixed_size = fixed_size
        self.mode = dataset
        self.transforms = transforms
        self.coco = COCO(self.anno_path)
        img_ids = sorted(self.coco.imgs.keys())

        # Boxes come either from external detections or from the GT annotations.
        use_gt = det_json_path is None
        det = self.coco if use_gt else self.coco.loadRes(det_json_path)

        self.valid_person_list = []
        obj_idx = 0
        for img_id in img_ids:
            img_info = self.coco.loadImgs(img_id)[0]
            ann_ids = det.getAnnIds(imgIds=img_id)
            anns = det.loadAnns(ann_ids)
            for ann in anns:
                # only save person class
                if ann["category_id"] != 1:
                    print(f'warning: find not support id: {ann["category_id"]}, only support id: 1 (person)')
                    continue

                # Detection files (e.g. COCO_val2017_detections_AP_H_56_person.json)
                # carry boxes only, so the keypoint checks apply to GT only.
                if use_gt:
                    # skip objs without keypoints annotation
                    if "keypoints" not in ann:
                        continue
                    if max(ann["keypoints"]) == 0:
                        continue

                xmin, ymin, w, h = ann['bbox']
                # Use only valid bounding boxes
                if not (w > 0 and h > 0):
                    continue

                info = {
                    "box": [xmin, ymin, w, h],
                    "image_path": os.path.join(self.img_root, img_info["file_name"]),
                    "image_id": img_id,
                    "image_width": img_info['width'],
                    "image_height": img_info['height'],
                    "obj_origin_hw": [h, w],
                    "obj_index": obj_idx,
                    "score": ann.get("score", 1.)
                }

                if use_gt:
                    # keypoints are stored flat as (x, y, visibility) triples
                    keypoints = np.array(ann["keypoints"]).reshape([-1, 3])
                    info["keypoints"] = keypoints[:, :2]
                    info["visible"] = keypoints[:, 2]

                self.valid_person_list.append(info)
                obj_idx += 1

    def __getitem__(self, idx):
        """Return (RGB image, target dict) for person ``idx``, after transforms.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        # Deep copy so transforms never modify the cached annotation.
        target = copy.deepcopy(self.valid_person_list[idx])

        image = cv2.imread(target["image_path"])
        if image is None:
            # cv2.imread returns None instead of raising on a bad path/file.
            raise FileNotFoundError(f"failed to read image: {target['image_path']}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transforms is not None:
            # Use the target returned by the transforms, not the input one.
            image, target = self.transforms(image, target)

        return image, target

    def __len__(self):
        return len(self.valid_person_list)

    @staticmethod
    def collate_fn(batch):
        """Stack the images into one tensor; keep the targets as a tuple of dicts."""
        imgs_tuple, targets_tuple = tuple(zip(*batch))
        imgs_tensor = torch.stack(imgs_tuple)
        return imgs_tensor, targets_tuple
def plot_map(mAP):
    """Plot the per-epoch evaluation mAP and save it to ``./mAP.png``.

    Plotting is best-effort: any exception is printed instead of raised so
    that a plotting problem never aborts training. The current figure is
    closed in all cases, so a failed call does not leave an open figure
    behind for the next plot to draw into.
    """
    try:
        x = list(range(len(mAP)))
        plt.plot(x, mAP, label='mAp')
        plt.xlabel('epoch')
        plt.ylabel('mAP')
        plt.title('Eval mAP')
        plt.xlim(0, len(mAP))
        plt.legend(loc='best')
        plt.savefig('./mAP.png')
        print("successful save mAP curve!")
    except Exception as e:
        print(e)
    finally:
        # Runs on success and on failure; a no-op when no figure exists.
        plt.close()
data_transform = transforms.Compose([ transforms.AffineTransform(scale=(1.25, 1.25), fixed_size=resize_hw), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) # read json file with open(keypoint_json_path, "r") as f: person_info = json.load(f) # read single-person image img = cv2.imread(img_path) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img_tensor, target = data_transform(img, {"box": [0, 0, img.shape[1] - 1, img.shape[0] - 1]}) img_tensor = torch.unsqueeze(img_tensor, dim=0) # create model # HRNet-W32: base_channel=32 # HRNet-W48: base_channel=48 model = HighResolutionNet(base_channel=32) weights = torch.load(weights_path, map_location=device) weights = weights if "model" not in weights else weights["model"] model.load_state_dict(weights) model.to(device) model.eval() with torch.inference_mode(): outputs = model(img_tensor.to(device)) if flip_test: flip_tensor = transforms.flip_images(img_tensor) flip_outputs = torch.squeeze( transforms.flip_back(model(flip_tensor.to(device)), person_info["flip_pairs"]), ) # feature is not aligned, shift flipped heatmap for higher accuracy # https://github.com/leoxiaobin/deep-high-resolution-net.pytorch/issues/22 flip_outputs[..., 1:] = flip_outputs.clone()[..., 0: -1] outputs = (outputs + flip_outputs) * 0.5 keypoints, scores = transforms.get_final_preds(outputs, [target["reverse_trans"]], True) keypoints = np.squeeze(keypoints) scores = np.squeeze(scores) plot_img = draw_keypoints(img, keypoints, scores, thresh=0.2, r=3) plt.imshow(plot_img) plt.show() plot_img.save("test_result.jpg") if __name__ == '__main__': predict_single_person() ================================================ FILE: pytorch_keypoint/HRNet/requirements.txt ================================================ numpy opencv_python==4.5.4.60 lxml torch==1.10.1 torchvision==0.11.1 pycocotools matplotlib tqdm ================================================ FILE: pytorch_keypoint/HRNet/train.py 
def create_model(num_joints, load_pretrain_weights=True, weights_path="./hrnet_w32.pth"):
    """Build HRNet-W32 and optionally initialise it from pretrained weights.

    Args:
        num_joints: number of keypoints predicted by the final layer
        load_pretrain_weights: load ``weights_path`` into the model when True
        weights_path: checkpoint to load; download link for the default file:
            https://pan.baidu.com/s/1Lu6mMAWfm_8GGykttFMpVw (code: f43o)

    Returns:
        The (optionally initialised) HighResolutionNet.
    """
    model = HighResolutionNet(base_channel=32, num_joints=num_joints)

    if load_pretrain_weights:
        # NOTE(security): torch.load unpickles the file; only load trusted checkpoints.
        weights_dict = torch.load(weights_path, map_location='cpu')
        # Training checkpoints store the state dict under "model"; without
        # unwrapping, strict=False below would silently load nothing.
        if "model" in weights_dict:
            weights_dict = weights_dict["model"]

        for k in list(weights_dict.keys()):
            # ImageNet weights: classification head / fc layers are not used.
            is_cls_head = ("head" in k) or ("fc" in k)
            # COCO weights: the final layer only fits when num_joints matches.
            # Evaluated lazily in one expression so a key is never indexed
            # after it has been deleted.
            if is_cls_head or ("final_layer" in k and weights_dict[k].shape[0] != num_joints):
                del weights_dict[k]

        missing_keys, unexpected_keys = model.load_state_dict(weights_dict, strict=False)
        if len(missing_keys) != 0:
            print("missing_keys: ", missing_keys)

    return model
0.224, 0.225]) ]), "val": transforms.Compose([ transforms.AffineTransform(scale=(1.25, 1.25), fixed_size=fixed_size), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) } data_root = args.data_path # load train data set # coco2017 -> annotations -> person_keypoints_train2017.json train_dataset = CocoKeypoint(data_root, "train", transforms=data_transform["train"], fixed_size=args.fixed_size) # 注意这里的collate_fn是自定义的,因为读取的数据包括image和targets,不能直接使用默认的方法合成batch batch_size = args.batch_size nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]) # number of workers print('Using %g dataloader workers' % nw) train_data_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=nw, collate_fn=train_dataset.collate_fn) # load validation data set # coco2017 -> annotations -> person_keypoints_val2017.json val_dataset = CocoKeypoint(data_root, "val", transforms=data_transform["val"], fixed_size=args.fixed_size, det_json_path=args.person_det) val_data_loader = data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True, num_workers=nw, collate_fn=val_dataset.collate_fn) # create model model = create_model(num_joints=args.num_joints) # print(model) model.to(device) # define optimizer params = [p for p in model.parameters() if p.requires_grad] optimizer = torch.optim.AdamW(params, lr=args.lr, weight_decay=args.weight_decay) scaler = torch.cuda.amp.GradScaler() if args.amp else None # learning rate scheduler lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma) # 如果指定了上次训练保存的权重文件地址,则接着上次结果接着训练 if args.resume != "": checkpoint = torch.load(args.resume, map_location='cpu') model.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) args.start_epoch = checkpoint['epoch'] + 1 if args.amp and "scaler" in 
checkpoint: scaler.load_state_dict(checkpoint["scaler"]) print("the training process from epoch{}...".format(args.start_epoch)) train_loss = [] learning_rate = [] val_map = [] for epoch in range(args.start_epoch, args.epochs): # train for one epoch, printing every 50 iterations mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader, device=device, epoch=epoch, print_freq=50, warmup=True, scaler=scaler) train_loss.append(mean_loss.item()) learning_rate.append(lr) # update the learning rate lr_scheduler.step() # evaluate on the test dataset coco_info = utils.evaluate(model, val_data_loader, device=device, flip=True, flip_pairs=person_kps_info["flip_pairs"]) # write into txt with open(results_file, "a") as f: # 写入的数据包括coco指标还有loss和learning rate result_info = [f"{i:.4f}" for i in coco_info + [mean_loss.item()]] + [f"{lr:.6f}"] txt = "epoch:{} {}".format(epoch, ' '.join(result_info)) f.write(txt + "\n") val_map.append(coco_info[1]) # @0.5 mAP # save weights save_files = { 'model': model.state_dict(), 'optimizer': optimizer.state_dict(), 'lr_scheduler': lr_scheduler.state_dict(), 'epoch': epoch} if args.amp: save_files["scaler"] = scaler.state_dict() torch.save(save_files, "./save_weights/model-{}.pth".format(epoch)) # plot loss and lr curve if len(train_loss) != 0 and len(learning_rate) != 0: from plot_curve import plot_loss_and_lr plot_loss_and_lr(train_loss, learning_rate) # plot mAP curve if len(val_map) != 0: from plot_curve import plot_map plot_map(val_map) if __name__ == "__main__": import argparse parser = argparse.ArgumentParser( description=__doc__) # 训练设备类型 parser.add_argument('--device', default='cuda:0', help='device') # 训练数据集的根目录(coco2017) parser.add_argument('--data-path', default='/data/coco2017', help='dataset') # COCO数据集人体关键点信息 parser.add_argument('--keypoints-path', default="./person_keypoints.json", type=str, help='person_keypoints.json path') # 原项目提供的验证集person检测信息,如果要使用GT信息,直接将该参数置为None,建议设置成None parser.add_argument('--person-det', 
type=str, default=None) parser.add_argument('--fixed-size', default=[256, 192], nargs='+', type=int, help='input size') # keypoints点数 parser.add_argument('--num-joints', default=17, type=int, help='num_joints') # 文件保存地址 parser.add_argument('--output-dir', default='./save_weights', help='path where to save') # 若需要接着上次训练,则指定上次训练保存权重文件地址 parser.add_argument('--resume', default='', type=str, help='resume from checkpoint') # 指定接着从哪个epoch数开始训练 parser.add_argument('--start-epoch', default=0, type=int, help='start epoch') # 训练的总epoch数 parser.add_argument('--epochs', default=210, type=int, metavar='N', help='number of total epochs to run') # 针对torch.optim.lr_scheduler.MultiStepLR的参数 parser.add_argument('--lr-steps', default=[170, 200], nargs='+', type=int, help='decrease lr every step-size epochs') # 针对torch.optim.lr_scheduler.MultiStepLR的参数 parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma') # 学习率 parser.add_argument('--lr', default=0.001, type=float, help='initial learning rate, 0.02 is the default value for training ' 'on 8 gpus and 2 images_per_gpu') # AdamW的weight_decay参数 parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, metavar='W', help='weight decay (default: 1e-4)', dest='weight_decay') # 训练的batch size parser.add_argument('--batch-size', default=32, type=int, metavar='N', help='batch size when training.') # 是否使用混合精度训练(需要GPU支持混合精度) parser.add_argument("--amp", action="store_true", help="Use torch.cuda.amp for mixed precision training") args = parser.parse_args() print(args) # 检查保存权重文件夹是否存在,不存在则创建 if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) main(args) ================================================ FILE: pytorch_keypoint/HRNet/train_multi_GPU.py ================================================ import json import time import os import datetime import torch from torch.utils import data import numpy as np import transforms from model import HighResolutionNet from 
def create_model(num_joints, load_pretrain_weights=True, weights_path="./hrnet_w32.pth"):
    """Build an HRNet-W32 and optionally initialise it from a weight file.

    Args:
        num_joints: Number of keypoints the network predicts.
        load_pretrain_weights: When true, load ``weights_path`` non-strictly
            after dropping entries that do not fit this model.
        weights_path: Location of the pretrained state dict.
            Download: https://pan.baidu.com/s/1Lu6mMAWfm_8GGykttFMpVw
            (extraction code: f43o).

    Returns:
        The constructed ``HighResolutionNet``.
    """
    model = HighResolutionNet(base_channel=32, num_joints=num_joints)
    if not load_pretrain_weights:
        return model

    weights_dict = torch.load(weights_path, map_location='cpu')

    for name in list(weights_dict.keys()):
        # ImageNet weights: the "head"/"fc" entries are not used by this model.
        drop = ("head" in name) or ("fc" in name)
        # COCO weights: keep the final layer only if its joint count matches.
        # Checked only for keys that survive the first filter, so a key that
        # was just scheduled for removal is never indexed again.
        if not drop and "final_layer" in name:
            drop = weights_dict[name].shape[0] != num_joints
        if drop:
            del weights_dict[name]

    missing_keys, _ = model.load_state_dict(weights_dict, strict=False)
    if len(missing_keys) != 0:
        print("missing_keys: ", missing_keys)

    return model
std=[0.229, 0.224, 0.225]) ]) } data_root = args.data_path # load train data set # coco2017 -> annotations -> person_keypoints_train2017.json train_dataset = CocoKeypoint(data_root, "train", transforms=data_transform["train"], fixed_size=args.fixed_size) # load validation data set # coco2017 -> annotations -> person_keypoints_val2017.json val_dataset = CocoKeypoint(data_root, "val", transforms=data_transform["val"], fixed_size=args.fixed_size, det_json_path=args.person_det) print("Creating data loaders") if args.distributed: train_sampler = data.distributed.DistributedSampler(train_dataset) test_sampler = data.distributed.DistributedSampler(val_dataset) else: train_sampler = data.RandomSampler(train_dataset) test_sampler = data.SequentialSampler(val_dataset) train_batch_sampler = data.BatchSampler(train_sampler, args.batch_size, drop_last=True) data_loader = data.DataLoader(train_dataset, batch_sampler=train_batch_sampler, num_workers=args.workers, collate_fn=train_dataset.collate_fn) data_loader_test = data.DataLoader(val_dataset, batch_size=args.batch_size, sampler=test_sampler, num_workers=args.workers, collate_fn=train_dataset.collate_fn) print("Creating model") # create model num_classes equal background + classes model = create_model(num_joints=args.num_joints) model.to(device) if args.distributed and args.sync_bn: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) model_without_ddp = model if args.distributed: model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) model_without_ddp = model.module params = [p for p in model.parameters() if p.requires_grad] optimizer = torch.optim.AdamW(params, lr=args.lr, weight_decay=args.weight_decay) scaler = torch.cuda.amp.GradScaler() if args.amp else None lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma) # 如果传入resume参数,即上次训练的权重地址,则接着上次的参数训练 if args.resume: # If map_location is missing, torch.load will first load the module to 
CPU # and then copy each parameter to where it was saved, # which would result in all processes on the same machine using the same set of devices. checkpoint = torch.load(args.resume, map_location='cpu') # 读取之前保存的权重文件(包括优化器以及学习率策略) model_without_ddp.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) args.start_epoch = checkpoint['epoch'] + 1 if args.amp and "scaler" in checkpoint: scaler.load_state_dict(checkpoint["scaler"]) if args.test_only: utils.evaluate(model, data_loader_test, device=device, flip=True, flip_pairs=person_kps_info["flip_pairs"]) return train_loss = [] learning_rate = [] val_map = [] print("Start training") start_time = time.time() for epoch in range(args.start_epoch, args.epochs): if args.distributed: train_sampler.set_epoch(epoch) mean_loss, lr = utils.train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq, warmup=True, scaler=scaler) # update learning rate lr_scheduler.step() # evaluate after every epoch key_info = utils.evaluate(model, data_loader_test, device=device, flip=True, flip_pairs=person_kps_info["flip_pairs"]) # 只在主进程上进行写操作 if args.rank in [-1, 0]: train_loss.append(mean_loss.item()) learning_rate.append(lr) val_map.append(key_info[1]) # @0.5 mAP # write into txt with open(key_results_file, "a") as f: # 写入的数据包括coco指标还有loss和learning rate result_info = [f"{i:.4f}" for i in key_info + [mean_loss.item()]] + [f"{lr:.6f}"] txt = "epoch:{} {}".format(epoch, ' '.join(result_info)) f.write(txt + "\n") if args.output_dir: # 只在主进程上执行保存权重操作 save_files = {'model': model_without_ddp.state_dict(), 'optimizer': optimizer.state_dict(), 'lr_scheduler': lr_scheduler.state_dict(), 'args': args, 'epoch': epoch} if args.amp: save_files["scaler"] = scaler.state_dict() save_on_master(save_files, os.path.join(args.output_dir, f'model_{epoch}.pth')) total_time = time.time() - start_time total_time_str = 
if __name__ == "__main__":
    # Command-line entry point: parse the options, create the output
    # directory if one is given, then run main().
    import argparse

    parser = argparse.ArgumentParser(
        description=__doc__)

    # Root directory of the training data (coco2017)
    parser.add_argument('--data-path', default='/data/coco2017', help='dataset')
    # Device type used for training
    parser.add_argument('--device', default='cuda', help='device')
    # JSON file describing the COCO person keypoints
    parser.add_argument('--keypoints-path', default="./person_keypoints.json", type=str,
                        help='person_keypoints.json path')
    # Person detection results for the validation set provided by the original
    # project. Leave as None to use ground-truth boxes (recommended).
    parser.add_argument('--person-det', type=str, default=None)
    parser.add_argument('--fixed-size', default=[256, 192], nargs='+', type=int, help='input size')
    # Number of keypoints predicted per person
    parser.add_argument('--num-joints', default=17, type=int, help='num_joints(num_keypoints)')
    # Batch size on each GPU
    parser.add_argument('-b', '--batch-size', default=32, type=int,
                        help='images per gpu, the total batch size is $NGPU x batch_size')
    # Epoch index to start (or continue) training from
    parser.add_argument('--start-epoch', default=0, type=int, help='start epoch')
    # Total number of training epochs
    parser.add_argument('--epochs', default=210, type=int, metavar='N',
                        help='number of total epochs to run')
    # Number of worker processes passed to the data loaders
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    # Learning rate
    parser.add_argument('--lr', default=0.001, type=float,
                        help='initial learning rate, 0.001 is the default value for training '
                             'on 4 gpus and 32 images_per_gpu')
    # weight_decay argument of AdamW
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # Milestones for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-steps', default=[170, 200], nargs='+', type=int,
                        help='decrease lr every step-size epochs')
    # Decay factor for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')
    # How often (in iterations) training progress is printed
    parser.add_argument('--print-freq', default=50, type=int, help='print frequency')
    # Directory where checkpoints are saved
    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')
    # Continue training from a previous checkpoint
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--test-only', action="store_true", help="test only")

    # Number of processes to launch (processes, not threads)
    parser.add_argument('--world-size', default=4, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
    parser.add_argument("--sync-bn", action="store_true", help="Use sync batch norm")
    # Whether to use mixed-precision training (requires GPU support)
    parser.add_argument("--amp", action="store_true", help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()

    # If an output directory is given, create it when it does not exist yet
    if args.output_dir:
        mkdir(args.output_dir)

    main(args)
class EvalCOCOMetric:
    """Collect per-person keypoint predictions and score them with COCOeval.

    Typical use: call ``update`` for every batch, ``synchronize_results``
    once all batches are processed (gathers results from all processes and
    writes them to ``results_file_name`` on the main process), then
    ``evaluate`` to obtain the COCO statistics.
    """

    def __init__(self,
                 coco: "COCO" = None,
                 iou_type: str = "keypoints",
                 results_file_name: str = "predict_results.json",
                 classes_mapping: dict = None,
                 threshold: float = 0.2):
        """
        Args:
            coco: Ground-truth COCO object; a deep copy is stored.
            iou_type: Only ``"keypoints"`` is accepted.
            results_file_name: JSON file the merged predictions are written to.
            classes_mapping: Stored as-is; not read inside this class.
            threshold: Keypoints whose score is not greater than this value
                are excluded when averaging a person's keypoint score.
        """
        self.coco = copy.deepcopy(coco)
        self.obj_ids = []  # ids of the person instances handled by this process
        self.results = []
        self.aggregation_results = None
        self.classes_mapping = classes_mapping
        self.coco_evaluator = None
        assert iou_type in ["keypoints"]
        self.iou_type = iou_type
        self.results_file_name = results_file_name
        self.threshold = threshold

    def plot_img(self, img_path, keypoints, r=3):
        """Debug helper: draw each keypoint as a red dot of radius ``r`` and show the image."""
        img = Image.open(img_path)
        draw = ImageDraw.Draw(img)
        for point in keypoints:
            draw.ellipse([point[0] - r, point[1] - r, point[0] + r, point[1] + r],
                         fill=(255, 0, 0))
        img.show()

    def prepare_for_coco_keypoints(self, targets, outputs):
        """Convert one batch of predictions into COCO result dicts.

        Args:
            targets: One dict per person, providing ``obj_index``,
                ``image_id`` and ``score``.
            outputs: Pair ``(keypoints, scores)`` with one entry per person.
                Each entry is concatenated along axis 1, so both are expected
                to be 2-D arrays with one row per keypoint.
        """
        # Iterate over persons, not images: one image may contain several persons.
        for target, keypoints, scores in zip(targets, outputs[0], outputs[1]):
            if len(keypoints) == 0:
                continue

            obj_idx = int(target["obj_index"])
            if obj_idx in self.obj_ids:
                # Guard against the same person being recorded twice.
                continue

            self.obj_ids.append(obj_idx)
            # self.plot_img(target["image_path"], keypoints)

            # Average only the confident keypoints. The cut-off is the
            # constructor's ``threshold`` (default 0.2), not a literal.
            mask = np.greater(scores, self.threshold)
            if mask.sum() == 0:
                k_score = 0
            else:
                k_score = np.mean(scores[mask])

            # Flatten to [x1, y1, s1, x2, y2, s2, ...].
            keypoints = np.concatenate([keypoints, scores], axis=1)
            keypoints = np.reshape(keypoints, -1)

            # We recommend rounding coordinates to the nearest tenth of a pixel
            # to reduce resulting JSON file size.
            keypoints = [round(k, 2) for k in keypoints.tolist()]

            res = {"image_id": target["image_id"],
                   "category_id": 1,  # person
                   "keypoints": keypoints,
                   # Plain float so json.dumps never receives a NumPy scalar.
                   "score": float(target["score"] * k_score)}

            self.results.append(res)

    def update(self, targets, outputs):
        """Record the predictions of one batch.

        Raises:
            KeyError: If ``iou_type`` is not ``"keypoints"``.
        """
        if self.iou_type == "keypoints":
            self.prepare_for_coco_keypoints(targets, outputs)
        else:
            raise KeyError(f"not support iou_type: {self.iou_type}")

    def synchronize_results(self):
        """Merge results across processes and dump them to JSON on the main process."""
        eval_ids, eval_results = merge(self.obj_ids, self.results)
        self.aggregation_results = {"obj_ids": eval_ids, "results": eval_results}

        # Writing the file once, on the main process, is enough.
        if is_main_process():
            # write predict results into json file
            json_str = json.dumps(eval_results, indent=4)
            with open(self.results_file_name, 'w') as json_file:
                json_file.write(json_str)

    def evaluate(self):
        """Run COCOeval on the saved results file.

        Returns:
            The COCOeval ``stats`` as a list on the main process, ``None`` on
            every other process.
        """
        if not is_main_process():
            return None

        # accumulate predictions from all images
        coco_true = self.coco
        coco_pre = coco_true.loadRes(self.results_file_name)

        self.coco_evaluator = COCOeval(cocoGt=coco_true, cocoDt=coco_pre, iouType=self.iou_type)

        self.coco_evaluator.evaluate()
        self.coco_evaluator.accumulate()
        print(f"IoU metric: {self.iou_type}")
        self.coco_evaluator.summarize()

        coco_info = self.coco_evaluator.stats.tolist()  # numpy to list
        return coco_info
def convert_coco_poly_mask(segmentations, height, width):
    """Decode COCO segmentations into a stacked uint8 mask tensor.

    Args:
        segmentations: Iterable with one segmentation per object, in a form
            accepted by ``coco_mask.frPyObjects``.
        height: Image height in pixels.
        width: Image width in pixels.

    Returns:
        A tensor stacking one ``(height, width)`` mask per object along
        dim 0; a ``(0, height, width)`` uint8 tensor when there are no
        segmentations.
    """
    decoded = []
    for polygons in segmentations:
        rles = coco_mask.frPyObjects(polygons, height, width)
        obj_mask = coco_mask.decode(rles)
        # A single decoded region comes back 2-D; add the trailing axis so
        # the reduction over dim 2 below works in both cases.
        if obj_mask.ndim < 3:
            obj_mask = obj_mask[..., None]
        obj_mask = torch.as_tensor(obj_mask, dtype=torch.uint8)
        # Union of all parts belonging to the same object.
        decoded.append(obj_mask.any(dim=2))

    if not decoded:
        # No objects: return an empty stack with the right spatial size.
        return torch.zeros((0, height, width), dtype=torch.uint8)
    return torch.stack(decoded, dim=0)
class SmoothedValue(object):
    """Track a series of values and provide access to smoothed values over a
    window or the global series average.
    """

    def __init__(self, window_size=20, fmt=None):
        # Default rendering: latest value followed by the global average.
        self.fmt = "{value:.4f} ({global_avg:.4f})" if fmt is None else fmt
        self.deque = deque(maxlen=window_size)  # sliding window of recent values
        self.total = 0.0
        self.count = 0

    def update(self, value, n=1):
        """Record ``value``; it enters the window once and the global sum ``n`` times."""
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Sum ``count`` and ``total`` across processes.

        Warning: does not synchronize the deque!
        """
        if is_dist_avail_and_initialized():
            packed = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda")
            dist.barrier()
            dist.all_reduce(packed)
            synced_count, synced_total = packed.tolist()
            self.count = int(synced_count)
            self.total = synced_total

    @property
    def median(self):
        """Median of the values currently in the window."""
        window = torch.tensor(list(self.deque))
        return window.median().item()

    @property
    def avg(self):
        """Mean of the values currently in the window."""
        window = torch.tensor(list(self.deque), dtype=torch.float32)
        return window.mean().item()

    @property
    def global_avg(self):
        """Mean over everything recorded so far."""
        return self.total / self.count

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    @property
    def value(self):
        """Most recently recorded value."""
        return self.deque[-1]

    def __str__(self):
        stats = {
            "median": self.median,
            "avg": self.avg,
            "global_avg": self.global_avg,
            "max": self.max,
            "value": self.value,
        }
        return self.fmt.format(**stats)
""" world_size = get_world_size() if world_size < 2: # 单GPU的情况 return input_dict with torch.no_grad(): # 多GPU的情况 names = [] values = [] # sort the keys so that they are consistent across processes for k in sorted(input_dict.keys()): names.append(k) values.append(input_dict[k]) values = torch.stack(values, dim=0) dist.all_reduce(values) if average: values /= world_size reduced_dict = {k: v for k, v in zip(names, values)} return reduced_dict class MetricLogger(object): def __init__(self, delimiter="\t"): self.meters = defaultdict(SmoothedValue) self.delimiter = delimiter def update(self, **kwargs): for k, v in kwargs.items(): if isinstance(v, torch.Tensor): v = v.item() assert isinstance(v, (float, int)) self.meters[k].update(v) def __getattr__(self, attr): if attr in self.meters: return self.meters[attr] if attr in self.__dict__: return self.__dict__[attr] raise AttributeError("'{}' object has no attribute '{}'".format( type(self).__name__, attr)) def __str__(self): loss_str = [] for name, meter in self.meters.items(): loss_str.append( "{}: {}".format(name, str(meter)) ) return self.delimiter.join(loss_str) def synchronize_between_processes(self): for meter in self.meters.values(): meter.synchronize_between_processes() def add_meter(self, name, meter): self.meters[name] = meter def log_every(self, iterable, print_freq, header=None): i = 0 if not header: header = "" start_time = time.time() end = time.time() iter_time = SmoothedValue(fmt='{avg:.4f}') data_time = SmoothedValue(fmt='{avg:.4f}') space_fmt = ":" + str(len(str(len(iterable)))) + "d" if torch.cuda.is_available(): log_msg = self.delimiter.join([header, '[{0' + space_fmt + '}/{1}]', 'eta: {eta}', '{meters}', 'time: {time}', 'data: {data}', 'max mem: {memory:.0f}']) else: log_msg = self.delimiter.join([header, '[{0' + space_fmt + '}/{1}]', 'eta: {eta}', '{meters}', 'time: {time}', 'data: {data}']) MB = 1024.0 * 1024.0 for obj in iterable: data_time.update(time.time() - end) yield obj 
def init_distributed_mode(args):
    """Initialise ``torch.distributed`` from environment variables.

    Sets these attributes on ``args``:

    * ``RANK`` and ``WORLD_SIZE`` present: ``rank``, ``world_size`` and
      ``gpu`` (from ``LOCAL_RANK``) are read from the environment.
    * ``SLURM_PROCID`` present: ``rank`` comes from it and ``gpu`` is
      ``rank`` modulo the number of visible CUDA devices; ``world_size``
      is left as supplied by the caller.
    * Otherwise: ``distributed`` is set to ``False``, ``rank`` is set to
      ``-1`` unless the caller already provided one, and the function
      returns without initialising a process group.

    In the two distributed cases ``distributed`` is set to ``True``, the
    NCCL process group is initialised using ``args.dist_url`` and printing
    is silenced on every rank except 0.
    """
    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
        args.rank = int(os.environ["RANK"])
        args.world_size = int(os.environ['WORLD_SIZE'])
        args.gpu = int(os.environ['LOCAL_RANK'])
    elif 'SLURM_PROCID' in os.environ:
        args.rank = int(os.environ['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print('Not using distributed mode')
        args.distributed = False
        # The training script checks ``args.rank in [-1, 0]`` to decide which
        # process logs and plots. Without a rank, that check raised
        # AttributeError when the script ran outside a distributed launcher.
        if not hasattr(args, "rank"):
            args.rank = -1
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                         world_size=args.world_size, rank=args.rank)
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)
def _compute_aspect_ratios_slow(dataset, indices=None):
    """Compute width/height ratios by loading every requested sample.

    Fallback for datasets that offer no cheaper way to obtain image sizes.

    Args:
        dataset: Dataset whose samples are ``(img, target)`` pairs, where
            ``img.shape[-2:]`` is ``(height, width)``.
        indices: Sample indices to visit; all samples when ``None``.

    Returns:
        List of ``width / height`` floats, one per visited sample, in the
        order of ``indices``.
    """
    print("Your dataset doesn't support the fast path for "
          "computing the aspect ratios, so will iterate over "
          "the full dataset and load every image instead. "
          "This might take some time...")
    if indices is None:
        indices = range(len(dataset))

    class SubsetSampler(Sampler):
        """Yield exactly the given indices, in the given order."""

        def __init__(self, indices):
            self.indices = indices

        def __iter__(self):
            return iter(self.indices)

        def __len__(self):
            return len(self.indices)

    sampler = SubsetSampler(indices)
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, sampler=sampler,
        num_workers=14,  # you might want to increase it for faster processing
        collate_fn=lambda x: x[0])
    aspect_ratios = []
    # The loader yields one item per sampled index, so the progress total is
    # the number of indices, which can be smaller than len(dataset).
    with tqdm(total=len(sampler)) as pbar:
        for img, _ in data_loader:
            pbar.update(1)
            height, width = img.shape[-2:]
            aspect_ratio = float(width) / float(height)
            aspect_ratios.append(aspect_ratio)
    return aspect_ratios
def _quantize(x, bins):
    """Map every value in ``x`` to the index of the bin it falls into.

    For each value, ``bisect_right`` returns the position in the sorted bin
    edges at which the value would be inserted to the right of any equal
    edge; that position is used as the bin index.

    Args:
        x: iterable of values to quantize.
        bins: iterable of bin edges, in any order. It is not modified.

    Returns:
        list[int]: one index in ``[0, len(bins)]`` per value of ``x``.
    """
    # sorted() already returns a new list, so no defensive copy is needed
    # to leave the caller's `bins` untouched.
    edges = sorted(bins)
    return [bisect.bisect_right(edges, value) for value in x]
def train_one_epoch(model, optimizer, data_loader, device, epoch,
                    print_freq=50, warmup=False, scaler=None):
    """Train ``model`` for one pass over ``data_loader``.

    Args:
        model: network called as ``model(images)``; its output is passed to
            ``KpLoss`` together with the targets.
        optimizer: optimizer stepped once per batch.
        data_loader: iterable of ``(images, targets)`` batches; it must
            support ``len()`` when warmup is enabled.
        device: device the images are moved to.
        epoch: current epoch index, used in the log header and to decide
            whether warmup applies.
        print_freq: logging interval forwarded to ``MetricLogger.log_every``.
        warmup: when exactly ``True`` and ``epoch == 0``, a warmup learning
            rate scheduler is stepped after every batch.
        scaler: optional gradient scaler; when given, the forward pass runs
            under autocast and the scaler drives backward and step.

    Returns:
        tuple: ``(mloss, now_lr)``, the running mean loss as a 1-element
        tensor on ``device`` and the learning rate of the first parameter
        group after the last step.

    Note:
        The process exits with status 1 if the reduced loss is not finite.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if epoch == 0 and warmup is True:
        # Warm up the learning rate during the first epoch only.
        warmup_factor = 1.0 / 1000
        warmup_iters = min(1000, len(data_loader) - 1)
        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    mse = KpLoss()
    mloss = torch.zeros(1).to(device)  # running mean of the loss

    # Bind the learning rate up front so the return value is defined even
    # when the data loader yields no batches.
    now_lr = optimizer.param_groups[0]["lr"]

    for i, [images, targets] in enumerate(metric_logger.log_every(data_loader, print_freq, header)):
        images = torch.stack([image.to(device) for image in images])

        # Mixed-precision context; it is disabled when no scaler is given.
        with torch.cuda.amp.autocast(enabled=scaler is not None):
            results = model(images)
            losses = mse(results, targets)

        # Reduce losses over all GPUs for logging purposes.
        loss_dict_reduced = utils.reduce_dict({"losses": losses})
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()
        mloss = (mloss * i + loss_value) / (i + 1)  # update the running mean

        if not math.isfinite(loss_value):
            # Stop training as soon as the loss becomes inf or NaN.
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        if scaler is not None:
            scaler.scale(losses).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            losses.backward()
            optimizer.step()

        if lr_scheduler is not None:
            # Only set during the warmup epoch.
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced)
        now_lr = optimizer.param_groups[0]["lr"]
        metric_logger.update(lr=now_lr)

    return mloss, now_lr
def get_max_preds(batch_heatmaps):
    """Locate the peak of every heatmap in a batch.

    Args:
        batch_heatmaps: ``torch.Tensor`` of shape
            ``[batch_size, num_joints, height, width]``.

    Returns:
        tuple: ``(preds, maxvals)``. ``preds`` has shape
        ``[batch_size, num_joints, 2]`` and holds the ``(x, y)`` position of
        each heatmap's maximum, set to zero wherever that maximum is not
        strictly positive. ``maxvals`` has shape
        ``[batch_size, num_joints, 1]`` and holds the maximum values.
    """
    assert isinstance(batch_heatmaps, torch.Tensor), 'batch_heatmaps should be torch.Tensor'
    assert len(batch_heatmaps.shape) == 4, 'batch_images should be 4-ndim'

    batch_size, num_joints, _, w = batch_heatmaps.shape
    flat = batch_heatmaps.reshape(batch_size, num_joints, -1)
    peak_vals, peak_idx = flat.max(dim=2)
    maxvals = peak_vals.unsqueeze(dim=-1)
    peak_idx = peak_idx.float()

    # A flat index splits into column (x) = idx % w and row (y) = floor(idx / w).
    xy = torch.stack((peak_idx % w, torch.floor(peak_idx / w)), dim=-1)
    preds = xy.to(batch_heatmaps)

    # Zero out the coordinates of joints whose peak response is not positive.
    positive = (maxvals > 0.0).repeat(1, 1, 2).float().to(batch_heatmaps.device)
    preds *= positive
    return preds, maxvals
def decode_keypoints(outputs, origin_hw, num_joints: int = 17):
    """Turn per-joint heatmaps into keypoints in original-image coordinates.

    Args:
        outputs: array indexed as ``outputs[joint, row, col]``; its last two
            dimensions are the heatmap height and width.
        origin_hw: ``(height, width)`` of the original image.
        num_joints: number of leading heatmaps to decode.

    Returns:
        tuple: ``(keypoints, scores)``. ``keypoints`` is a float array of
        shape ``[num_joints, 2]`` holding ``(x, y)`` positions scaled to the
        original image and clipped to ``[0, width]`` / ``[0, height]``.
        ``scores`` is a float array with the heatmap value at each peak.
    """
    heatmap_h, heatmap_w = outputs.shape[-2:]

    # (row, col) of the strongest response of every joint.
    peaks = [np.unravel_index(np.argmax(outputs[j]), (heatmap_h, heatmap_w))
             for j in range(num_joints)]

    keypoints = np.array([rc[::-1] for rc in peaks], dtype=float)  # hw -> wh (xy)
    scores = np.array([outputs[j, rc[0], rc[1]] for j, rc in enumerate(peaks)],
                      dtype=float)

    # Rescale from heatmap resolution to the full image.
    img_h, img_w = origin_hw[0], origin_hw[1]
    keypoints[:, 0] = np.clip(keypoints[:, 0] / heatmap_w * img_w, a_min=0, a_max=img_w)
    keypoints[:, 1] = np.clip(keypoints[:, 1] / heatmap_h * img_h, a_min=0, a_max=img_h)
    return keypoints, scores
def adjust_box(xmin: float, ymin: float, w: float, h: float, fixed_size: Tuple[float, float]):
    """Grow a box along one axis so its h/w ratio matches ``fixed_size``.

    The box is never shrunk: whichever side is too short for the target
    aspect ratio is padded equally on both ends, so the centre is kept.

    Args:
        xmin: left edge of the box.
        ymin: top edge of the box.
        w: box width.
        h: box height.
        fixed_size: ``(height, width)`` defining the target aspect ratio.

    Returns:
        tuple: ``(xmin, ymin, xmax, ymax)`` of the adjusted box.
    """
    target_ratio = fixed_size[0] / fixed_size[1]
    right, bottom = xmin + w, ymin + h

    if h / w > target_ratio:
        # Box is too tall for the target ratio: pad the width.
        half_pad = (h / target_ratio - w) / 2
        return xmin - half_pad, ymin, right + half_pad, bottom

    # Otherwise pad the height.
    half_pad = (w * target_ratio - h) / 2
    return xmin, ymin - half_pad, right, bottom + half_pad
kps = target["keypoints"] vis = target["visible"] upper_kps = [] lower_kps = [] # 对可见的keypoints进行归类 for i, v in enumerate(vis): if v > 0.5: if i in self.upper_body_ids: upper_kps.append(kps[i]) else: lower_kps.append(kps[i]) # 50%的概率选择上或下半身 if random.random() < 0.5: selected_kps = upper_kps else: selected_kps = lower_kps # 如果点数太少就不做任何处理 if len(selected_kps) > 2: selected_kps = np.array(selected_kps, dtype=np.float32) xmin, ymin = np.min(selected_kps, axis=0).tolist() xmax, ymax = np.max(selected_kps, axis=0).tolist() w = xmax - xmin h = ymax - ymin if w > 1 and h > 1: # 把w和h适当放大点,要不然关键点处于边缘位置 xmin, ymin, w, h = scale_box(xmin, ymin, w, h, (1.5, 1.5)) target["box"] = [xmin, ymin, w, h] return image, target class AffineTransform(object): """scale+rotation""" def __init__(self, scale: Tuple[float, float] = None, # e.g. (0.65, 1.35) rotation: Tuple[int, int] = None, # e.g. (-45, 45) fixed_size: Tuple[int, int] = (256, 192)): self.scale = scale self.rotation = rotation self.fixed_size = fixed_size def __call__(self, img, target): src_xmin, src_ymin, src_xmax, src_ymax = adjust_box(*target["box"], fixed_size=self.fixed_size) src_w = src_xmax - src_xmin src_h = src_ymax - src_ymin src_center = np.array([(src_xmin + src_xmax) / 2, (src_ymin + src_ymax) / 2]) src_p2 = src_center + np.array([0, -src_h / 2]) # top middle src_p3 = src_center + np.array([src_w / 2, 0]) # right middle dst_center = np.array([(self.fixed_size[1] - 1) / 2, (self.fixed_size[0] - 1) / 2]) dst_p2 = np.array([(self.fixed_size[1] - 1) / 2, 0]) # top middle dst_p3 = np.array([self.fixed_size[1] - 1, (self.fixed_size[0] - 1) / 2]) # right middle if self.scale is not None: scale = random.uniform(*self.scale) src_w = src_w * scale src_h = src_h * scale src_p2 = src_center + np.array([0, -src_h / 2]) # top middle src_p3 = src_center + np.array([src_w / 2, 0]) # right middle if self.rotation is not None: angle = random.randint(*self.rotation) # 角度制 angle = angle / 180 * math.pi # 弧度制 src_p2 = src_center + 
class KeypointToHeatMap(object):
    """Render keypoints as per-joint Gaussian heatmaps.

    Args:
        heatmap_hw: ``(height, width)`` of the generated heatmaps.
        gaussian_sigma: standard deviation of the Gaussian blob; the blob is
            truncated at a radius of ``3 * gaussian_sigma``.
        keypoints_weights: optional per-joint weights multiplied into the
            visibility-based weights.
    """

    def __init__(self,
                 heatmap_hw: Tuple[int, int] = (256 // 4, 192 // 4),
                 gaussian_sigma: int = 2,
                 keypoints_weights=None):
        self.heatmap_hw = heatmap_hw
        self.sigma = gaussian_sigma
        self.kernel_radius = self.sigma * 3
        self.use_kps_weights = keypoints_weights is not None
        self.kps_weights = keypoints_weights

        # Generate the Gaussian kernel once (not normalized: the centre is 1).
        kernel_size = 2 * self.kernel_radius + 1
        kernel = np.zeros((kernel_size, kernel_size), dtype=np.float32)
        x_center = y_center = kernel_size // 2
        for x in range(kernel_size):
            for y in range(kernel_size):
                kernel[y, x] = np.exp(-((x - x_center) ** 2 + (y - y_center) ** 2) / (2 * self.sigma ** 2))
        self.kernel = kernel

    def __call__(self, image, target):
        """Add ``"heatmap"`` and ``"kps_weights"`` tensors to ``target``.

        Args:
            image: passed through unchanged.
            target: dict with ``"keypoints"`` (array of ``(x, y)`` rows in
                input-image pixels) and optionally ``"visible"``.

        Returns:
            tuple: ``(image, target)``.
        """
        kps = target["keypoints"]
        num_kps = kps.shape[0]
        kps_weights = np.ones((num_kps,), dtype=np.float32)
        if "visible" in target:
            # NOTE: this is the very same array object as target["visible"],
            # so weights zeroed below are also written into target["visible"].
            visible = target["visible"]
            kps_weights = visible

        heatmap = np.zeros((num_kps, self.heatmap_hw[0], self.heatmap_hw[1]), dtype=np.float32)
        # Keypoints are scaled by 1/4 to heatmap resolution; adding 0.5 and
        # truncating rounds non-negative coordinates to the nearest cell.
        # The builtin `int` is used because the `np.int` alias was removed
        # in NumPy 1.24.
        heatmap_kps = (kps / 4 + 0.5).astype(int)
        for kp_id in range(num_kps):
            v = kps_weights[kp_id]
            if v < 0.5:
                # Skip keypoints whose visibility is too low.
                continue

            x, y = heatmap_kps[kp_id]
            ul = [x - self.kernel_radius, y - self.kernel_radius]  # up-left x,y
            br = [x + self.kernel_radius, y + self.kernel_radius]  # bottom-right x,y
            # Skip the keypoint when the kernel footprint around (x, y) does
            # not intersect the heatmap at all (this rule is not strict).
            if ul[0] > self.heatmap_hw[1] - 1 or \
                    ul[1] > self.heatmap_hw[0] - 1 or \
                    br[0] < 0 or \
                    br[1] < 0:
                kps_weights[kp_id] = 0
                continue

            # Usable part of the Gaussian kernel (kernel coordinates).
            g_x = (max(0, -ul[0]), min(br[0], self.heatmap_hw[1] - 1) - ul[0])
            g_y = (max(0, -ul[1]), min(br[1], self.heatmap_hw[0] - 1) - ul[1])
            # Matching region of the heatmap (heatmap coordinates).
            img_x = (max(0, ul[0]), min(br[0], self.heatmap_hw[1] - 1))
            img_y = (max(0, ul[1]), min(br[1], self.heatmap_hw[0] - 1))

            if kps_weights[kp_id] > 0.5:
                # Copy the usable kernel region into the heatmap.
                heatmap[kp_id][img_y[0]:img_y[1] + 1, img_x[0]:img_x[1] + 1] = \
                    self.kernel[g_y[0]:g_y[1] + 1, g_x[0]:g_x[1] + 1]

        if self.use_kps_weights:
            kps_weights = np.multiply(kps_weights, self.kps_weights)

        # plot_heatmap(image, heatmap, kps, kps_weights)

        target["heatmap"] = torch.as_tensor(heatmap, dtype=torch.float32)
        target["kps_weights"] = torch.as_tensor(kps_weights, dtype=torch.float32)

        return image, target
def save_info(coco_evaluator, save_name: str = "record_mAP.txt"):
    """Write the COCO summary of ``coco_evaluator`` to a text file.

    Args:
        coco_evaluator: evaluator object handed to ``summarize``.
        save_name: path of the text file to (over)write.
    """
    # Compute the COCO summary over all keypoints; only the printable part
    # of the result is stored.
    _, print_coco = summarize(coco_evaluator)

    # Save the validation results to the txt file.
    report = "COCO results:" + "\n" + print_coco
    with open(save_name, "w") as f:
        f.write(report)
if __name__ == "__main__":
    import argparse

    def _str2bool(value):
        """Parse a command-line boolean.

        ``type=bool`` cannot be used with argparse: ``bool("False")`` is
        ``True`` because every non-empty string is truthy.
        """
        if isinstance(value, bool):
            return value
        lowered = value.strip().lower()
        if lowered in ("true", "t", "yes", "y", "1"):
            return True
        if lowered in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError(
            "expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser(
        description=__doc__)

    # Device to run on.
    parser.add_argument('--device', default='cuda:0', help='device')

    # Input size as two integers, e.g. `--resize-hw 256 192`.
    # (`type=list` would split the argument string into single characters.)
    parser.add_argument('--resize-hw', type=int, nargs=2, default=[256, 192],
                        metavar=('H', 'W'), help="resize for predict")

    # Whether to also evaluate horizontally flipped images.
    parser.add_argument('--flip', type=_str2bool, default=True, help='whether using flipped images')

    # Root directory of the dataset.
    parser.add_argument('--data-path', default='/data/coco2017', help='dataset root')

    # Trained weights file.
    parser.add_argument('--weights-path', default='./pose_hrnet_w32_256x192.pth', type=str, help='training weights')

    # Batch size.
    parser.add_argument('--batch-size', default=1, type=int, metavar='N',
                        help='batch size when validation.')

    # Mapping between category indices and category names.
    parser.add_argument('--label-json-path', type=str, default="person_keypoints.json")

    # Person detections on the validation set provided by the original
    # project; set to None to use ground-truth boxes instead.
    # NOTE: main() in this file builds the dataset with det_json_path=None
    # and does not read this option.
    parser.add_argument('--person-det', type=str, default="./COCO_val2017_detections_AP_H_56_person.json")

    args = parser.parse_args()

    main(args)
https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth * 注意,下载的预训练权重记得要重命名,比如在train_resnet50_fpn.py中读取的是`fasterrcnn_resnet50_fpn_coco.pth`文件, 不是`fasterrcnn_resnet50_fpn_coco-258fb6c6.pth`,然后放到当前项目根目录下即可。 ## 数据集,本例程使用的是PASCAL VOC2012数据集 * Pascal VOC2012 train/val数据集下载地址:http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar * 如果不了解数据集或者想使用自己的数据集进行训练,请参考我的bilibili:https://b23.tv/F1kSCK * 使用ResNet50+FPN以及迁移学习在VOC2012数据集上得到的权重: 链接:https://pan.baidu.com/s/1ifilndFRtAV5RDZINSHj5w 提取码:dsz8 ## 训练方法 * 确保提前准备好数据集 * 确保提前下载好对应预训练模型权重 * 若要训练mobilenetv2+fasterrcnn,直接使用train_mobilenet.py训练脚本 * 若要训练resnet50+fpn+fasterrcnn,直接使用train_resnet50_fpn.py训练脚本 * 若要使用多GPU训练,使用`python -m torch.distributed.launch --nproc_per_node=8 --use_env train_multi_GPU.py`指令,`nproc_per_node`参数为使用GPU数量 * 如果想指定使用哪些GPU设备可在指令前加上`CUDA_VISIBLE_DEVICES=0,3`(例如我只要使用设备中的第1块和第4块GPU设备) * `CUDA_VISIBLE_DEVICES=0,3 python -m torch.distributed.launch --nproc_per_node=2 --use_env train_multi_GPU.py` ## 注意事项 * 在使用训练脚本时,注意要将`--data-path`(VOC_root)设置为自己存放`VOCdevkit`文件夹所在的**根目录** * 由于带有FPN结构的Faster RCNN很吃显存,如果GPU的显存不够(如果batch_size小于8的话)建议在create_model函数中使用默认的norm_layer, 即不传递norm_layer变量,默认去使用FrozenBatchNorm2d(即不会去更新参数的bn层),使用中发现效果也很好。 * 训练过程中保存的`results.txt`是每个epoch在验证集上的COCO指标,前12个值是COCO指标,后面两个值是训练平均损失以及学习率 * 在使用预测脚本时,要将`train_weights`设置为你自己生成的权重路径。 * 使用validation文件时,注意确保你的验证集或者测试集中必须包含每个类别的目标,并且使用时只需要修改`--num-classes`、`--data-path`和`--weights-path`即可,其他代码尽量不要改动 ## 如果对Faster RCNN原理不是很理解可参考我的bilibili * https://b23.tv/sXcBSP ## 进一步了解该项目,以及对Faster RCNN代码的分析可参考我的bilibili * https://b23.tv/HvMiDy ## Faster RCNN框架图 ![Faster R-CNN](fasterRCNN.png) ================================================ FILE: pytorch_object_detection/faster_rcnn/backbone/__init__.py ================================================ from .resnet50_fpn_model import resnet50_fpn_backbone from .mobilenetv2_model import MobileNetV2 from .vgg_model import vgg from .feature_pyramid_network import LastLevelMaxPool, 
class IntermediateLayerGetter(nn.ModuleDict):
    """Wrap a model so that its forward pass returns intermediate outputs.

    Only direct children of ``model`` can be tapped, and they are executed
    one after another in registration order, so the model must use each
    child exactly once and in that order for the result to be meaningful.
    Children registered after the last requested layer are dropped.

    Arguments:
        model (nn.Module): model to extract features from.
        return_layers (Dict[name, new_name]): maps the name of each child
            module whose output is wanted to the key under which that
            output is returned.

    Raises:
        ValueError: if a key of ``return_layers`` is not a direct child of
            ``model``.
    """
    __annotations__ = {
        "return_layers": Dict[str, str],
    }

    def __init__(self, model, return_layers):
        child_names = {name for name, _ in model.named_children()}
        if not set(return_layers) <= child_names:
            raise ValueError("return_layers are not present in model")

        # Names still to be reached; the caller's mapping is left untouched.
        pending = {str(key) for key, _ in return_layers.items()}

        # Keep children in order up to (and including) the last requested
        # layer; everything after it is unused and therefore discarded.
        kept = OrderedDict()
        for name, module in model.named_children():
            kept[name] = module
            pending.discard(name)
            if not pending:
                break

        super().__init__(kept)
        self.return_layers = return_layers

    def forward(self, x):
        """Run the kept children in order and collect the requested outputs."""
        collected = OrderedDict()
        for name, module in self.items():
            x = module(x)
            if name in self.return_layers:
                collected[self.return_layers[name]] = x
        return collected
It is expected to take the fpn features, the original features and the names of the original features as input, and returns a new list of feature maps and their corresponding names """ def __init__(self, in_channels_list, out_channels, extra_blocks=None): super().__init__() # 用来调整resnet特征矩阵(layer1,2,3,4)的channel(kernel_size=1) self.inner_blocks = nn.ModuleList() # 对调整后的特征矩阵使用3x3的卷积核来得到对应的预测特征矩阵 self.layer_blocks = nn.ModuleList() for in_channels in in_channels_list: if in_channels == 0: continue inner_block_module = nn.Conv2d(in_channels, out_channels, 1) layer_block_module = nn.Conv2d(out_channels, out_channels, 3, padding=1) self.inner_blocks.append(inner_block_module) self.layer_blocks.append(layer_block_module) # initialize parameters now to avoid modifying the initialization of top_blocks for m in self.children(): if isinstance(m, nn.Conv2d): nn.init.kaiming_uniform_(m.weight, a=1) nn.init.constant_(m.bias, 0) self.extra_blocks = extra_blocks def get_result_from_inner_blocks(self, x: Tensor, idx: int) -> Tensor: """ This is equivalent to self.inner_blocks[idx](x), but torchscript doesn't support this yet """ num_blocks = len(self.inner_blocks) if idx < 0: idx += num_blocks i = 0 out = x for module in self.inner_blocks: if i == idx: out = module(x) i += 1 return out def get_result_from_layer_blocks(self, x: Tensor, idx: int) -> Tensor: """ This is equivalent to self.layer_blocks[idx](x), but torchscript doesn't support this yet """ num_blocks = len(self.layer_blocks) if idx < 0: idx += num_blocks i = 0 out = x for module in self.layer_blocks: if i == idx: out = module(x) i += 1 return out def forward(self, x: Dict[str, Tensor]) -> Dict[str, Tensor]: """ Computes the FPN for a set of feature maps. Arguments: x (OrderedDict[Tensor]): feature maps for each feature level. Returns: results (OrderedDict[Tensor]): feature maps after FPN layers. They are ordered from highest resolution first. 
""" # unpack OrderedDict into two lists for easier handling names = list(x.keys()) x = list(x.values()) # 将resnet layer4的channel调整到指定的out_channels # last_inner = self.inner_blocks[-1](x[-1]) last_inner = self.get_result_from_inner_blocks(x[-1], -1) # result中保存着每个预测特征层 results = [] # 将layer4调整channel后的特征矩阵,通过3x3卷积后得到对应的预测特征矩阵 # results.append(self.layer_blocks[-1](last_inner)) results.append(self.get_result_from_layer_blocks(last_inner, -1)) for idx in range(len(x) - 2, -1, -1): inner_lateral = self.get_result_from_inner_blocks(x[idx], idx) feat_shape = inner_lateral.shape[-2:] inner_top_down = F.interpolate(last_inner, size=feat_shape, mode="nearest") last_inner = inner_lateral + inner_top_down results.insert(0, self.get_result_from_layer_blocks(last_inner, idx)) # 在layer4对应的预测特征层基础上生成预测特征矩阵5 if self.extra_blocks is not None: results, names = self.extra_blocks(results, x, names) # make it back an OrderedDict out = OrderedDict([(k, v) for k, v in zip(names, results)]) return out class LastLevelMaxPool(torch.nn.Module): """ Applies a max_pool2d on top of the last feature map """ def forward(self, x: List[Tensor], y: List[Tensor], names: List[str]) -> Tuple[List[Tensor], List[str]]: names.append("pool") x.append(F.max_pool2d(x[-1], 1, 2, 0)) # input, kernel_size, stride, padding return x, names class BackboneWithFPN(nn.Module): """ Adds a FPN on top of a model. Internally, it uses torchvision.models._utils.IntermediateLayerGetter to extract a submodel that returns the feature maps specified in return_layers. The same limitations of IntermediatLayerGetter apply here. Arguments: backbone (nn.Module) return_layers (Dict[name, new_name]): a dict containing the names of the modules for which the activations will be returned as the key of the dict, and the value of the dict is the name of the returned activation (which the user can specify). 
in_channels_list (List[int]): number of channels for each feature map that is returned, in the order they are present in the OrderedDict out_channels (int): number of channels in the FPN. extra_blocks: ExtraFPNBlock Attributes: out_channels (int): the number of channels in the FPN """ def __init__(self, backbone: nn.Module, return_layers=None, in_channels_list=None, out_channels=256, extra_blocks=None, re_getter=True): super().__init__() if extra_blocks is None: extra_blocks = LastLevelMaxPool() if re_getter is True: assert return_layers is not None self.body = IntermediateLayerGetter(backbone, return_layers=return_layers) else: self.body = backbone self.fpn = FeaturePyramidNetwork( in_channels_list=in_channels_list, out_channels=out_channels, extra_blocks=extra_blocks, ) self.out_channels = out_channels def forward(self, x): x = self.body(x) x = self.fpn(x) return x ================================================ FILE: pytorch_object_detection/faster_rcnn/backbone/mobilenetv2_model.py ================================================ from torch import nn import torch from torchvision.ops import misc def _make_divisible(ch, divisor=8, min_ch=None): """ This function is taken from the original tf repo. It ensures that all layers have a channel number that is divisible by 8 It can be seen here: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py """ if min_ch is None: min_ch = divisor new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor) # Make sure that round down does not go down by more than 10%. 
if new_ch < 0.9 * ch: new_ch += divisor return new_ch class ConvBNReLU(nn.Sequential): def __init__(self, in_channel, out_channel, kernel_size=3, stride=1, groups=1, norm_layer=None): padding = (kernel_size - 1) // 2 if norm_layer is None: norm_layer = nn.BatchNorm2d super(ConvBNReLU, self).__init__( nn.Conv2d(in_channel, out_channel, kernel_size, stride, padding, groups=groups, bias=False), norm_layer(out_channel), nn.ReLU6(inplace=True) ) class InvertedResidual(nn.Module): def __init__(self, in_channel, out_channel, stride, expand_ratio, norm_layer=None): super(InvertedResidual, self).__init__() hidden_channel = in_channel * expand_ratio self.use_shortcut = stride == 1 and in_channel == out_channel if norm_layer is None: norm_layer = nn.BatchNorm2d layers = [] if expand_ratio != 1: # 1x1 pointwise conv layers.append(ConvBNReLU(in_channel, hidden_channel, kernel_size=1, norm_layer=norm_layer)) layers.extend([ # 3x3 depthwise conv ConvBNReLU(hidden_channel, hidden_channel, stride=stride, groups=hidden_channel, norm_layer=norm_layer), # 1x1 pointwise conv(linear) nn.Conv2d(hidden_channel, out_channel, kernel_size=1, bias=False), norm_layer(out_channel), ]) self.conv = nn.Sequential(*layers) def forward(self, x): if self.use_shortcut: return x + self.conv(x) else: return self.conv(x) class MobileNetV2(nn.Module): def __init__(self, num_classes=1000, alpha=1.0, round_nearest=8, weights_path=None, norm_layer=None): super(MobileNetV2, self).__init__() block = InvertedResidual input_channel = _make_divisible(32 * alpha, round_nearest) last_channel = _make_divisible(1280 * alpha, round_nearest) if norm_layer is None: norm_layer = nn.BatchNorm2d inverted_residual_setting = [ # t, c, n, s [1, 16, 1, 1], [6, 24, 2, 2], [6, 32, 3, 2], [6, 64, 4, 2], [6, 96, 3, 1], [6, 160, 3, 2], [6, 320, 1, 1], ] features = [] # conv1 layer features.append(ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer)) # building inverted residual residual blockes for t, c, n, s in 
class Bottleneck(nn.Module):
    """
    ResNet bottleneck residual block: 1x1 reduce -> 3x3 -> 1x1 expand.

    The block outputs ``out_channel * expansion`` channels. ``downsample`` is an
    optional module applied to the input so that the shortcut matches the shape
    of the main branch.
    """
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, norm_layer=None):
        super().__init__()
        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        expanded_channel = out_channel * self.expansion

        # 1x1 conv: squeeze channels
        self.conv1 = nn.Conv2d(in_channel, out_channel,
                               kernel_size=1, stride=1, bias=False)
        self.bn1 = norm_layer(out_channel)
        # 3x3 conv: carries the block's stride
        self.conv2 = nn.Conv2d(out_channel, out_channel,
                               kernel_size=3, stride=stride, bias=False, padding=1)
        self.bn2 = norm_layer(out_channel)
        # 1x1 conv: unsqueeze channels
        self.conv3 = nn.Conv2d(out_channel, expanded_channel,
                               kernel_size=1, stride=1, bias=False)
        self.bn3 = norm_layer(expanded_channel)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        # Shortcut branch: identity unless a projection module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out += shortcut
        return self.relu(out)
bias=False), norm_layer(channel * block.expansion)) layers = [] layers.append(block(self.in_channel, channel, downsample=downsample, stride=stride, norm_layer=norm_layer)) self.in_channel = channel * block.expansion for _ in range(1, block_num): layers.append(block(self.in_channel, channel, norm_layer=norm_layer)) return nn.Sequential(*layers) def forward(self, x): x = self.conv1(x) x = self.bn1(x) x = self.relu(x) x = self.maxpool(x) x = self.layer1(x) x = self.layer2(x) x = self.layer3(x) x = self.layer4(x) if self.include_top: x = self.avgpool(x) x = torch.flatten(x, 1) x = self.fc(x) return x def overwrite_eps(model, eps): """ This method overwrites the default eps values of all the FrozenBatchNorm2d layers of the model with the provided value. This is necessary to address the BC-breaking change introduced by the bug-fix at pytorch/vision#2933. The overwrite is applied only when the pretrained weights are loaded to maintain compatibility with previous versions. Args: model (nn.Module): The model on which we perform the overwrite. eps (float): The new value of eps. 
def resnet50_fpn_backbone(pretrain_path="",
                          norm_layer=FrozenBatchNorm2d,  # like BatchNorm2d, but its parameters are never updated
                          trainable_layers=3,
                          returned_layers=None,
                          extra_blocks=None):
    """
    Build a ResNet-50 + FPN backbone.

    Args:
        pretrain_path: path to pretrained ResNet-50 weights; leave empty to skip loading.
        norm_layer: normalization layer class. The default is FrozenBatchNorm2d,
            a BN layer whose parameters are not updated (with a very small
            batch size, training BN tends to hurt rather than help). With enough
            GPU memory for a large batch size, pass a regular BatchNorm2d instead.
            (https://github.com/facebookresearch/maskrcnn-benchmark/issues/267)
        trainable_layers: how many stages, counted from the end of the network
            (layer4, layer3, layer2, layer1, conv1), stay trainable. Must be in [0, 5].
        returned_layers: indices (1-4) of the ResNet stages whose outputs are fed
            to the FPN. Defaults to [1, 2, 3, 4].
        extra_blocks: extra block applied on top of the FPN outputs.
            Defaults to LastLevelMaxPool().

    Returns:
        A BackboneWithFPN wrapping the ResNet-50 body, with 256 output channels.
    """
    resnet_backbone = ResNet(Bottleneck, [3, 4, 6, 3],
                             include_top=False,
                             norm_layer=norm_layer)

    # NOTE(review): `norm_layer` is passed as a class, so this isinstance check
    # is False for the default argument and overwrite_eps is not called.
    # Left unchanged on purpose: enabling it would change eps (and therefore
    # the numerics) for existing checkpoints - confirm intent before changing.
    if isinstance(norm_layer, FrozenBatchNorm2d):
        overwrite_eps(resnet_backbone, 0.0)

    if pretrain_path != "":
        assert os.path.exists(pretrain_path), "{} is not exist.".format(pretrain_path)
        # Load the pretrained weights. Map to CPU first so that a checkpoint
        # saved from a GPU also loads on a CPU-only machine; load_state_dict
        # copies the values into the model's own parameters.
        print(resnet_backbone.load_state_dict(torch.load(pretrain_path, map_location="cpu"),
                                              strict=False))

    # select layers that wont be frozen
    assert 0 <= trainable_layers <= 5
    layers_to_train = ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'][:trainable_layers]

    # When every stage is trained, remember that bn1 follows conv1.
    if trainable_layers == 5:
        layers_to_train.append("bn1")

    # freeze layers
    for name, parameter in resnet_backbone.named_parameters():
        # Freeze every parameter that does not belong to a layer in layers_to_train.
        if all([not name.startswith(layer) for layer in layers_to_train]):
            parameter.requires_grad_(False)

    if extra_blocks is None:
        extra_blocks = LastLevelMaxPool()

    if returned_layers is None:
        returned_layers = [1, 2, 3, 4]
    # Stage indices must lie in 1..4.
    assert min(returned_layers) > 0 and max(returned_layers) < 5

    # e.g. return_layers = {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'}
    return_layers = {f'layer{k}': str(v) for v, k in enumerate(returned_layers)}

    # resnet_backbone.in_channel is the channel count after layer4 (2048),
    # so layer1 outputs 2048 // 8 = 256 channels.
    in_channels_stage2 = resnet_backbone.in_channel // 8  # 256
    # Channel count of each feature map handed to the FPN.
    in_channels_list = [in_channels_stage2 * 2 ** (i - 1) for i in returned_layers]
    # Channel count of every feature map produced by the FPN.
    out_channels = 256
    return BackboneWithFPN(resnet_backbone, return_layers, in_channels_list, out_channels, extra_blocks=extra_blocks)
def vgg(model_name="vgg16", weights_path=None):
    """
    Build a VGG model from one of the layouts registered in ``cfgs``.

    Args:
        model_name: key into ``cfgs`` selecting the layer configuration.
        weights_path: optional weights file forwarded to the VGG constructor.

    Returns:
        The constructed VGG model.
    """
    assert model_name in cfgs, "Warning: model number {} not in cfgs dict!".format(model_name)
    feature_extractor = make_features(cfgs[model_name])
    return VGG(feature_extractor, weights_path=weights_path)
def main(args):
    """
    Train and evaluate the detector on Pascal VOC 2012.

    Reads its settings from ``args`` (device, data_path, num_classes, lr,
    momentum, weight_decay, batch_size, aspect_ratio_group_factor, amp, resume,
    start_epoch, epochs, output_dir). After every epoch it appends the
    evaluation metrics, mean loss and learning rate to a timestamped results
    file and saves a checkpoint into ``args.output_dir``.

    Raises:
        FileNotFoundError: if ``VOCdevkit`` is not found under ``args.data_path``.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    # File that accumulates the per-epoch evaluation info.
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    # Checkpoints go to the configured output directory rather than a
    # hard-coded path; fall back to the historical default when the namespace
    # does not carry the attribute.
    output_dir = getattr(args, "output_dir", "./save_weights")
    os.makedirs(output_dir, exist_ok=True)

    data_transform = {
        "train": transforms.Compose([transforms.ToTensor(),
                                     transforms.RandomHorizontalFlip(0.5)]),
        "val": transforms.Compose([transforms.ToTensor()])
    }

    VOC_root = args.data_path
    # check voc root
    if os.path.exists(os.path.join(VOC_root, "VOCdevkit")) is False:
        raise FileNotFoundError("VOCdevkit dose not in path:'{}'.".format(VOC_root))

    # load train data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt
    train_dataset = VOCDataSet(VOC_root, "2012", data_transform["train"], "train.txt")
    train_sampler = None

    # Optionally build batches from images with similar aspect ratios;
    # this reduces the GPU memory needed for training and is on by default.
    if args.aspect_ratio_group_factor >= 0:
        train_sampler = torch.utils.data.RandomSampler(train_dataset)
        # Bin index of every image's aspect ratio.
        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)
        # Every batch draws its images from a single aspect-ratio bin.
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)

    # The collate_fn is custom: samples hold both an image and its targets, so
    # the default batching cannot be used.
    batch_size = args.batch_size
    # os.cpu_count() may return None when the count cannot be determined.
    cpu_count = os.cpu_count() or 1
    nw = min([cpu_count, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using %g dataloader workers' % nw)
    if train_sampler:
        # Aspect-ratio grouping requires passing a batch_sampler to the loader.
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_sampler=train_batch_sampler,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)
    else:
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_size=batch_size,
                                                        shuffle=True,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)

    # load validation data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt
    val_dataset = VOCDataSet(VOC_root, "2012", data_transform["val"], "val.txt")
    val_data_set_loader = torch.utils.data.DataLoader(val_dataset,
                                                      batch_size=1,
                                                      shuffle=False,
                                                      pin_memory=True,
                                                      num_workers=nw,
                                                      collate_fn=val_dataset.collate_fn)

    # create model; num_classes = background + object classes
    model = create_model(num_classes=args.num_classes + 1)
    # print(model)

    model.to(device)

    # define optimizer over the trainable parameters only
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.33)

    # Resume from a previous checkpoint when one is given.
    if args.resume != "":
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp and "scaler" in checkpoint:
            scaler.load_state_dict(checkpoint["scaler"])
        print("the training process from epoch{}...".format(args.start_epoch))

    train_loss = []
    learning_rate = []
    val_map = []

    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch, printing every 50 iterations
        mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader,
                                              device=device, epoch=epoch,
                                              print_freq=50, warmup=True,
                                              scaler=scaler)
        train_loss.append(mean_loss.item())
        learning_rate.append(lr)

        # update the learning rate
        lr_scheduler.step()

        # evaluate on the test dataset
        coco_info = utils.evaluate(model, val_data_set_loader, device=device)

        # write into txt
        with open(results_file, "a") as f:
            # One line per epoch: evaluation metrics, then loss and learning rate.
            result_info = [f"{i:.4f}" for i in coco_info + [mean_loss.item()]] + [f"{lr:.6f}"]
            txt = "epoch:{} {}".format(epoch, ' '.join(result_info))
            f.write(txt + "\n")

        val_map.append(coco_info[1])  # pascal mAP

        # save weights
        save_files = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler': lr_scheduler.state_dict(),
            'epoch': epoch}
        if args.amp:
            save_files["scaler"] = scaler.state_dict()
        torch.save(save_files, os.path.join(output_dir, "resNetFpn-model-{}.pth".format(epoch)))

    # plot loss and lr curve
    if len(train_loss) != 0 and len(learning_rate) != 0:
        from plot_curve import plot_loss_and_lr
        plot_loss_and_lr(train_loss, learning_rate)

    # plot mAP curve
    if len(val_map) != 0:
        from plot_curve import plot_map
        plot_map(val_map)
def create_model(num_classes):
    """
    Build a Faster R-CNN whose backbone is a single-feature-map extractor (no FPN).

    Args:
        num_classes: number of classes passed to FasterRCNN.

    Returns:
        The assembled FasterRCNN model.
    """
    import torchvision
    from torchvision.models.feature_extraction import create_feature_extractor

    # --- vgg16 backbone ---
    vgg16 = torchvision.models.vgg16_bn(pretrained=True)
    # print(vgg16)
    backbone = create_feature_extractor(vgg16, return_nodes={"features.42": "0"})
    # out = backbone(torch.rand(1, 3, 224, 224))
    # print(out["0"].shape)
    backbone.out_channels = 512

    # --- resnet50 backbone (alternative) ---
    # backbone = torchvision.models.resnet50(pretrained=True)
    # # print(backbone)
    # backbone = create_feature_extractor(backbone, return_nodes={"layer3": "0"})
    # # out = backbone(torch.rand(1, 3, 224, 224))
    # # print(out["0"].shape)
    # backbone.out_channels = 1024

    # --- EfficientNetB0 backbone (alternative) ---
    # backbone = torchvision.models.efficientnet_b0(pretrained=True)
    # # print(backbone)
    # backbone = create_feature_extractor(backbone, return_nodes={"features.5": "0"})
    # # out = backbone(torch.rand(1, 3, 224, 224))
    # # print(out["0"].shape)
    # backbone.out_channels = 112

    # One feature map, so one tuple of sizes and one tuple of aspect ratios.
    rpn_anchors = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))

    box_pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=['0'],  # feature maps on which RoIAlign pooling is performed
        output_size=[7, 7],   # size of the pooled feature map
        sampling_ratio=2,     # sampling ratio
    )

    return FasterRCNN(backbone=backbone,
                      num_classes=num_classes,
                      rpn_anchor_generator=rpn_anchors,
                      box_roi_pool=box_pooler)
如果按照图片高宽比采样图片,dataloader中需要使用batch_sampler train_data_loader = torch.utils.data.DataLoader(train_dataset, batch_sampler=train_batch_sampler, pin_memory=True, num_workers=nw, collate_fn=train_dataset.collate_fn) else: train_data_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=nw, collate_fn=train_dataset.collate_fn) # load validation data set # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt val_dataset = VOCDataSet(VOC_root, "2012", data_transform["val"], "val.txt") val_data_set_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, shuffle=False, pin_memory=True, num_workers=nw, collate_fn=val_dataset.collate_fn) # create model num_classes equal background + 20 classes model = create_model(num_classes=args.num_classes + 1) # print(model) model.to(device) # define optimizer params = [p for p in model.parameters() if p.requires_grad] optimizer = torch.optim.SGD(params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) scaler = torch.cuda.amp.GradScaler() if args.amp else None # learning rate scheduler lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.33) # 如果指定了上次训练保存的权重文件地址,则接着上次结果接着训练 if args.resume != "": checkpoint = torch.load(args.resume, map_location='cpu') model.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) args.start_epoch = checkpoint['epoch'] + 1 if args.amp and "scaler" in checkpoint: scaler.load_state_dict(checkpoint["scaler"]) print("the training process from epoch{}...".format(args.start_epoch)) train_loss = [] learning_rate = [] val_map = [] for epoch in range(args.start_epoch, args.epochs): # train for one epoch, printing every 10 iterations mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader, device=device, epoch=epoch, print_freq=50, warmup=True, scaler=scaler) train_loss.append(mean_loss.item()) 
if __name__ == "__main__":
    import argparse

    def _str2bool(value):
        """Parse a command-line boolean such as ``True``/``False``/``1``/``0``."""
        if isinstance(value, bool):
            return value
        lowered = value.strip().lower()
        if lowered in ("true", "t", "yes", "y", "on", "1"):
            return True
        if lowered in ("false", "f", "no", "n", "off", "0"):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got '{}'".format(value))

    parser = argparse.ArgumentParser(
        description=__doc__)

    # Device used for training.
    parser.add_argument('--device', default='cuda:0', help='device')
    # Root directory of the training data set (the folder containing VOCdevkit).
    parser.add_argument('--data-path', default='./', help='dataset')
    # Number of object classes (background not included).
    parser.add_argument('--num-classes', default=20, type=int, help='num_classes')
    # Directory where checkpoints are saved.
    parser.add_argument('--output-dir', default='./save_weights', help='path where to save')
    # Checkpoint to resume training from, if any.
    parser.add_argument('--resume', default='', type=str, help='resume from checkpoint')
    # Epoch index to start training from.
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # Total number of training epochs.
    parser.add_argument('--epochs', default=15, type=int, metavar='N',
                        help='number of total epochs to run')
    # Learning rate.
    parser.add_argument('--lr', default=0.005, type=float,
                        help='initial learning rate, 0.02 is the default value for training '
                             'on 8 gpus and 2 images_per_gpu')
    # SGD momentum.
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # SGD weight decay.
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # Training batch size.
    parser.add_argument('--batch_size', default=4, type=int, metavar='N',
                        help='batch size when training.')
    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
    # Mixed precision training (requires a GPU that supports it).
    # Parsed as a real boolean: without a type, any value - including the
    # string "False" - is truthy and would switch AMP on. Accepts a bare
    # `--amp` as well as `--amp True` / `--amp False`.
    parser.add_argument("--amp", default=False, nargs="?", const=True, type=_str2bool,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()
    print(args)

    # Create the checkpoint directory when it does not exist yet.
    os.makedirs(args.output_dir, exist_ok=True)

    main(args)
def draw_text(draw,
              box: list,
              cls: int,
              score: float,
              category_index: dict,
              color: str,
              font: str = 'arial.ttf',
              font_size: int = 24):
    """
    Draw the class name and confidence of one detection next to its box.

    The label ``"<name>: <score>%"`` is rendered one character at a time on
    a filled background. It is placed above the box when there is room,
    otherwise directly below it.

    Args:
        draw: drawing context exposing ``rectangle`` and ``text``.
        box: ``[left, top, right, bottom]`` of the detection.
        cls: class id; looked up in ``category_index`` as ``str(cls)``.
        score: confidence, rendered as ``int(100 * score)`` percent.
        category_index: mapping from class id string to display name.
        color: fill colour of the label background.
        font: TrueType font file name; the default bitmap font is used when
            it cannot be loaded.
        font_size: size passed to the TrueType loader.
    """
    try:
        text_font = ImageFont.truetype(font, font_size)
    except IOError:
        text_font = ImageFont.load_default()

    def _glyph_size(text):
        # ``getsize`` was removed in Pillow 10. The right/bottom edges of
        # ``getbbox`` carry the same extent, so prefer it when present and
        # fall back to ``getsize`` on older Pillow releases.
        if hasattr(text_font, "getbbox"):
            _, _, glyph_right, glyph_bottom = text_font.getbbox(text)
            return glyph_right, glyph_bottom
        return text_font.getsize(text)

    left, top, _, bottom = box
    display_str = f"{category_index[str(cls)]}: {int(100 * score)}%"
    # Measure every character once; the sizes are reused for layout below.
    glyph_sizes = [_glyph_size(ch) for ch in display_str]
    # The label gets a top and bottom margin of 0.05x the tallest glyph.
    display_str_height = (1 + 2 * 0.05) * max(height for _, height in glyph_sizes)

    # If the label would stick out above the image, stack it below the
    # bounding box instead of above.
    if top > display_str_height:
        text_top = top - display_str_height
        text_bottom = top
    else:
        text_top = bottom
        text_bottom = bottom + display_str_height

    for ch, (text_width, _) in zip(display_str, glyph_sizes):
        margin = np.ceil(0.05 * text_width)
        draw.rectangle([(left, text_top),
                        (left + text_width + 2 * margin, text_bottom)], fill=color)
        draw.text((left + margin, text_top),
                  ch,
                  fill='black',
                  font=text_font)
        # Advance the pen by the glyph width for the next character.
        left += text_width
class VOCDataSet(Dataset):
    """Read and parse the PASCAL VOC2007/2012 detection data set.

    Each sample is an ``(image, target)`` pair where ``target`` is a dict
    with the keys ``boxes``, ``labels``, ``image_id``, ``area`` and
    ``iscrowd`` (the latter filled from the VOC ``difficult`` flag).
    """

    def __init__(self, voc_root, year="2012", transforms=None, txt_name: str = "train.txt"):
        """
        Args:
            voc_root: directory containing ``VOCdevkit``, or a path that
                already includes ``VOCdevkit``.
            year: ``"2007"`` or ``"2012"``.
            transforms: optional callable applied as
                ``transforms(image, target)``.
            txt_name: split file under ``ImageSets/Main``.
        """
        assert year in ["2007", "2012"], "year must be in ['2007', '2012']"
        # Be tolerant about whether voc_root already points into VOCdevkit.
        if "VOCdevkit" in voc_root:
            self.root = os.path.join(voc_root, f"VOC{year}")
        else:
            self.root = os.path.join(voc_root, "VOCdevkit", f"VOC{year}")
        self.img_root = os.path.join(self.root, "JPEGImages")
        self.annotations_root = os.path.join(self.root, "Annotations")

        # Read the split file (train.txt / val.txt), ignoring blank lines.
        txt_path = os.path.join(self.root, "ImageSets", "Main", txt_name)
        assert os.path.exists(txt_path), "not found {} file.".format(txt_name)

        with open(txt_path) as read:
            xml_list = [os.path.join(self.annotations_root, line.strip() + ".xml")
                        for line in read if line.strip()]

        # Keep only annotation files that exist and contain objects.
        self.xml_list = []
        for xml_path in xml_list:
            if not os.path.exists(xml_path):
                print(f"Warning: not found '{xml_path}', skip this annotation file.")
                continue

            data = self._read_annotation(xml_path)
            if "object" not in data:
                print(f"INFO: no objects in {xml_path}, skip this annotation file.")
                continue

            self.xml_list.append(xml_path)

        assert len(self.xml_list) > 0, "in '{}' file does not find any information.".format(txt_path)

        # Class-name -> index mapping, read relative to the working directory.
        json_file = './pascal_voc_classes.json'
        assert os.path.exists(json_file), "{} file not exist.".format(json_file)
        with open(json_file, 'r') as f:
            self.class_dict = json.load(f)

        self.transforms = transforms

    def __len__(self):
        """Return the number of usable annotation files."""
        return len(self.xml_list)

    def _read_annotation(self, xml_path):
        """Parse one annotation file and return its ``annotation`` dict."""
        with open(xml_path) as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        return self.parse_xml_to_dict(xml)["annotation"]

    @staticmethod
    def _build_target(boxes, labels, iscrowd, idx):
        """Pack per-object Python lists into the tensor dict of one sample."""
        # reshape(-1, 4) keeps a well-formed [0, 4] tensor when no box is
        # left; a bare empty list would give a 1-D tensor and the column
        # indexing below would raise IndexError.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        iscrowd = torch.as_tensor(iscrowd, dtype=torch.int64)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        return target

    def __getitem__(self, idx):
        """Load image ``idx`` and its target, applying ``transforms`` if set.

        Raises:
            ValueError: if the image file is not a JPEG.
        """
        xml_path = self.xml_list[idx]
        data = self._read_annotation(xml_path)
        img_path = os.path.join(self.img_root, data["filename"])
        image = Image.open(img_path)
        if image.format != "JPEG":
            raise ValueError("Image '{}' format not JPEG".format(img_path))

        boxes = []
        labels = []
        iscrowd = []
        assert "object" in data, "{} lack of object information.".format(xml_path)
        for obj in data["object"]:
            xmin = float(obj["bndbox"]["xmin"])
            xmax = float(obj["bndbox"]["xmax"])
            ymin = float(obj["bndbox"]["ymin"])
            ymax = float(obj["bndbox"]["ymax"])

            # Some annotations have zero width or height; such boxes make
            # the regression loss NaN, so they are dropped.
            if xmax <= xmin or ymax <= ymin:
                print("Warning: in '{}' xml, there are some bbox w/h <=0".format(xml_path))
                continue

            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(self.class_dict[obj["name"]])
            # A missing "difficult" tag is treated as not difficult.
            iscrowd.append(int(obj.get("difficult", 0)))

        target = self._build_target(boxes, labels, iscrowd, idx)

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        return image, target

    def get_height_and_width(self, idx):
        """Return ``(height, width)`` of image ``idx`` as stored in its XML."""
        data = self._read_annotation(self.xml_list[idx])
        data_height = int(data["size"]["height"])
        data_width = int(data["size"]["width"])
        return data_height, data_width

    def parse_xml_to_dict(self, xml):
        """
        Recursively convert an XML element into nested dictionaries.

        Args:
            xml: element exposing ``tag``, ``text``, ``len()`` and iteration
                over children (e.g. parsed with ``lxml.etree``).

        Returns:
            ``{xml.tag: value}`` where ``value`` is the element text for a
            leaf, otherwise a dict of its children. ``object`` children are
            collected into a list because an image may hold several.
        """
        if len(xml) == 0:  # leaf node: return its text directly
            return {xml.tag: xml.text}

        result = {}
        for child in xml:
            child_result = self.parse_xml_to_dict(child)
            if child.tag != 'object':
                result[child.tag] = child_result[child.tag]
            else:
                if child.tag not in result:
                    result[child.tag] = []
                result[child.tag].append(child_result[child.tag])
        return {xml.tag: result}

    def coco_index(self, idx):
        """
        Return annotation info for pycocotools statistics without opening
        the image, which makes gathering the statistics much faster.

        Unlike ``__getitem__``, boxes are not filtered and no transform is
        applied.

        Args:
            idx: index of the sample.

        Returns:
            ``((height, width), target)``.
        """
        data = self._read_annotation(self.xml_list[idx])
        data_height = int(data["size"]["height"])
        data_width = int(data["size"]["width"])

        boxes = []
        labels = []
        iscrowd = []
        for obj in data["object"]:
            xmin = float(obj["bndbox"]["xmin"])
            xmax = float(obj["bndbox"]["xmax"])
            ymin = float(obj["bndbox"]["ymin"])
            ymax = float(obj["bndbox"]["ymax"])
            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(self.class_dict[obj["name"]])
            # Same tolerance as __getitem__: no "difficult" tag means 0.
            iscrowd.append(int(obj.get("difficult", 0)))

        target = self._build_target(boxes, labels, iscrowd, idx)

        return (data_height, data_width), target

    @staticmethod
    def collate_fn(batch):
        """Turn a list of ``(image, target)`` pairs into a pair of tuples."""
        return tuple(zip(*batch))
def batched_nms(boxes, scores, idxs, iou_threshold):
    # type: (Tensor, Tensor, Tensor, float) -> Tensor
    """
    Run non-maximum suppression separately for every category.

    Boxes are only suppressed by boxes that share the same value in
    ``idxs``; boxes of different categories never suppress each other.

    Parameters
    ----------
    boxes : Tensor[N, 4]
        boxes in (x1, y1, x2, y2) format
    scores : Tensor[N]
        score of every box
    idxs : Tensor[N]
        category index of every box
    iou_threshold : float
        overlapping boxes with IoU > iou_threshold are discarded

    Returns
    -------
    keep : Tensor
        int64 indices of the boxes kept, as returned by ``nms``
    """
    # Nothing to suppress: return an empty index tensor on the same device.
    if boxes.numel() == 0:
        return torch.empty((0,), dtype=torch.int64, device=boxes.device)

    # Shift the boxes of each category by a category-dependent offset that
    # exceeds the largest coordinate, so boxes of different categories can
    # never overlap and a single nms call handles all of them.
    shift_unit = boxes.max() + 1
    # ``to(boxes)`` only matches the dtype and device of ``boxes``.
    per_box_shift = idxs.to(boxes) * shift_unit
    shifted_boxes = boxes + per_box_shift[:, None]
    return nms(shifted_boxes, scores, iou_threshold)
def box_area(boxes):
    """
    Compute the area of every box in ``boxes``.

    Arguments:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format

    Returns:
        area (Tensor[N]): width times height of each box
    """
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    return widths * heights
class BalancedPositiveNegativeSampler(object):
    """
    Sample a fixed-size batch per image with a bounded share of positives.
    """

    def __init__(self, batch_size_per_image, positive_fraction):
        # type: (int, float) -> None
        """
        Arguments:
            batch_size_per_image (int): number of elements sampled per image
            positive_fraction (float): maximum share of positives per batch
        """
        self.batch_size_per_image = batch_size_per_image
        self.positive_fraction = positive_fraction

    def __call__(self, matched_idxs):
        # type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
        """
        Arguments:
            matched_idxs: one tensor per image. Values >= 1 are positives,
                values equal to 0 are negatives, anything else is ignored.

        Returns:
            pos_idx (list[tensor]), neg_idx (list[tensor]): per image, a
            uint8 mask of the sampled positives and of the sampled negatives.
        """
        pos_masks = []
        neg_masks = []
        # Upper bound on positives; identical for every image.
        pos_quota = int(self.batch_size_per_image * self.positive_fraction)

        for labels in matched_idxs:
            pos_candidates = torch.where(torch.ge(labels, 1))[0]
            neg_candidates = torch.where(torch.eq(labels, 0))[0]

            # Take every positive when there are fewer than the quota, then
            # fill the rest of the batch with negatives (again capped by
            # how many exist).
            n_pos = min(pos_candidates.numel(), pos_quota)
            n_neg = min(neg_candidates.numel(), self.batch_size_per_image - n_pos)

            # Random subset: shuffle the candidate positions, keep a prefix.
            pos_pick = torch.randperm(pos_candidates.numel(), device=pos_candidates.device)[:n_pos]
            neg_pick = torch.randperm(neg_candidates.numel(), device=neg_candidates.device)[:n_neg]

            # Convert the chosen indices into binary masks.
            pos_mask = torch.zeros_like(labels, dtype=torch.uint8)
            neg_mask = torch.zeros_like(labels, dtype=torch.uint8)
            pos_mask[pos_candidates[pos_pick]] = 1
            neg_mask[neg_candidates[neg_pick]] = 1

            pos_masks.append(pos_mask)
            neg_masks.append(neg_mask)

        return pos_masks, neg_masks
class BoxCoder(object):
    """
    Convert boxes to and from the regression representation used to train
    the box regressors.
    """

    def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)):
        # type: (Tuple[float, float, float, float], float) -> None
        """
        Arguments:
            weights (4-element tuple): scaling applied to (dx, dy, dw, dh)
            bbox_xform_clip (float): upper bound for dw/dh before ``exp``
        """
        self.weights = weights
        self.bbox_xform_clip = bbox_xform_clip

    def encode(self, reference_boxes, proposals):
        # type: (List[Tensor], List[Tensor]) -> List[Tensor]
        """
        Compute regression targets from anchors/proposals and their matched
        ground-truth boxes.

        Args:
            reference_boxes: List[Tensor], matched gt boxes per image
            proposals: List[Tensor], anchors/proposals per image

        Returns:
            regression targets, split back per image
        """
        # Remember the per-image counts so the batch can be processed in
        # one go and split again afterwards.
        counts = [len(per_image) for per_image in reference_boxes]
        merged_reference = torch.cat(reference_boxes, dim=0)
        merged_proposals = torch.cat(proposals, dim=0)

        merged_targets = self.encode_single(merged_reference, merged_proposals)
        return merged_targets.split(counts, 0)

    def encode_single(self, reference_boxes, proposals):
        """
        Encode one set of proposals with respect to reference boxes.

        Arguments:
            reference_boxes (Tensor): reference boxes
            proposals (Tensor): boxes to be encoded
        """
        # Build the weights with the dtype/device of the reference boxes.
        weights = torch.as_tensor(self.weights,
                                  dtype=reference_boxes.dtype,
                                  device=reference_boxes.device)
        return encode_boxes(reference_boxes, proposals, weights)

    def decode(self, rel_codes, boxes):
        # type: (Tensor, List[Tensor]) -> Tensor
        """
        Apply predicted regression parameters to anchors/proposals.

        Args:
            rel_codes: bbox regression parameters
            boxes: anchors/proposals, one tensor per image

        Returns:
            decoded boxes, reshaped to ``(num_boxes, -1, 4)`` when at least
            one box is present
        """
        assert isinstance(boxes, (list, tuple))
        assert isinstance(rel_codes, torch.Tensor)

        total_boxes = 0
        for per_image in boxes:
            total_boxes += per_image.size(0)
        stacked_boxes = torch.cat(boxes, dim=0)

        decoded = self.decode_single(rel_codes, stacked_boxes)

        # Skip the reshape for an empty result, which would make it fail.
        if total_boxes > 0:
            decoded = decoded.reshape(total_boxes, -1, 4)
        return decoded

    def decode_single(self, rel_codes, boxes):
        """
        Decode relative offsets against a set of reference boxes.

        Arguments:
            rel_codes (Tensor): encoded boxes (bbox regression parameters)
            boxes (Tensor): reference boxes (anchors/proposals)
        """
        boxes = boxes.to(rel_codes.dtype)

        # Reference boxes as (centre, size); input is xmin, ymin, xmax, ymax.
        ref_w = boxes[:, 2] - boxes[:, 0]
        ref_h = boxes[:, 3] - boxes[:, 1]
        ref_cx = boxes[:, 0] + 0.5 * ref_w
        ref_cy = boxes[:, 1] + 0.5 * ref_h

        # Undo the weighting; every 4th column belongs to the same
        # parameter, one group of four per predicted class.
        weight_x, weight_y, weight_w, weight_h = self.weights
        dx = rel_codes[:, 0::4] / weight_x
        dy = rel_codes[:, 1::4] / weight_y
        dw = rel_codes[:, 2::4] / weight_w
        dh = rel_codes[:, 3::4] / weight_h

        # Cap dw/dh so torch.exp() never sees overly large values.
        dw = torch.clamp(dw, max=self.bbox_xform_clip)
        dh = torch.clamp(dh, max=self.bbox_xform_clip)

        centre_x = dx * ref_w[:, None] + ref_cx[:, None]
        centre_y = dy * ref_h[:, None] + ref_cy[:, None]
        out_w = torch.exp(dw) * ref_w[:, None]
        out_h = torch.exp(dh) * ref_h[:, None]

        half_w = torch.tensor(0.5, dtype=centre_x.dtype, device=out_w.device) * out_w
        half_h = torch.tensor(0.5, dtype=centre_y.dtype, device=out_h.device) * out_h

        x_min = centre_x - half_w
        y_min = centre_y - half_h
        x_max = centre_x + half_w
        y_max = centre_y + half_h

        # Interleave to (xmin, ymin, xmax, ymax) per class, then flatten.
        return torch.stack((x_min, y_min, x_max, y_max), dim=2).flatten(1)
or equal to this value are candidate matches. low_threshold (float): a lower quality threshold used to stratify matches into three levels: 1) matches >= high_threshold 2) BETWEEN_THRESHOLDS matches in [low_threshold, high_threshold) 3) BELOW_LOW_THRESHOLD matches in [0, low_threshold) allow_low_quality_matches (bool): if True, produce additional matches for predictions that have only low-quality match candidates. See set_low_quality_matches_ for more details. """ self.BELOW_LOW_THRESHOLD = -1 self.BETWEEN_THRESHOLDS = -2 assert low_threshold <= high_threshold self.high_threshold = high_threshold # 0.7 self.low_threshold = low_threshold # 0.3 self.allow_low_quality_matches = allow_low_quality_matches def __call__(self, match_quality_matrix): """ 计算anchors与每个gtboxes匹配的iou最大值,并记录索引, iou= self.low_threshold) & ( matched_vals < self.high_threshold ) # iou小于low_threshold的matches索引置为-1 matches[below_low_threshold] = self.BELOW_LOW_THRESHOLD # -1 # iou在[low_threshold, high_threshold]之间的matches索引置为-2 matches[between_thresholds] = self.BETWEEN_THRESHOLDS # -2 if self.allow_low_quality_matches: assert all_matches is not None self.set_low_quality_matches_(matches, all_matches, match_quality_matrix) return matches def set_low_quality_matches_(self, matches, all_matches, match_quality_matrix): """ Produce additional matches for predictions that have only low-quality matches. Specifically, for each ground-truth find the set of predictions that have maximum overlap with it (including ties); for each prediction in that set, if it is unmatched, then match it to the ground-truth with which it has the highest quality value. """ # For each gt, find the prediction with which it has highest quality # 对于每个gt boxes寻找与其iou最大的anchor, # highest_quality_foreach_gt为匹配到的最大iou值 highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1) # the dimension to reduce. 
def smooth_l1_loss(input, target, beta: float = 1. / 9, size_average: bool = True):
    """
    Smooth L1 loss with an adjustable ``beta`` switch-over point.

    Absolute errors below ``beta`` are penalised quadratically
    (``0.5 * err ** 2 / beta``), larger ones linearly
    (``err - 0.5 * beta``). The result is averaged over all elements when
    ``size_average`` is True and summed otherwise.
    """
    abs_err = torch.abs(input - target)
    quadratic = 0.5 * abs_err ** 2 / beta
    linear = abs_err - 0.5 * beta
    per_element = torch.where(torch.lt(abs_err, beta), quadratic, linear)
    return per_element.mean() if size_average else per_element.sum()
Arguments: backbone (nn.Module): rpn (nn.Module): roi_heads (nn.Module): takes the features + the proposals from the RPN and computes detections / masks from it. transform (nn.Module): performs the data transformation from the inputs to feed into the model """ def __init__(self, backbone, rpn, roi_heads, transform): super(FasterRCNNBase, self).__init__() self.transform = transform self.backbone = backbone self.rpn = rpn self.roi_heads = roi_heads # used only on torchscript mode self._has_warned = False @torch.jit.unused def eager_outputs(self, losses, detections): # type: (Dict[str, Tensor], List[Dict[str, Tensor]]) -> Union[Dict[str, Tensor], List[Dict[str, Tensor]]] if self.training: return losses return detections def forward(self, images, targets=None): # type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]] """ Arguments: images (list[Tensor]): images to be processed targets (list[Dict[Tensor]]): ground-truth boxes present in the image (optional) Returns: result (list[BoxList] or dict[Tensor]): the output from the model. During training, it returns a dict[Tensor] which contains the losses. During testing, it returns list[BoxList] contains additional fields like `scores`, `labels` and `mask` (for Mask R-CNN models). 
""" if self.training and targets is None: raise ValueError("In training mode, targets should be passed") if self.training: assert targets is not None for target in targets: # 进一步判断传入的target的boxes参数是否符合规定 boxes = target["boxes"] if isinstance(boxes, torch.Tensor): if len(boxes.shape) != 2 or boxes.shape[-1] != 4: raise ValueError("Expected target boxes to be a tensor" "of shape [N, 4], got {:}.".format( boxes.shape)) else: raise ValueError("Expected target boxes to be of type " "Tensor, got {:}.".format(type(boxes))) original_image_sizes = torch.jit.annotate(List[Tuple[int, int]], []) for img in images: val = img.shape[-2:] assert len(val) == 2 # 防止输入的是个一维向量 original_image_sizes.append((val[0], val[1])) # original_image_sizes = [img.shape[-2:] for img in images] images, targets = self.transform(images, targets) # 对图像进行预处理 # print(images.tensors.shape) features = self.backbone(images.tensors) # 将图像输入backbone得到特征图 if isinstance(features, torch.Tensor): # 若只在一层特征层上预测,将feature放入有序字典中,并编号为‘0’ features = OrderedDict([('0', features)]) # 若在多层特征层上预测,传入的就是一个有序字典 # 将特征层以及标注target信息传入rpn中 # proposals: List[Tensor], Tensor_shape: [num_proposals, 4], # 每个proposals是绝对坐标,且为(x1, y1, x2, y2)格式 proposals, proposal_losses = self.rpn(images, features, targets) # 将rpn生成的数据以及标注target信息传入fast rcnn后半部分 detections, detector_losses = self.roi_heads(features, proposals, images.image_sizes, targets) # 对网络的预测结果进行后处理(主要将bboxes还原到原图像尺度上) detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes) losses = {} losses.update(detector_losses) losses.update(proposal_losses) if torch.jit.is_scripting(): if not self._has_warned: warnings.warn("RCNN always returns a (Losses, Detections) tuple in scripting") self._has_warned = True return losses, detections else: return self.eager_outputs(losses, detections) # if self.training: # return losses # # return detections class TwoMLPHead(nn.Module): """ Standard heads for FPN-based models Arguments: in_channels (int): number of 
class FastRCNNPredictor(nn.Module):
    """
    Final prediction layers of Fast R-CNN: one linear layer for class scores
    and one linear layer for per-class box regression deltas.

    Arguments:
        in_channels (int): size of the input feature vector
        num_classes (int): number of output classes (including background)
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        # one score per class
        self.cls_score = nn.Linear(in_channels, num_classes)
        # four regression values per class
        self.bbox_pred = nn.Linear(in_channels, 4 * num_classes)

    def forward(self, x):
        # A 4-D input is only accepted when its two trailing dims are 1x1.
        if x.dim() == 4:
            assert list(x.shape[2:]) == [1, 1]
        flat = torch.flatten(x, start_dim=1)
        return self.cls_score(flat), self.bbox_pred(flat)
The fields of the Dict are as follows: - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values between 0 and H and 0 and W - labels (Int64Tensor[N]): the predicted labels for each image - scores (Tensor[N]): the scores or each prediction Arguments: backbone (nn.Module): the network used to compute the features for the model. It should contain a out_channels attribute, which indicates the number of output channels that each feature map has (and it should be the same for all feature maps). The backbone should return a single Tensor or and OrderedDict[Tensor]. num_classes (int): number of output classes of the model (including the background). If box_predictor is specified, num_classes should be None. min_size (int): minimum size of the image to be rescaled before feeding it to the backbone max_size (int): maximum size of the image to be rescaled before feeding it to the backbone image_mean (Tuple[float, float, float]): mean values used for input normalization. They are generally the mean values of the dataset on which the backbone has been trained on image_std (Tuple[float, float, float]): std values used for input normalization. They are generally the std values of the dataset on which the backbone has been trained on rpn_anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature maps. 
rpn_head (nn.Module): module that computes the objectness and regression deltas from the RPN rpn_pre_nms_top_n_train (int): number of proposals to keep before applying NMS during training rpn_pre_nms_top_n_test (int): number of proposals to keep before applying NMS during testing rpn_post_nms_top_n_train (int): number of proposals to keep after applying NMS during training rpn_post_nms_top_n_test (int): number of proposals to keep after applying NMS during testing rpn_nms_thresh (float): NMS threshold used for postprocessing the RPN proposals rpn_fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be considered as positive during training of the RPN. rpn_bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be considered as negative during training of the RPN. rpn_batch_size_per_image (int): number of anchors that are sampled during training of the RPN for computing the loss rpn_positive_fraction (float): proportion of positive anchors in a mini-batch during training of the RPN rpn_score_thresh (float): during inference, only return proposals with a classification score greater than rpn_score_thresh box_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in the locations indicated by the bounding boxes box_head (nn.Module): module that takes the cropped feature maps as input box_predictor (nn.Module): module that takes the output of box_head and returns the classification logits and box regression deltas. box_score_thresh (float): during inference, only return proposals with a classification score greater than box_score_thresh box_nms_thresh (float): NMS threshold for the prediction head. Used during inference box_detections_per_img (int): maximum number of detections per image, for all classes. 
box_fg_iou_thresh (float): minimum IoU between the proposals and the GT box so that they can be considered as positive during training of the classification head box_bg_iou_thresh (float): maximum IoU between the proposals and the GT box so that they can be considered as negative during training of the classification head box_batch_size_per_image (int): number of proposals that are sampled during training of the classification head box_positive_fraction (float): proportion of positive proposals in a mini-batch during training of the classification head bbox_reg_weights (Tuple[float, float, float, float]): weights for the encoding/decoding of the bounding boxes """ def __init__(self, backbone, num_classes=None, # transform parameter min_size=800, max_size=1333, # 预处理resize时限制的最小尺寸与最大尺寸 image_mean=None, image_std=None, # 预处理normalize时使用的均值和方差 # RPN parameters rpn_anchor_generator=None, rpn_head=None, rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000, # rpn中在nms处理前保留的proposal数(根据score) rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000, # rpn中在nms处理后保留的proposal数 rpn_nms_thresh=0.7, # rpn中进行nms处理时使用的iou阈值 rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3, # rpn计算损失时,采集正负样本设置的阈值 rpn_batch_size_per_image=256, rpn_positive_fraction=0.5, # rpn计算损失时采样的样本数,以及正样本占总样本的比例 rpn_score_thresh=0.0, # Box parameters box_roi_pool=None, box_head=None, box_predictor=None, # 移除低目标概率 fast rcnn中进行nms处理的阈值 对预测结果根据score排序取前100个目标 box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100, box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5, # fast rcnn计算误差时,采集正负样本设置的阈值 box_batch_size_per_image=512, box_positive_fraction=0.25, # fast rcnn计算误差时采样的样本数,以及正样本占所有样本的比例 bbox_reg_weights=None): if not hasattr(backbone, "out_channels"): raise ValueError( "backbone should contain an attribute out_channels" "specifying the number of output channels (assumed to be the" "same for all the levels" ) assert isinstance(rpn_anchor_generator, (AnchorsGenerator, type(None))) assert 
isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None))) if num_classes is not None: if box_predictor is not None: raise ValueError("num_classes should be None when box_predictor " "is specified") else: if box_predictor is None: raise ValueError("num_classes should not be None when box_predictor " "is not specified") # 预测特征层的channels out_channels = backbone.out_channels # 若anchor生成器为空,则自动生成针对resnet50_fpn的anchor生成器 if rpn_anchor_generator is None: anchor_sizes = ((32,), (64,), (128,), (256,), (512,)) aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes) rpn_anchor_generator = AnchorsGenerator( anchor_sizes, aspect_ratios ) # 生成RPN通过滑动窗口预测网络部分 if rpn_head is None: rpn_head = RPNHead( out_channels, rpn_anchor_generator.num_anchors_per_location()[0] ) # 默认rpn_pre_nms_top_n_train = 2000, rpn_pre_nms_top_n_test = 1000, # 默认rpn_post_nms_top_n_train = 2000, rpn_post_nms_top_n_test = 1000, rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test) rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test) # 定义整个RPN框架 rpn = RegionProposalNetwork( rpn_anchor_generator, rpn_head, rpn_fg_iou_thresh, rpn_bg_iou_thresh, rpn_batch_size_per_image, rpn_positive_fraction, rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh, score_thresh=rpn_score_thresh) # Multi-scale RoIAlign pooling if box_roi_pool is None: box_roi_pool = MultiScaleRoIAlign( featmap_names=['0', '1', '2', '3'], # 在哪些特征层进行roi pooling output_size=[7, 7], sampling_ratio=2) # fast RCNN中roi pooling后的展平处理两个全连接层部分 if box_head is None: resolution = box_roi_pool.output_size[0] # 默认等于7 representation_size = 1024 box_head = TwoMLPHead( out_channels * resolution ** 2, representation_size ) # 在box_head的输出上预测部分 if box_predictor is None: representation_size = 1024 box_predictor = FastRCNNPredictor( representation_size, num_classes) # 将roi pooling, box_head以及box_predictor结合在一起 roi_heads = RoIHeads( # box box_roi_pool, box_head, box_predictor, box_fg_iou_thresh, 
def fastrcnn_loss(class_logits, box_regression, labels, regression_targets):
    # type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]
    """
    Computes the classification and box-regression losses for Faster R-CNN.

    Arguments:
        class_logits : predicted class logits, one row per sampled proposal
        box_regression : predicted box regression values, reshaped here to
            [num_proposals, -1, 4]
        labels : per-image class labels; concatenated along dim 0
        regression_targets : per-image regression targets; concatenated along dim 0

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    """
    all_labels = torch.cat(labels, dim=0)
    all_targets = torch.cat(regression_targets, dim=0)

    # cross-entropy over every sampled proposal
    classification_loss = F.cross_entropy(class_logits, all_labels)

    # indices of proposals whose label is > 0, and the labels at those indices;
    # both are used together for advanced indexing below
    fg_idx = torch.where(torch.gt(all_labels, 0))[0]
    fg_classes = all_labels[fg_idx]

    # class_logits is 2-D: [num_proposals, num_classes]
    num_rois, _num_classes = class_logits.shape
    per_class_deltas = box_regression.reshape(num_rois, -1, 4)

    # for each positive proposal pick the 4 values predicted for its own class
    summed_box_loss = det_utils.smooth_l1_loss(
        per_class_deltas[fg_idx, fg_classes],
        all_targets[fg_idx],
        beta=1 / 9,
        size_average=False,
    )
    # summed loss is divided by the number of all labels, not only positives
    box_loss = summed_box_loss / all_labels.numel()

    return classification_loss, box_loss
det_utils.BalancedPositiveNegativeSampler( batch_size_per_image, # default: 512 positive_fraction) # default: 0.25 if bbox_reg_weights is None: bbox_reg_weights = (10., 10., 5., 5.) self.box_coder = det_utils.BoxCoder(bbox_reg_weights) self.box_roi_pool = box_roi_pool # Multi-scale RoIAlign pooling self.box_head = box_head # TwoMLPHead self.box_predictor = box_predictor # FastRCNNPredictor self.score_thresh = score_thresh # default: 0.05 self.nms_thresh = nms_thresh # default: 0.5 self.detection_per_img = detection_per_img # default: 100 def assign_targets_to_proposals(self, proposals, gt_boxes, gt_labels): # type: (List[Tensor], List[Tensor], List[Tensor]) -> Tuple[List[Tensor], List[Tensor]] """ 为每个proposal匹配对应的gt_box,并划分到正负样本中 Args: proposals: gt_boxes: gt_labels: Returns: """ matched_idxs = [] labels = [] # 遍历每张图像的proposals, gt_boxes, gt_labels信息 for proposals_in_image, gt_boxes_in_image, gt_labels_in_image in zip(proposals, gt_boxes, gt_labels): if gt_boxes_in_image.numel() == 0: # 该张图像中没有gt框,为背景 # background image device = proposals_in_image.device clamped_matched_idxs_in_image = torch.zeros( (proposals_in_image.shape[0],), dtype=torch.int64, device=device ) labels_in_image = torch.zeros( (proposals_in_image.shape[0],), dtype=torch.int64, device=device ) else: # set to self.box_similarity when https://github.com/pytorch/pytorch/issues/27495 lands # 计算proposal与每个gt_box的iou重合度 match_quality_matrix = box_ops.box_iou(gt_boxes_in_image, proposals_in_image) # 计算proposal与每个gt_box匹配的iou最大值,并记录索引, # iou < low_threshold索引值为 -1, low_threshold <= iou < high_threshold索引值为 -2 matched_idxs_in_image = self.proposal_matcher(match_quality_matrix) # 限制最小值,防止匹配标签时出现越界的情况 # 注意-1, -2对应的gt索引会调整到0,获取的标签类别为第0个gt的类别(实际上并不是),后续会进一步处理 clamped_matched_idxs_in_image = matched_idxs_in_image.clamp(min=0) # 获取proposal匹配到的gt对应标签 labels_in_image = gt_labels_in_image[clamped_matched_idxs_in_image] labels_in_image = labels_in_image.to(dtype=torch.int64) # label background (below the low 
threshold) # 将gt索引为-1的类别设置为0,即背景,负样本 bg_inds = matched_idxs_in_image == self.proposal_matcher.BELOW_LOW_THRESHOLD # -1 labels_in_image[bg_inds] = 0 # label ignore proposals (between low and high threshold) # 将gt索引为-2的类别设置为-1, 即废弃样本 ignore_inds = matched_idxs_in_image == self.proposal_matcher.BETWEEN_THRESHOLDS # -2 labels_in_image[ignore_inds] = -1 # -1 is ignored by sampler matched_idxs.append(clamped_matched_idxs_in_image) labels.append(labels_in_image) return matched_idxs, labels def subsample(self, labels): # type: (List[Tensor]) -> List[Tensor] # BalancedPositiveNegativeSampler sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels) sampled_inds = [] # 遍历每张图片的正负样本索引 for img_idx, (pos_inds_img, neg_inds_img) in enumerate(zip(sampled_pos_inds, sampled_neg_inds)): # 记录所有采集样本索引(包括正样本和负样本) # img_sampled_inds = torch.nonzero(pos_inds_img | neg_inds_img).squeeze(1) img_sampled_inds = torch.where(pos_inds_img | neg_inds_img)[0] sampled_inds.append(img_sampled_inds) return sampled_inds def add_gt_proposals(self, proposals, gt_boxes): # type: (List[Tensor], List[Tensor]) -> List[Tensor] """ 将gt_boxes拼接到proposal后面 Args: proposals: 一个batch中每张图像rpn预测的boxes gt_boxes: 一个batch中每张图像对应的真实目标边界框 Returns: """ proposals = [ torch.cat((proposal, gt_box)) for proposal, gt_box in zip(proposals, gt_boxes) ] return proposals def check_targets(self, targets): # type: (Optional[List[Dict[str, Tensor]]]) -> None assert targets is not None assert all(["boxes" in t for t in targets]) assert all(["labels" in t for t in targets]) def select_training_samples(self, proposals, # type: List[Tensor] targets # type: Optional[List[Dict[str, Tensor]]] ): # type: (...) 
-> Tuple[List[Tensor], List[Tensor], List[Tensor]] """ 划分正负样本,统计对应gt的标签以及边界框回归信息 list元素个数为batch_size Args: proposals: rpn预测的boxes targets: Returns: """ # 检查target数据是否为空 self.check_targets(targets) # 如果不加这句,jit.script会不通过(看不懂) assert targets is not None dtype = proposals[0].dtype device = proposals[0].device # 获取标注好的boxes以及labels信息 gt_boxes = [t["boxes"].to(dtype) for t in targets] gt_labels = [t["labels"] for t in targets] # append ground-truth bboxes to proposal # 将gt_boxes拼接到proposal后面 proposals = self.add_gt_proposals(proposals, gt_boxes) # get matching gt indices for each proposal # 为每个proposal匹配对应的gt_box,并划分到正负样本中 matched_idxs, labels = self.assign_targets_to_proposals(proposals, gt_boxes, gt_labels) # sample a fixed proportion of positive-negative proposals # 按给定数量和比例采样正负样本 sampled_inds = self.subsample(labels) matched_gt_boxes = [] num_images = len(proposals) # 遍历每张图像 for img_id in range(num_images): # 获取每张图像的正负样本索引 img_sampled_inds = sampled_inds[img_id] # 获取对应正负样本的proposals信息 proposals[img_id] = proposals[img_id][img_sampled_inds] # 获取对应正负样本的真实类别信息 labels[img_id] = labels[img_id][img_sampled_inds] # 获取对应正负样本的gt索引信息 matched_idxs[img_id] = matched_idxs[img_id][img_sampled_inds] gt_boxes_in_image = gt_boxes[img_id] if gt_boxes_in_image.numel() == 0: gt_boxes_in_image = torch.zeros((1, 4), dtype=dtype, device=device) # 获取对应正负样本的gt box信息 matched_gt_boxes.append(gt_boxes_in_image[matched_idxs[img_id]]) # 根据gt和proposal计算边框回归参数(针对gt的) regression_targets = self.box_coder.encode(matched_gt_boxes, proposals) return proposals, labels, regression_targets def postprocess_detections(self, class_logits, # type: Tensor box_regression, # type: Tensor proposals, # type: List[Tensor] image_shapes # type: List[Tuple[int, int]] ): # type: (...) 
-> Tuple[List[Tensor], List[Tensor], List[Tensor]] """ 对网络的预测数据进行后处理,包括 (1)根据proposal以及预测的回归参数计算出最终bbox坐标 (2)对预测类别结果进行softmax处理 (3)裁剪预测的boxes信息,将越界的坐标调整到图片边界上 (4)移除所有背景信息 (5)移除低概率目标 (6)移除小尺寸目标 (7)执行nms处理,并按scores进行排序 (8)根据scores排序返回前topk个目标 Args: class_logits: 网络预测类别概率信息 box_regression: 网络预测的边界框回归参数 proposals: rpn输出的proposal image_shapes: 打包成batch前每张图像的宽高 Returns: """ device = class_logits.device # 预测目标类别数 num_classes = class_logits.shape[-1] # 获取每张图像的预测bbox数量 boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals] # 根据proposal以及预测的回归参数计算出最终bbox坐标 pred_boxes = self.box_coder.decode(box_regression, proposals) # 对预测类别结果进行softmax处理 pred_scores = F.softmax(class_logits, -1) # split boxes and scores per image # 根据每张图像的预测bbox数量分割结果 pred_boxes_list = pred_boxes.split(boxes_per_image, 0) pred_scores_list = pred_scores.split(boxes_per_image, 0) all_boxes = [] all_scores = [] all_labels = [] # 遍历每张图像预测信息 for boxes, scores, image_shape in zip(pred_boxes_list, pred_scores_list, image_shapes): # 裁剪预测的boxes信息,将越界的坐标调整到图片边界上 boxes = box_ops.clip_boxes_to_image(boxes, image_shape) # create labels for each prediction labels = torch.arange(num_classes, device=device) labels = labels.view(1, -1).expand_as(scores) # remove prediction with the background label # 移除索引为0的所有信息(0代表背景) boxes = boxes[:, 1:] scores = scores[:, 1:] labels = labels[:, 1:] # batch everything, by making every class prediction be a separate instance boxes = boxes.reshape(-1, 4) scores = scores.reshape(-1) labels = labels.reshape(-1) # remove low scoring boxes # 移除低概率目标,self.scores_thresh=0.05 # gt: Computes input > other element-wise. # inds = torch.nonzero(torch.gt(scores, self.score_thresh)).squeeze(1) inds = torch.where(torch.gt(scores, self.score_thresh))[0] boxes, scores, labels = boxes[inds], scores[inds], labels[inds] # remove empty boxes # 移除小目标 keep = box_ops.remove_small_boxes(boxes, min_size=1.) 
boxes, scores, labels = boxes[keep], scores[keep], labels[keep] # non-maximun suppression, independently done per class # 执行nms处理,执行后的结果会按照scores从大到小进行排序返回 keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh) # keep only topk scoring predictions # 获取scores排在前topk个预测目标 keep = keep[:self.detection_per_img] boxes, scores, labels = boxes[keep], scores[keep], labels[keep] all_boxes.append(boxes) all_scores.append(scores) all_labels.append(labels) return all_boxes, all_scores, all_labels def forward(self, features, # type: Dict[str, Tensor] proposals, # type: List[Tensor] image_shapes, # type: List[Tuple[int, int]] targets=None # type: Optional[List[Dict[str, Tensor]]] ): # type: (...) -> Tuple[List[Dict[str, Tensor]], Dict[str, Tensor]] """ Arguments: features (List[Tensor]) proposals (List[Tensor[N, 4]]) image_shapes (List[Tuple[H, W]]) targets (List[Dict]) """ # 检查targets的数据类型是否正确 if targets is not None: for t in targets: floating_point_types = (torch.float, torch.double, torch.half) assert t["boxes"].dtype in floating_point_types, "target boxes must of float type" assert t["labels"].dtype == torch.int64, "target labels must of int64 type" if self.training: # 划分正负样本,统计对应gt的标签以及边界框回归信息 proposals, labels, regression_targets = self.select_training_samples(proposals, targets) else: labels = None regression_targets = None # 将采集样本通过Multi-scale RoIAlign pooling层 # box_features_shape: [num_proposals, channel, height, width] box_features = self.box_roi_pool(features, proposals, image_shapes) # 通过roi_pooling后的两层全连接层 # box_features_shape: [num_proposals, representation_size] box_features = self.box_head(box_features) # 接着分别预测目标类别和边界框回归参数 class_logits, box_regression = self.box_predictor(box_features) result = torch.jit.annotate(List[Dict[str, torch.Tensor]], []) losses = {} if self.training: assert labels is not None and regression_targets is not None loss_classifier, loss_box_reg = fastrcnn_loss( class_logits, box_regression, labels, regression_targets) losses = { 
@torch.jit.unused
def _onnx_get_num_anchors_and_pre_nms_top_n(ob, orig_pre_nms_top_n):
    # type: (Tensor, int) -> Tuple[int, int]
    """
    Tensor-based computation of the anchor count and the pre-NMS top-n limit.

    Arguments:
        ob: tensor whose dim-1 size is taken as the number of anchors
        orig_pre_nms_top_n: requested number of proposals to keep before NMS

    Returns:
        (num_anchors, pre_nms_top_n): ``num_anchors`` is a 1-element tensor with
        the size of dim 1 of ``ob``; ``pre_nms_top_n`` is the smaller of
        ``orig_pre_nms_top_n`` and that size. Both are returned as tensors.
    """
    from torch.onnx import operators

    # size of dim 1 as a 1-element tensor
    anchor_count = operators.shape_as_tensor(ob)[1].unsqueeze(0)
    requested = torch.tensor([orig_pre_nms_top_n], dtype=anchor_count.dtype)
    # cannot keep more candidates than there are anchors
    candidates = torch.cat((requested, anchor_count), 0)
    return anchor_count, torch.min(candidates)
Arguments: sizes (Tuple[Tuple[int]]): aspect_ratios (Tuple[Tuple[float]]): """ def __init__(self, sizes=(128, 256, 512), aspect_ratios=(0.5, 1.0, 2.0)): super(AnchorsGenerator, self).__init__() if not isinstance(sizes[0], (list, tuple)): # TODO change this sizes = tuple((s,) for s in sizes) if not isinstance(aspect_ratios[0], (list, tuple)): aspect_ratios = (aspect_ratios,) * len(sizes) assert len(sizes) == len(aspect_ratios) self.sizes = sizes self.aspect_ratios = aspect_ratios self.cell_anchors = None self._cache = {} def generate_anchors(self, scales, aspect_ratios, dtype=torch.float32, device=torch.device("cpu")): # type: (List[int], List[float], torch.dtype, torch.device) -> Tensor """ compute anchor sizes Arguments: scales: sqrt(anchor_area) aspect_ratios: h/w ratios dtype: float32 device: cpu/gpu """ scales = torch.as_tensor(scales, dtype=dtype, device=device) aspect_ratios = torch.as_tensor(aspect_ratios, dtype=dtype, device=device) h_ratios = torch.sqrt(aspect_ratios) w_ratios = 1.0 / h_ratios # [r1, r2, r3]' * [s1, s2, s3] # number of elements is len(ratios)*len(scales) ws = (w_ratios[:, None] * scales[None, :]).view(-1) hs = (h_ratios[:, None] * scales[None, :]).view(-1) # left-top, right-bottom coordinate relative to anchor center(0, 0) # 生成的anchors模板都是以(0, 0)为中心的, shape [len(ratios)*len(scales), 4] base_anchors = torch.stack([-ws, -hs, ws, hs], dim=1) / 2 return base_anchors.round() # round 四舍五入 def set_cell_anchors(self, dtype, device): # type: (torch.dtype, torch.device) -> None if self.cell_anchors is not None: cell_anchors = self.cell_anchors assert cell_anchors is not None # suppose that all anchors have the same device # which is a valid assumption in the current state of the codebase if cell_anchors[0].device == device: return # 根据提供的sizes和aspect_ratios生成anchors模板 # anchors模板都是以(0, 0)为中心的anchor cell_anchors = [ self.generate_anchors(sizes, aspect_ratios, dtype, device) for sizes, aspect_ratios in zip(self.sizes, self.aspect_ratios) ] 
self.cell_anchors = cell_anchors def num_anchors_per_location(self): # 计算每个预测特征层上每个滑动窗口的预测目标数 return [len(s) * len(a) for s, a in zip(self.sizes, self.aspect_ratios)] # For every combination of (a, (g, s), i) in (self.cell_anchors, zip(grid_sizes, strides), 0:2), # output g[i] anchors that are s[i] distance apart in direction i, with the same dimensions as a. def grid_anchors(self, grid_sizes, strides): # type: (List[List[int]], List[List[Tensor]]) -> List[Tensor] """ anchors position in grid coordinate axis map into origin image 计算预测特征图对应原始图像上的所有anchors的坐标 Args: grid_sizes: 预测特征矩阵的height和width strides: 预测特征矩阵上一步对应原始图像上的步距 """ anchors = [] cell_anchors = self.cell_anchors assert cell_anchors is not None # 遍历每个预测特征层的grid_size,strides和cell_anchors for size, stride, base_anchors in zip(grid_sizes, strides, cell_anchors): grid_height, grid_width = size stride_height, stride_width = stride device = base_anchors.device # For output anchor, compute [x_center, y_center, x_center, y_center] # shape: [grid_width] 对应原图上的x坐标(列) shifts_x = torch.arange(0, grid_width, dtype=torch.float32, device=device) * stride_width # shape: [grid_height] 对应原图上的y坐标(行) shifts_y = torch.arange(0, grid_height, dtype=torch.float32, device=device) * stride_height # 计算预测特征矩阵上每个点对应原图上的坐标(anchors模板的坐标偏移量) # torch.meshgrid函数分别传入行坐标和列坐标,生成网格行坐标矩阵和网格列坐标矩阵 # shape: [grid_height, grid_width] shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) shift_x = shift_x.reshape(-1) shift_y = shift_y.reshape(-1) # 计算anchors坐标(xmin, ymin, xmax, ymax)在原图上的坐标偏移量 # shape: [grid_width*grid_height, 4] shifts = torch.stack([shift_x, shift_y, shift_x, shift_y], dim=1) # For every (base anchor, output anchor) pair, # offset each zero-centered base anchor by the center of the output anchor. 
# 将anchors模板与原图上的坐标偏移量相加得到原图上所有anchors的坐标信息(shape不同时会使用广播机制) shifts_anchor = shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4) anchors.append(shifts_anchor.reshape(-1, 4)) return anchors # List[Tensor(all_num_anchors, 4)] def cached_grid_anchors(self, grid_sizes, strides): # type: (List[List[int]], List[List[Tensor]]) -> List[Tensor] """将计算得到的所有anchors信息进行缓存""" key = str(grid_sizes) + str(strides) # self._cache是字典类型 if key in self._cache: return self._cache[key] anchors = self.grid_anchors(grid_sizes, strides) self._cache[key] = anchors return anchors def forward(self, image_list, feature_maps): # type: (ImageList, List[Tensor]) -> List[Tensor] # 获取每个预测特征层的尺寸(height, width) grid_sizes = list([feature_map.shape[-2:] for feature_map in feature_maps]) # 获取输入图像的height和width image_size = image_list.tensors.shape[-2:] # 获取变量类型和设备类型 dtype, device = feature_maps[0].dtype, feature_maps[0].device # one step in feature map equate n pixel stride in origin image # 计算特征层上的一步等于原始图像上的步长 strides = [[torch.tensor(image_size[0] // g[0], dtype=torch.int64, device=device), torch.tensor(image_size[1] // g[1], dtype=torch.int64, device=device)] for g in grid_sizes] # 根据提供的sizes和aspect_ratios生成anchors模板 self.set_cell_anchors(dtype, device) # 计算/读取所有anchors的坐标信息(这里的anchors信息是映射到原图上的所有anchors信息,不是anchors模板) # 得到的是一个list列表,对应每张预测特征图映射回原图的anchors坐标信息 anchors_over_all_feature_maps = self.cached_grid_anchors(grid_sizes, strides) anchors = torch.jit.annotate(List[List[torch.Tensor]], []) # 遍历一个batch中的每张图像 for i, (image_height, image_width) in enumerate(image_list.image_sizes): anchors_in_image = [] # 遍历每张预测特征图映射回原图的anchors坐标信息 for anchors_per_feature_map in anchors_over_all_feature_maps: anchors_in_image.append(anchors_per_feature_map) anchors.append(anchors_in_image) # 将每一张图像的所有预测特征层的anchors坐标信息拼接在一起 # anchors是个list,每个元素为一张图像的所有anchors信息 anchors = [torch.cat(anchors_per_image) for anchors_per_image in anchors] # Clear the cache in case that memory leaks. 
class RPNHead(nn.Module):
    """
    RPN head: a shared 3x3 convolution followed by two 1x1 convolutions that
    predict, for every spatial position, the objectness logits and the
    bbox regression parameters.

    Arguments:
        in_channels: number of channels of the input feature
        num_anchors: number of anchors to be predicted
    """

    def __init__(self, in_channels, num_anchors):
        super().__init__()
        # 3x3 sliding window over the feature map
        self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
        # objectness score, one output channel per anchor
        self.cls_logits = nn.Conv2d(in_channels, num_anchors, kernel_size=1, stride=1)
        # bbox regression parameters, four per anchor
        self.bbox_pred = nn.Conv2d(in_channels, num_anchors * 4, kernel_size=1, stride=1)

        # all three conv layers: N(0, 0.01) weights and zero bias
        for conv_layer in (self.conv, self.cls_logits, self.bbox_pred):
            nn.init.normal_(conv_layer.weight, std=0.01)
            nn.init.constant_(conv_layer.bias, 0)

    def forward(self, x):
        # type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
        objectness = []
        box_deltas = []
        # the same head is applied to every feature map in the list
        for feature in x:
            hidden = F.relu(self.conv(feature))
            objectness.append(self.cls_logits(hidden))
            box_deltas.append(self.bbox_pred(hidden))
        return objectness, box_deltas
box_regression: 每个预测特征层上的预测目标bboxes regression参数 Returns: """ box_cls_flattened = [] box_regression_flattened = [] # 遍历每个预测特征层 for box_cls_per_level, box_regression_per_level in zip(box_cls, box_regression): # [batch_size, anchors_num_per_position * classes_num, height, width] # 注意,当计算RPN中的proposal时,classes_num=1,只区分目标和背景 N, AxC, H, W = box_cls_per_level.shape # # [batch_size, anchors_num_per_position * 4, height, width] Ax4 = box_regression_per_level.shape[1] # anchors_num_per_position A = Ax4 // 4 # classes_num C = AxC // A # [N, -1, C] box_cls_per_level = permute_and_flatten(box_cls_per_level, N, A, C, H, W) box_cls_flattened.append(box_cls_per_level) # [N, -1, C] box_regression_per_level = permute_and_flatten(box_regression_per_level, N, A, 4, H, W) box_regression_flattened.append(box_regression_per_level) box_cls = torch.cat(box_cls_flattened, dim=1).flatten(0, -2) # start_dim, end_dim box_regression = torch.cat(box_regression_flattened, dim=1).reshape(-1, 4) return box_cls, box_regression class RegionProposalNetwork(torch.nn.Module): """ Implements Region Proposal Network (RPN). Arguments: anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature maps. head (nn.Module): module that computes the objectness and regression deltas fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be considered as positive during training of the RPN. bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be considered as negative during training of the RPN. batch_size_per_image (int): number of anchors that are sampled during training of the RPN for computing the loss positive_fraction (float): proportion of positive anchors in a mini-batch during training of the RPN pre_nms_top_n (Dict[str]): number of proposals to keep before applying NMS. 
def __init__(self, anchor_generator, head,
             fg_iou_thresh, bg_iou_thresh,
             batch_size_per_image, positive_fraction,
             pre_nms_top_n, post_nms_top_n, nms_thresh, score_thresh=0.0):
    """Store the RPN sub-modules and the matching / sampling / filtering settings.

    Parameter meanings are described in the class docstring. ``score_thresh``
    is the minimum objectness probability compared against in
    ``filter_proposals``.
    """
    super(RegionProposalNetwork, self).__init__()
    self.anchor_generator = anchor_generator
    self.head = head
    self.box_coder = det_utils.BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))

    # use during training
    # IoU function between anchors and ground-truth boxes
    self.box_similarity = box_ops.box_iou

    self.proposal_matcher = det_utils.Matcher(
        fg_iou_thresh,  # anchors with IoU above fg_iou_thresh (0.7) are treated as positives
        bg_iou_thresh,  # anchors with IoU below bg_iou_thresh (0.3) are treated as negatives
        allow_low_quality_matches=True
    )

    self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
        batch_size_per_image, positive_fraction  # 256, 0.5
    )

    # use during testing
    self._pre_nms_top_n = pre_nms_top_n
    self._post_nms_top_n = post_nms_top_n
    self.nms_thresh = nms_thresh
    self.score_thresh = score_thresh
    # minimum box side length passed to box_ops.remove_small_boxes
    self.min_size = 1.
def assign_targets_to_anchors(self, anchors, targets):
    # type: (List[Tensor], List[Dict[str, Tensor]]) -> Tuple[List[Tensor], List[Tensor]]
    """
    Match every anchor to a ground-truth box and label it as positive,
    background or discarded.

    Args:
        anchors: (List[Tensor]) anchors of each image
        targets: (List[Dict[Tensor]]) per-image targets; only the "boxes"
            entry is read

    Returns:
        labels: per-image float tensors with 1.0 (positive), 0.0 (background)
            and -1.0 (discarded). The RPN only separates foreground from
            background, so every positive gets label 1.
        matched_gt_boxes: per-image tensors holding, for each anchor, the
            ground-truth box it was matched to (all zeros when the image has
            no ground-truth boxes)
    """
    labels = []
    matched_gt_boxes = []
    # iterate over the anchors and targets of each image
    for anchors_per_image, targets_per_image in zip(anchors, targets):
        gt_boxes = targets_per_image["boxes"]
        if gt_boxes.numel() == 0:
            # No ground truth in this image: every anchor is background.
            device = anchors_per_image.device
            matched_gt_boxes_per_image = torch.zeros(anchors_per_image.shape, dtype=torch.float32, device=device)
            labels_per_image = torch.zeros((anchors_per_image.shape[0],), dtype=torch.float32, device=device)
        else:
            # IoU between every ground-truth box and every anchor
            # set to self.box_similarity when https://github.com/pytorch/pytorch/issues/27495 lands
            match_quality_matrix = box_ops.box_iou(gt_boxes, anchors_per_image)
            # Index of the best-matching gt box for each anchor; the matcher
            # uses negative sentinels (BELOW_LOW_THRESHOLD / BETWEEN_THRESHOLDS)
            # for anchors that are background or fall between the thresholds.
            matched_idxs = self.proposal_matcher(match_quality_matrix)
            # Clamp the sentinels to 0 so the gather below never indexes out
            # of bounds; the rows gathered for such anchors are placeholders.
            matched_gt_boxes_per_image = gt_boxes[matched_idxs.clamp(min=0)]

            # anchors with a real match (index >= 0) are positives
            labels_per_image = matched_idxs >= 0
            labels_per_image = labels_per_image.to(dtype=torch.float32)

            # background (negative examples)
            bg_indices = matched_idxs == self.proposal_matcher.BELOW_LOW_THRESHOLD  # -1
            labels_per_image[bg_indices] = 0.0

            # discard indices that are between thresholds
            inds_to_discard = matched_idxs == self.proposal_matcher.BETWEEN_THRESHOLDS  # -2
            labels_per_image[inds_to_discard] = -1.0

        labels.append(labels_per_image)
        matched_gt_boxes.append(matched_gt_boxes_per_image)
    return labels, matched_gt_boxes
def filter_proposals(self, proposals, objectness, image_shapes, num_anchors_per_level):
    # type: (Tensor, Tensor, List[Tuple[int, int]], List[int]) -> Tuple[List[Tensor], List[Tensor]]
    """
    Reduce the raw RPN proposals of every image to the final proposal set:
    per-level top-k selection, clipping to the image, removal of small and
    low-score boxes, per-level NMS, then truncation to post_nms_top_n.

    Args:
        proposals: predicted box coordinates; the first dimension is the
            number of images
        objectness: predicted objectness logits (sigmoid is applied here)
        image_shapes: size of every image in the batch
        num_anchors_per_level: number of anchors predicted on each feature level

    Returns:
        final_boxes: list with one tensor of kept boxes per image
        final_scores: list with one tensor of the matching objectness
            probabilities per image
    """
    num_images = proposals.shape[0]
    device = proposals.device

    # do not backprop through objectness
    objectness = objectness.detach()
    objectness = objectness.reshape(num_images, -1)

    # torch.full returns a tensor of the given size filled with fill_value;
    # `levels` records which feature level each anchor belongs to
    levels = [torch.full((n, ), idx, dtype=torch.int64, device=device)
              for idx, n in enumerate(num_anchors_per_level)]
    levels = torch.cat(levels, 0)

    # Expand this tensor to the same size as objectness
    levels = levels.reshape(1, -1).expand_as(objectness)

    # select top_n boxes independently per level before applying nms
    # (indices of the pre_nms_top_n highest-scoring anchors on each level)
    top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)

    image_range = torch.arange(num_images, device=device)
    batch_idx = image_range[:, None]  # [batch_size, 1]

    # gather the scores and level ids of the selected anchors
    objectness = objectness[batch_idx, top_n_idx]
    levels = levels[batch_idx, top_n_idx]
    # gather the box coordinates of the selected anchors
    proposals = proposals[batch_idx, top_n_idx]

    objectness_prob = torch.sigmoid(objectness)

    final_boxes = []
    final_scores = []
    # process the predictions of each image separately
    for boxes, scores, lvl, img_shape in zip(proposals, objectness_prob, levels, image_shapes):
        # clip boxes that extend past the image onto the image border
        boxes = box_ops.clip_boxes_to_image(boxes, img_shape)

        # indices of boxes whose sides pass the min_size check
        # NOTE(review): whether the comparison is > or >= depends on box_ops — confirm
        keep = box_ops.remove_small_boxes(boxes, self.min_size)
        boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]

        # drop low-probability boxes, see
        # https://github.com/pytorch/vision/pull/3205
        keep = torch.where(torch.ge(scores, self.score_thresh))[0]  # ge: >=
        boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]

        # non-maximum suppression, independently done per level
        keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)

        # keep only topk scoring predictions
        # (assumes batched_nms returns indices sorted by decreasing score — TODO confirm)
        keep = keep[: self.post_nms_top_n()]
        boxes, scores = boxes[keep], scores[keep]

        final_boxes.append(boxes)
        final_scores.append(scores)
    return final_boxes, final_scores
def forward(self,
            images,        # type: ImageList
            features,      # type: Dict[str, Tensor]
            targets=None   # type: Optional[List[Dict[str, Tensor]]]
            ):
    # type: (...) -> Tuple[List[Tensor], Dict[str, Tensor]]
    """
    Arguments:
        images (ImageList): images for which we want to compute the predictions
        features (Dict[Tensor]): features computed from the images that are
            used for computing the predictions. Each tensor in the list
            correspond to different feature levels
        targets (List[Dict[Tensor]): ground-truth boxes present in the image (optional).
            If provided, each element in the dict should contain a field `boxes`,
            with the locations of the ground-truth boxes.

    Returns:
        boxes (List[Tensor]): the predicted boxes from the RPN, one Tensor per
            image.
        losses (Dict[Tensor]): the losses for the model during training. During
            testing, it is an empty dict.
    """
    # RPN uses all feature maps that are available
    # `features` is a dict of all prediction feature levels; keep only the tensors
    features = list(features.values())

    # Predict objectness and bbox-regression parameters on every feature level;
    # both results are lists with one tensor per level
    objectness, pred_bbox_deltas = self.head(features)

    # Generate the anchors for the batch: a list with one tensor per image
    anchors = self.anchor_generator(images, features)

    # batch_size
    num_images = len(anchors)

    # Number of anchors on each feature level, taken as the product of the
    # three dimensions of one image's objectness map on that level
    num_anchors_per_level_shape_tensors = [o[0].shape for o in objectness]
    num_anchors_per_level = [s[0] * s[1] * s[2] for s in num_anchors_per_level_shape_tensors]

    # Rearrange and flatten the per-level predictions
    objectness, pred_bbox_deltas = concat_box_prediction_layers(objectness,
                                                                pred_bbox_deltas)

    # apply pred_bbox_deltas to anchors to obtain the decoded proposals
    # note that we detach the deltas because Faster R-CNN do not backprop through
    # the proposals
    proposals = self.box_coder.decode(pred_bbox_deltas.detach(), anchors)
    proposals = proposals.view(num_images, -1, 4)

    # Remove small boxes, apply NMS and keep the post_nms_top_n best proposals
    boxes, scores = self.filter_proposals(proposals, objectness, images.image_sizes, num_anchors_per_level)

    losses = {}
    if self.training:
        assert targets is not None
        # Find the best-matching gt box of every anchor and label the anchors
        # as foreground, background or discarded
        labels, matched_gt_boxes = self.assign_targets_to_anchors(anchors, targets)
        # Encode the matched gt boxes relative to the anchors as regression targets
        regression_targets = self.box_coder.encode(matched_gt_boxes, anchors)
        loss_objectness, loss_rpn_box_reg = self.compute_loss(
            objectness, pred_bbox_deltas, labels, regression_targets
        )
        losses = {
            "loss_objectness": loss_objectness,
            "loss_rpn_box_reg": loss_rpn_box_reg
        }
    return boxes, losses
= torch.nn.functional.interpolate( image[None], scale_factor=scale_factor, mode="bilinear", recompute_scale_factor=True, align_corners=False)[0] return image def _resize_image(image, self_min_size, self_max_size): # type: (Tensor, float, float) -> Tensor im_shape = torch.tensor(image.shape[-2:]) min_size = float(torch.min(im_shape)) # 获取高宽中的最小值 max_size = float(torch.max(im_shape)) # 获取高宽中的最大值 scale_factor = self_min_size / min_size # 根据指定最小边长和图片最小边长计算缩放比例 # 如果使用该缩放比例计算的图片最大边长大于指定的最大边长 if max_size * scale_factor > self_max_size: scale_factor = self_max_size / max_size # 将缩放比例设为指定最大边长和图片最大边长之比 # interpolate利用插值的方法缩放图片 # image[None]操作是在最前面添加batch维度[C, H, W] -> [1, C, H, W] # bilinear只支持4D Tensor image = torch.nn.functional.interpolate( image[None], scale_factor=scale_factor, mode="bilinear", recompute_scale_factor=True, align_corners=False)[0] return image class GeneralizedRCNNTransform(nn.Module): """ Performs input / target transformation before feeding the data to a GeneralizedRCNN model. 
def normalize(self, image):
    """Standardise ``image`` channel-wise: subtract ``self.image_mean`` and divide
    by ``self.image_std``.

    The statistics are created with the image's own dtype and device, so the
    result keeps both.
    """
    channel_mean = torch.as_tensor(self.image_mean, dtype=image.dtype, device=image.device)
    channel_std = torch.as_tensor(self.image_std, dtype=image.dtype, device=image.device)
    # [:, None, None] turns shape [3] into [3, 1, 1] so it broadcasts over H and W
    centered = image - channel_mean[:, None, None]
    return centered / channel_std[:, None, None]
""" index = int(torch.empty(1).uniform_(0., float(len(k))).item()) return k[index] def resize(self, image, target): # type: (Tensor, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]] """ 将图片缩放到指定的大小范围内,并对应缩放bboxes信息 Args: image: 输入的图片 target: 输入图片的相关信息(包括bboxes信息) Returns: image: 缩放后的图片 target: 缩放bboxes后的图片相关信息 """ # image shape is [channel, height, width] h, w = image.shape[-2:] if self.training: size = float(self.torch_choice(self.min_size)) # 指定输入图片的最小边长,注意是self.min_size不是min_size else: # FIXME assume for now that testing uses the largest scale size = float(self.min_size[-1]) # 指定输入图片的最小边长,注意是self.min_size不是min_size if torchvision._is_tracing(): image = _resize_image_onnx(image, size, float(self.max_size)) else: image = _resize_image(image, size, float(self.max_size)) if target is None: return image, target bbox = target["boxes"] # 根据图像的缩放比例来缩放bbox bbox = resize_boxes(bbox, [h, w], image.shape[-2:]) target["boxes"] = bbox return image, target # _onnx_batch_images() is an implementation of # batch_images() that is supported by ONNX tracing. 
def max_by_axis(self, the_list):
    # type: (List[List[int]]) -> List[int]
    """Return the element-wise maximum over all sub-lists of ``the_list``.

    Args:
        the_list: non-empty list of equally long integer lists (for example
            the shapes of the images in a batch)

    Returns:
        A new list whose i-th entry is the largest i-th entry found in any
        sub-list. The input lists are left unmodified.
    """
    # Copy the first row: accumulating into the_list[0] directly would
    # silently overwrite the caller's data.
    maxes = list(the_list[0])
    for sublist in the_list[1:]:
        for index, item in enumerate(sublist):
            maxes[index] = max(maxes[index], item)
    return maxes
def postprocess(self,
                result,                # type: List[Dict[str, Tensor]]
                image_shapes,          # type: List[Tuple[int, int]]
                original_image_sizes   # type: List[Tuple[int, int]]
                ):
    # type: (...) -> List[Dict[str, Tensor]]
    """
    Post-process the network predictions by mapping the boxes back to the
    scale of the original images.

    Args:
        result: list(dict), network predictions, len(result) == batch_size
        image_shapes: list(torch.Size), image sizes after the resize step,
            len(image_shapes) == batch_size
        original_image_sizes: list(torch.Size), original image sizes,
            len(original_image_sizes) == batch_size

    Returns:
        The same ``result`` list; in evaluation mode each entry's "boxes" has
        been replaced in place, in training mode nothing is touched.
    """
    if not self.training:
        # rescale every image's boxes from the resized image to the original one
        for i, (pred, im_s, o_im_s) in enumerate(zip(result, image_shapes, original_image_sizes)):
            result[i]["boxes"] = resize_boxes(pred["boxes"], im_s, o_im_s)
    return result
def resize_boxes(boxes, original_size, new_size):
    # type: (Tensor, List[int], List[int]) -> Tensor
    """
    Scale box coordinates to follow a resize of the image they belong to.

    Arguments:
        boxes: tensor of boxes, one (xmin, ymin, xmax, ymax) row per box
        original_size: (height, width) of the image before resizing
        new_size: (height, width) of the image after resizing

    Returns:
        Tensor with the same layout as ``boxes``, x coordinates multiplied by
        the width ratio and y coordinates by the height ratio.
    """
    target_device = boxes.device
    # One float32 scalar tensor per axis: new extent / old extent.
    scale_h, scale_w = [
        torch.tensor(new_extent, dtype=torch.float32, device=target_device) /
        torch.tensor(old_extent, dtype=torch.float32, device=target_device)
        for new_extent, old_extent in zip(new_size, original_size)
    ]

    # unbind(1) splits the box tensor into its four coordinate columns
    left, top, right, bottom = boxes.unbind(1)
    scaled_columns = (left * scale_w, top * scale_h, right * scale_w, bottom * scale_h)
    return torch.stack(scaled_columns, dim=1)
") except Exception as e: print(e) def plot_map(mAP): try: x = list(range(len(mAP))) plt.plot(x, mAP, label='mAp') plt.xlabel('epoch') plt.ylabel('mAP') plt.title('Eval mAP') plt.xlim(0, len(mAP)) plt.legend(loc='best') plt.savefig('./mAP.png') plt.close() print("successful save mAP curve!") except Exception as e: print(e) ================================================ FILE: pytorch_object_detection/faster_rcnn/predict.py ================================================ import os import time import json import torch import torchvision from PIL import Image import matplotlib.pyplot as plt from torchvision import transforms from network_files import FasterRCNN, FastRCNNPredictor, AnchorsGenerator from backbone import resnet50_fpn_backbone, MobileNetV2 from draw_box_utils import draw_objs def create_model(num_classes): # mobileNetv2+faster_RCNN # backbone = MobileNetV2().features # backbone.out_channels = 1280 # # anchor_generator = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),), # aspect_ratios=((0.5, 1.0, 2.0),)) # # roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'], # output_size=[7, 7], # sampling_ratio=2) # # model = FasterRCNN(backbone=backbone, # num_classes=num_classes, # rpn_anchor_generator=anchor_generator, # box_roi_pool=roi_pooler) # resNet50+fpn+faster_RCNN # 注意,这里的norm_layer要和训练脚本中保持一致 backbone = resnet50_fpn_backbone(norm_layer=torch.nn.BatchNorm2d) model = FasterRCNN(backbone=backbone, num_classes=num_classes, rpn_score_thresh=0.5) return model def time_synchronized(): torch.cuda.synchronize() if torch.cuda.is_available() else None return time.time() def main(): # get devices device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") print("using {} device.".format(device)) # create model model = create_model(num_classes=21) # load train weights weights_path = "./save_weights/model.pth" assert os.path.exists(weights_path), "{} file dose not exist.".format(weights_path) weights_dict = torch.load(weights_path, 
map_location='cpu') weights_dict = weights_dict["model"] if "model" in weights_dict else weights_dict model.load_state_dict(weights_dict) model.to(device) # read class_indict label_json_path = './pascal_voc_classes.json' assert os.path.exists(label_json_path), "json file {} dose not exist.".format(label_json_path) with open(label_json_path, 'r') as f: class_dict = json.load(f) category_index = {str(v): str(k) for k, v in class_dict.items()} # load image original_img = Image.open("./test.jpg") # from pil image to tensor, do not normalize image data_transform = transforms.Compose([transforms.ToTensor()]) img = data_transform(original_img) # expand batch dimension img = torch.unsqueeze(img, dim=0) model.eval() # 进入验证模式 with torch.no_grad(): # init img_height, img_width = img.shape[-2:] init_img = torch.zeros((1, 3, img_height, img_width), device=device) model(init_img) t_start = time_synchronized() predictions = model(img.to(device))[0] t_end = time_synchronized() print("inference+NMS time: {}".format(t_end - t_start)) predict_boxes = predictions["boxes"].to("cpu").numpy() predict_classes = predictions["labels"].to("cpu").numpy() predict_scores = predictions["scores"].to("cpu").numpy() if len(predict_boxes) == 0: print("没有检测到任何目标!") plot_img = draw_objs(original_img, predict_boxes, predict_classes, predict_scores, category_index=category_index, box_thresh=0.5, line_thickness=3, font='arial.ttf', font_size=20) plt.imshow(plot_img) plt.show() # 保存预测的图片结果 plot_img.save("test_result.jpg") if __name__ == '__main__': main() ================================================ FILE: pytorch_object_detection/faster_rcnn/record_mAP.txt ================================================ COCO results: Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.526 Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.804 Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.586 Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] 
def main():
    """Split the VOC2012 annotation file names into train.txt and val.txt.

    Reads the file names under ./VOCdevkit/VOC2012/Annotations, assigns a
    random ``val_rate`` fraction of them (fixed seed) to the validation set
    and writes one name per line into ``train.txt`` / ``val.txt`` in the
    current directory. Exits with status 1 if either output file already
    exists.
    """
    random.seed(0)  # fixed seed so the split is reproducible

    files_path = "./VOCdevkit/VOC2012/Annotations"
    assert os.path.exists(files_path), "path: '{}' does not exist.".format(files_path)

    val_rate = 0.5

    files_name = sorted([file.split(".")[0] for file in os.listdir(files_path)])
    files_num = len(files_name)
    # A set gives O(1) membership tests in the loop below; the sampled
    # indices themselves are unchanged.
    val_index = set(random.sample(range(0, files_num), k=int(files_num * val_rate)))
    train_files = []
    val_files = []
    for index, file_name in enumerate(files_name):
        if index in val_index:
            val_files.append(file_name)
        else:
            train_files.append(file_name)

    try:
        # Mode "x" refuses to overwrite an existing split; the context
        # manager guarantees both files are flushed and closed.
        with open("train.txt", "x") as train_f, open("val.txt", "x") as eval_f:
            train_f.write("\n".join(train_files))
            eval_f.write("\n".join(val_files))
    except FileExistsError as e:
        print(e)
        # Same effect as exit(1) without relying on the `site` helper.
        raise SystemExit(1)
"results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")) # 检查保存权重文件夹是否存在,不存在则创建 if not os.path.exists("save_weights"): os.makedirs("save_weights") data_transform = { "train": transforms.Compose([transforms.ToTensor(), transforms.RandomHorizontalFlip(0.5)]), "val": transforms.Compose([transforms.ToTensor()]) } VOC_root = "./" # VOCdevkit aspect_ratio_group_factor = 3 batch_size = 8 amp = False # 是否使用混合精度训练,需要GPU支持 # check voc root if os.path.exists(os.path.join(VOC_root, "VOCdevkit")) is False: raise FileNotFoundError("VOCdevkit dose not in path:'{}'.".format(VOC_root)) # load train data set # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt train_dataset = VOCDataSet(VOC_root, "2012", data_transform["train"], "train.txt") train_sampler = None # 是否按图片相似高宽比采样图片组成batch # 使用的话能够减小训练时所需GPU显存,默认使用 if aspect_ratio_group_factor >= 0: train_sampler = torch.utils.data.RandomSampler(train_dataset) # 统计所有图像高宽比例在bins区间中的位置索引 group_ids = create_aspect_ratio_groups(train_dataset, k=aspect_ratio_group_factor) # 每个batch图片从同一高宽比例区间中取 train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, batch_size) nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]) # number of workers print('Using %g dataloader workers' % nw) # 注意这里的collate_fn是自定义的,因为读取的数据包括image和targets,不能直接使用默认的方法合成batch if train_sampler: # 如果按照图片高宽比采样图片,dataloader中需要使用batch_sampler train_data_loader = torch.utils.data.DataLoader(train_dataset, batch_sampler=train_batch_sampler, pin_memory=True, num_workers=nw, collate_fn=train_dataset.collate_fn) else: train_data_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=nw, collate_fn=train_dataset.collate_fn) # load validation data set # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt val_dataset = VOCDataSet(VOC_root, "2012", data_transform["val"], "val.txt") val_data_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, shuffle=False, pin_memory=True, 
num_workers=nw, collate_fn=val_dataset.collate_fn) # create model num_classes equal background + 20 classes model = create_model(num_classes=21) # print(model) model.to(device) scaler = torch.cuda.amp.GradScaler() if amp else None train_loss = [] learning_rate = [] val_map = [] # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # first frozen backbone and train 5 epochs # # 首先冻结前置特征提取网络权重(backbone),训练rpn以及最终预测网络部分 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # for param in model.backbone.parameters(): param.requires_grad = False # define optimizer params = [p for p in model.parameters() if p.requires_grad] optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005) init_epochs = 5 for epoch in range(init_epochs): # train for one epoch, printing every 10 iterations mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=50, warmup=True, scaler=scaler) train_loss.append(mean_loss.item()) learning_rate.append(lr) # evaluate on the test dataset coco_info = utils.evaluate(model, val_data_loader, device=device) # write into txt with open(results_file, "a") as f: # 写入的数据包括coco指标还有loss和learning rate result_info = [f"{i:.4f}" for i in coco_info + [mean_loss.item()]] + [f"{lr:.6f}"] txt = "epoch:{} {}".format(epoch, ' '.join(result_info)) f.write(txt + "\n") val_map.append(coco_info[1]) # pascal mAP torch.save(model.state_dict(), "./save_weights/pretrain.pth") # # # # # # # # # # # # # # # # # # # # # # # # # # # # # second unfrozen backbone and train all network # # 解冻前置特征提取网络权重(backbone),接着训练整个网络权重 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 冻结backbone部分底层权重 for name, parameter in model.backbone.named_parameters(): split_name = name.split(".")[0] if split_name in ["0", "1", "2", "3"]: parameter.requires_grad = False else: parameter.requires_grad = True # define optimizer params = [p for p in model.parameters() if p.requires_grad] optimizer = 
torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005) # learning rate scheduler lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.33) num_epochs = 20 for epoch in range(init_epochs, num_epochs+init_epochs, 1): # train for one epoch, printing every 50 iterations mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=50, warmup=True, scaler=scaler) train_loss.append(mean_loss.item()) learning_rate.append(lr) # update the learning rate lr_scheduler.step() # evaluate on the test dataset coco_info = utils.evaluate(model, val_data_loader, device=device) # write into txt with open(results_file, "a") as f: # 写入的数据包括coco指标还有loss和learning rate result_info = [f"{i:.4f}" for i in coco_info + [mean_loss.item()]] + [f"{lr:.6f}"] txt = "epoch:{} {}".format(epoch, ' '.join(result_info)) f.write(txt + "\n") val_map.append(coco_info[1]) # pascal mAP # save weights # 仅保存最后5个epoch的权重 if epoch in range(num_epochs+init_epochs)[-5:]: save_files = { 'model': model.state_dict(), 'optimizer': optimizer.state_dict(), 'lr_scheduler': lr_scheduler.state_dict(), 'epoch': epoch} torch.save(save_files, "./save_weights/mobile-model-{}.pth".format(epoch)) # plot loss and lr curve if len(train_loss) != 0 and len(learning_rate) != 0: from plot_curve import plot_loss_and_lr plot_loss_and_lr(train_loss, learning_rate) # plot mAP curve if len(val_map) != 0: from plot_curve import plot_map plot_map(val_map) if __name__ == "__main__": main() ================================================ FILE: pytorch_object_detection/faster_rcnn/train_multi_GPU.py ================================================ import time import os import datetime import torch import transforms from my_dataset import VOCDataSet from backbone import resnet50_fpn_backbone from network_files import FasterRCNN, FastRCNNPredictor import train_utils.train_eval_utils as utils from train_utils import GroupedBatchSampler, create_aspect_ratio_groups, 
def create_model(num_classes):
    """Build a Faster R-CNN detector on a ResNet50-FPN backbone.

    The detector is first created with 91 classes so that the checkpoint at
    ``./backbone/fasterrcnn_resnet50_fpn_coco.pth`` can be loaded, then its
    box predictor is replaced by a fresh ``FastRCNNPredictor`` sized for
    ``num_classes``.

    Args:
        num_classes: number of output classes of the new prediction head
            (callers in this file pass "object classes + 1" for background).

    Returns:
        The model with pre-trained weights loaded and a new box predictor.
    """
    # If GPU memory is small, the backbone's default FrozenBatchNorm2d is the
    # recommended choice instead of BatchNorm2d.
    # trainable_layers is taken from
    # ['layer4', 'layer3', 'layer2', 'layer1', 'conv1']; 5 means train them all.
    fpn_backbone = resnet50_fpn_backbone(trainable_layers=3,
                                         norm_layer=torch.nn.BatchNorm2d)

    # Do NOT change 91 when training on your own data set; change the
    # num_classes argument passed to this function instead.
    detector = FasterRCNN(backbone=fpn_backbone, num_classes=91)

    # Load the pre-trained detector weights.
    # https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
    pretrained_state = torch.load("./backbone/fasterrcnn_resnet50_fpn_coco.pth", map_location='cpu')
    missing_keys, unexpected_keys = detector.load_state_dict(pretrained_state, strict=False)
    if missing_keys or unexpected_keys:
        print("missing_keys: ", missing_keys)
        print("unexpected_keys: ", unexpected_keys)

    # Swap the pre-trained head for a new one that keeps the same number of
    # input features but predicts `num_classes` classes.
    head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)

    return detector
train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) test_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset) else: train_sampler = torch.utils.data.RandomSampler(train_dataset) test_sampler = torch.utils.data.SequentialSampler(val_dataset) if args.aspect_ratio_group_factor >= 0: # 统计所有图像比例在bins区间中的位置索引 group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor) train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size) else: train_batch_sampler = torch.utils.data.BatchSampler( train_sampler, args.batch_size, drop_last=True) data_loader = torch.utils.data.DataLoader( train_dataset, batch_sampler=train_batch_sampler, num_workers=args.workers, collate_fn=train_dataset.collate_fn) data_loader_test = torch.utils.data.DataLoader( val_dataset, batch_size=1, sampler=test_sampler, num_workers=args.workers, collate_fn=train_dataset.collate_fn) print("Creating model") # create model num_classes equal background + 20 classes model = create_model(num_classes=args.num_classes + 1) model.to(device) if args.distributed and args.sync_bn: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) model_without_ddp = model if args.distributed: model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) model_without_ddp = model.module params = [p for p in model.parameters() if p.requires_grad] optimizer = torch.optim.SGD( params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) scaler = torch.cuda.amp.GradScaler() if args.amp else None # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma) lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma) # 如果传入resume参数,即上次训练的权重地址,则接着上次的参数训练 if args.resume: # If map_location is missing, torch.load will first load the module to CPU # and then copy each parameter to where it was saved, # which would 
result in all processes on the same machine using the same set of devices. checkpoint = torch.load(args.resume, map_location='cpu') # 读取之前保存的权重文件(包括优化器以及学习率策略) model_without_ddp.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) args.start_epoch = checkpoint['epoch'] + 1 if args.amp and "scaler" in checkpoint: scaler.load_state_dict(checkpoint["scaler"]) if args.test_only: utils.evaluate(model, data_loader_test, device=device) return train_loss = [] learning_rate = [] val_map = [] print("Start training") start_time = time.time() for epoch in range(args.start_epoch, args.epochs): if args.distributed: train_sampler.set_epoch(epoch) mean_loss, lr = utils.train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq, warmup=True, scaler=scaler) train_loss.append(mean_loss.item()) learning_rate.append(lr) # update learning rate lr_scheduler.step() # evaluate after every epoch coco_info = utils.evaluate(model, data_loader_test, device=device) val_map.append(coco_info[1]) # pascal mAP # 只在主进程上进行写操作 if args.rank in [-1, 0]: # write into txt with open(results_file, "a") as f: # 写入的数据包括coco指标还有loss和learning rate result_info = [f"{i:.4f}" for i in coco_info + [mean_loss.item()]] + [f"{lr:.6f}"] txt = "epoch:{} {}".format(epoch, ' '.join(result_info)) f.write(txt + "\n") if args.output_dir: # 只在主节点上执行保存权重操作 save_files = { 'model': model_without_ddp.state_dict(), 'optimizer': optimizer.state_dict(), 'lr_scheduler': lr_scheduler.state_dict(), 'args': args, 'epoch': epoch} if args.amp: save_files["scaler"] = scaler.state_dict() save_on_master(save_files, os.path.join(args.output_dir, f'model_{epoch}.pth')) total_time = time.time() - start_time total_time_str = str(datetime.timedelta(seconds=int(total_time))) print('Training time {}'.format(total_time_str)) if args.rank in [-1, 0]: # plot loss and lr curve if len(train_loss) != 0 and len(learning_rate) != 0: from 
if __name__ == "__main__":
    import argparse

    def _str2bool(value):
        """Convert a command-line token such as ``True``/``false``/``1``/``no`` to bool.

        argparse's ``type=bool`` (and an untyped flag compared by truthiness)
        treats every non-empty string as True, so ``--flag False`` would
        silently enable the feature. This converter parses the text instead.

        Raises:
            argparse.ArgumentTypeError: if the token is not a recognised boolean.
        """
        if isinstance(value, bool):
            return value
        text = str(value).strip().lower()
        if text in ("yes", "true", "t", "y", "1"):
            return True
        if text in ("no", "false", "f", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError("boolean value expected, got {!r}".format(value))

    parser = argparse.ArgumentParser(
        description=__doc__)

    # root directory of the training files (the folder that contains VOCdevkit)
    parser.add_argument('--data-path', default='./', help='dataset')
    # device type used for training
    parser.add_argument('--device', default='cuda', help='device')
    # number of object classes to detect (background NOT included)
    parser.add_argument('--num-classes', default=20, type=int, help='num_classes')
    # batch size on each GPU
    parser.add_argument('-b', '--batch-size', default=4, type=int,
                        help='images per gpu, the total batch size is $NGPU x batch_size')
    # epoch number to start (or continue) training from
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # total number of training epochs
    parser.add_argument('--epochs', default=20, type=int, metavar='N',
                        help='number of total epochs to run')
    # number of workers used for data loading and pre-processing
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    # learning rate; set it according to the number of GPUs and the batch size:
    # 0.02 / 8 * num_GPU
    parser.add_argument('--lr', default=0.02, type=float,
                        help='initial learning rate, 0.02 is the default value for training '
                             'on 8 gpus and 2 images_per_gpu')
    # momentum of SGD
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # weight decay of SGD
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # parameter for torch.optim.lr_scheduler.StepLR
    parser.add_argument('--lr-step-size', default=8, type=int, help='decrease lr every step-size epochs')
    # parameter for torch.optim.lr_scheduler.MultiStepLR (passed as `milestones`)
    parser.add_argument('--lr-steps', default=[7, 12], nargs='+', type=int,
                        help='epochs at which lr is decreased (MultiStepLR milestones)')
    # parameter for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')
    # how often training information is printed
    parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
    # where output files are saved
    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')
    # continue training from a previous result
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
    # do not train, only test
    parser.add_argument(
        "--test-only",
        dest="test_only",
        help="Only test the model",
        action="store_true",
    )

    # number of processes to start (note: processes, not threads)
    parser.add_argument('--world-size', default=4, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
    # `nargs='?'` + `const=True` keeps "--sync-bn True" working and also
    # accepts a bare "--sync-bn"; "--sync-bn False" now really means False.
    parser.add_argument("--sync-bn", dest="sync_bn", help="Use sync batch norm",
                        type=_str2bool, nargs='?', const=True, default=False)
    # whether to use mixed precision training (needs GPU support);
    # parsed as a real boolean so that "--amp False" does not enable it
    parser.add_argument("--amp", type=_str2bool, nargs='?', const=True, default=False,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()

    # if an output directory was given, check that it exists and create it if not
    if args.output_dir:
        mkdir(args.output_dir)

    main(args)
def main(args):
    """Train Faster R-CNN (ResNet50-FPN) on VOC2012 and evaluate after every epoch.

    Attributes read from ``args``: ``device``, ``data_path``,
    ``aspect_ratio_group_factor``, ``batch_size``, ``num_classes``, ``lr``,
    ``momentum``, ``weight_decay``, ``amp``, ``resume``, ``start_epoch``,
    ``epochs`` and, if present, ``output_dir``.

    Side effects:
        * appends one line per epoch to a time-stamped ``results*.txt`` file;
        * saves one checkpoint per epoch into ``args.output_dir``
          (``./save_weights`` when the attribute is missing or empty);
        * plots the loss / learning-rate and mAP curves via ``plot_curve``.

    Raises:
        FileNotFoundError: if no ``VOCdevkit`` folder exists under ``args.data_path``.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    # file used to store coco_info
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    # Checkpoints used to go to a hard-coded "./save_weights" even when the
    # caller asked for (and created) a different --output-dir; honour it here.
    output_dir = getattr(args, "output_dir", None) or "./save_weights"
    os.makedirs(output_dir, exist_ok=True)

    data_transform = {
        "train": transforms.Compose([transforms.ToTensor(),
                                     transforms.RandomHorizontalFlip(0.5)]),
        "val": transforms.Compose([transforms.ToTensor()])
    }

    VOC_root = args.data_path
    # check voc root
    if not os.path.exists(os.path.join(VOC_root, "VOCdevkit")):
        raise FileNotFoundError("VOCdevkit dose not in path:'{}'.".format(VOC_root))

    # load train data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt
    train_dataset = VOCDataSet(VOC_root, "2012", data_transform["train"], "train.txt")
    train_sampler = None

    # Optionally build batches from images with a similar aspect ratio.
    # Doing so reduces the GPU memory needed for training; enabled by default.
    if args.aspect_ratio_group_factor >= 0:
        train_sampler = torch.utils.data.RandomSampler(train_dataset)
        # index of the aspect-ratio bin every image falls into
        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)
        # every batch draws its images from a single aspect-ratio bin
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)

    # NOTE: collate_fn is custom because a sample consists of an image and its
    # targets, which the default collate function cannot merge into a batch.
    batch_size = args.batch_size
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using %g dataloader workers' % nw)
    if train_sampler:
        # sampling by aspect ratio requires passing a batch_sampler
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_sampler=train_batch_sampler,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)
    else:
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_size=batch_size,
                                                        shuffle=True,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)

    # load validation data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt
    val_dataset = VOCDataSet(VOC_root, "2012", data_transform["val"], "val.txt")
    val_data_set_loader = torch.utils.data.DataLoader(val_dataset,
                                                      batch_size=1,
                                                      shuffle=False,
                                                      pin_memory=True,
                                                      num_workers=nw,
                                                      collate_fn=val_dataset.collate_fn)

    # create model; num_classes equals background + object classes
    model = create_model(num_classes=args.num_classes + 1)
    # print(model)

    model.to(device)

    # define optimizer over the parameters that are still trainable
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.33)

    # if a checkpoint from a previous run is given, continue from it
    if args.resume != "":
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp and "scaler" in checkpoint:
            scaler.load_state_dict(checkpoint["scaler"])
        print("the training process from epoch{}...".format(args.start_epoch))

    train_loss = []
    learning_rate = []
    val_map = []

    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch, printing every 50 iterations
        mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader,
                                              device=device, epoch=epoch,
                                              print_freq=50, warmup=True,
                                              scaler=scaler)
        train_loss.append(mean_loss.item())
        learning_rate.append(lr)

        # update the learning rate
        lr_scheduler.step()

        # evaluate on the test dataset
        coco_info = utils.evaluate(model, val_data_set_loader, device=device)

        # write into txt
        with open(results_file, "a") as f:
            # the line holds the coco metrics followed by loss and learning rate
            result_info = [f"{i:.4f}" for i in coco_info + [mean_loss.item()]] + [f"{lr:.6f}"]
            txt = "epoch:{} {}".format(epoch, '  '.join(result_info))
            f.write(txt + "\n")

        val_map.append(coco_info[1])  # pascal mAP

        # save weights
        save_files = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler': lr_scheduler.state_dict(),
            'epoch': epoch}
        if args.amp:
            save_files["scaler"] = scaler.state_dict()
        torch.save(save_files, os.path.join(output_dir, "resNetFpn-model-{}.pth".format(epoch)))

    # plot loss and lr curve
    if train_loss and learning_rate:
        from plot_curve import plot_loss_and_lr
        plot_loss_and_lr(train_loss, learning_rate)

    # plot mAP curve
    if val_map:
        from plot_curve import plot_map
        plot_map(val_map)
class CocoEvaluator(object):
    """Collect model predictions and score them with ``COCOeval``.

    One ``COCOeval`` object is kept per requested IoU type. Predictions are
    fed in through :meth:`update`; :meth:`synchronize_between_processes`
    merges the per-batch results, after which :meth:`accumulate` and
    :meth:`summarize` produce the final metrics.
    """

    def __init__(self, coco_gt, iou_types):
        """
        Args:
            coco_gt: ground-truth COCO api object; a deep copy is stored so the
                caller's object is not touched by the evaluation.
            iou_types: list or tuple of IoU type names ("bbox", "segm",
                "keypoints" are the ones :meth:`prepare` understands).
        """
        assert isinstance(iou_types, (list, tuple))
        coco_gt = copy.deepcopy(coco_gt)
        self.coco_gt = coco_gt

        self.iou_types = iou_types
        self.coco_eval = {}
        for iou_type in iou_types:
            self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type)

        self.img_ids = []
        self.eval_imgs = {k: [] for k in iou_types}

    def update(self, predictions):
        """Evaluate one batch of predictions and store the per-image results.

        Args:
            predictions: dict mapping an image id to that image's prediction
                dict (see the ``prepare_for_coco_*`` methods for the keys used).
        """
        img_ids = list(np.unique(list(predictions.keys())))
        self.img_ids.extend(img_ids)

        for iou_type in self.iou_types:
            results = self.prepare(predictions, iou_type)
            # loadRes cannot index into an empty result list, so fall back to
            # an empty COCO object when nothing was predicted.
            coco_dt = loadRes(self.coco_gt, results) if results else COCO()
            coco_eval = self.coco_eval[iou_type]

            coco_eval.cocoDt = coco_dt
            coco_eval.params.imgIds = list(img_ids)
            # evaluate() also returns the image ids it used; they are not
            # needed here, so the batch's own `img_ids` is left untouched.
            _, eval_imgs = evaluate(coco_eval)

            self.eval_imgs[iou_type].append(eval_imgs)

    def synchronize_between_processes(self):
        """Merge the stored per-batch results into every ``COCOeval`` object."""
        for iou_type in self.iou_types:
            # axis 2 is the image axis of the arrays returned by evaluate()
            self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2)
            create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type])

    def accumulate(self):
        """Call ``accumulate()`` on every ``COCOeval`` object."""
        for coco_eval in self.coco_eval.values():
            coco_eval.accumulate()

    def summarize(self):
        """Print the IoU type and call ``summarize()`` for every ``COCOeval`` object."""
        for iou_type, coco_eval in self.coco_eval.items():
            print("IoU metric: {}".format(iou_type))
            coco_eval.summarize()

    def prepare(self, predictions, iou_type):
        """Convert ``predictions`` into a list of COCO result dicts.

        Raises:
            ValueError: if ``iou_type`` is not "bbox", "segm" or "keypoints".
        """
        if iou_type == "bbox":
            return self.prepare_for_coco_detection(predictions)
        elif iou_type == "segm":
            return self.prepare_for_coco_segmentation(predictions)
        elif iou_type == "keypoints":
            return self.prepare_for_coco_keypoint(predictions)
        else:
            raise ValueError("Unknown iou type {}".format(iou_type))

    def prepare_for_coco_detection(self, predictions):
        """Build one ``{image_id, category_id, bbox, score}`` dict per predicted box.

        Images whose prediction is empty are skipped. Boxes are passed through
        ``convert_to_xywh`` before being turned into lists.
        """
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            boxes = prediction["boxes"]
            boxes = convert_to_xywh(boxes).tolist()
            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()

            coco_results.extend(
                [
                    {
                        "image_id": original_id,
                        "category_id": labels[k],
                        "bbox": box,
                        "score": scores[k],
                    }
                    for k, box in enumerate(boxes)
                ]
            )
        return coco_results

    def prepare_for_coco_segmentation(self, predictions):
        """Build one ``{image_id, category_id, segmentation, score}`` dict per mask.

        Masks are thresholded at 0.5 and RLE-encoded with ``mask_util.encode``;
        the RLE ``counts`` bytes are decoded to ``str``.
        Images whose prediction is empty are skipped.
        """
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()

            # binarise the predicted masks
            masks = prediction["masks"] > 0.5

            rles = [
                mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0]
                for mask in masks
            ]
            for rle in rles:
                rle["counts"] = rle["counts"].decode("utf-8")

            coco_results.extend(
                [
                    {
                        "image_id": original_id,
                        "category_id": labels[k],
                        "segmentation": rle,
                        "score": scores[k],
                    }
                    for k, rle in enumerate(rles)
                ]
            )
        return coco_results

    def prepare_for_coco_keypoint(self, predictions):
        """Build one ``{image_id, category_id, keypoints, score}`` dict per instance.

        Each instance's keypoints are flattened into a single list.
        Images whose prediction is empty are skipped.
        """
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            # NOTE(review): the converted boxes are not part of the result
            # dicts; the lookup is kept so a missing "boxes" key still fails
            # the same way as before.
            boxes = prediction["boxes"]
            boxes = convert_to_xywh(boxes).tolist()
            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()
            keypoints = prediction["keypoints"]
            keypoints = keypoints.flatten(start_dim=1).tolist()

            coco_results.extend(
                [
                    {
                        "image_id": original_id,
                        "category_id": labels[k],
                        'keypoints': keypoint,
                        "score": scores[k],
                    }
                    for k, keypoint in enumerate(keypoints)
                ]
            )
        return coco_results
def loadRes(self, resFile):
    """
    Load result file and return a result api object.

    ``self`` is the ground-truth COCO object the results belong to (this is a
    free function that takes it explicitly). The annotation dicts in the
    results are completed IN PLACE (``id``, ``area``, ``iscrowd``, and a
    derived ``bbox``/``segmentation`` where missing), based on the keys found
    in the first annotation.

    :param   resFile (str | bytes | np.ndarray | list) : file name of a JSON
             result file, a numpy array handled by ``self.loadNumpyAnnotations``,
             or an already loaded list of annotation dicts
    :return: res (obj)         : result api object
    """
    res = COCO()
    res.dataset['images'] = list(self.dataset['images'])

    # Plain str/bytes check instead of the private `torch._six.string_classes`
    # helper, which newer PyTorch releases no longer ship.
    if isinstance(resFile, (str, bytes)):
        # context manager so the file handle is closed deterministically
        with open(resFile) as f:
            anns = json.load(f)
    elif isinstance(resFile, np.ndarray):
        anns = self.loadNumpyAnnotations(resFile)
    else:
        anns = resFile
    assert isinstance(anns, list), 'results in not an array of objects'
    annsImgIds = [ann['image_id'] for ann in anns]
    # every image id in the results must exist in the ground truth
    assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
        'Results do not correspond to current coco set'

    # The result type is decided from the first annotation; an empty result
    # list has nothing to complete and simply yields an empty result object.
    first = anns[0] if anns else {}
    if 'caption' in first:
        imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns])
        res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
        for idx, ann in enumerate(anns):
            ann['id'] = idx + 1
    elif 'bbox' in first and not first['bbox'] == []:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            bb = ann['bbox']
            # bb is used as [x, y, w, h]: corners are x .. x + w and y .. y + h
            x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
            if 'segmentation' not in ann:
                # use the box outline as a polygon segmentation
                ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
            ann['area'] = bb[2] * bb[3]
            ann['id'] = idx + 1
            ann['iscrowd'] = 0
    elif 'segmentation' in first:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            # now only support compressed RLE format as segmentation results
            ann['area'] = maskUtils.area(ann['segmentation'])
            if 'bbox' not in ann:
                ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
            ann['id'] = idx + 1
            ann['iscrowd'] = 0
    elif 'keypoints' in first:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            s = ann['keypoints']
            # keypoints are read as a flat list with stride 3: x at 0, y at 1
            # (the third value of each triplet is not used here)
            x = s[0::3]
            y = s[1::3]
            x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y)
            ann['area'] = (x2 - x1) * (y2 - y1)
            ann['id'] = idx + 1
            ann['bbox'] = [x1, y1, x2 - x1, y2 - y1]

    res.dataset['annotations'] = anns
    createIndex(res)
    return res
class SmoothedValue(object):
    """Running statistics for a stream of scalar values.

    Keeps the most recent ``window_size`` values in a bounded deque (for the
    windowed median / mean / max / last value) and, separately, a running
    ``total`` and ``count`` over everything ever passed to :meth:`update`
    (for the global average).
    """

    def __init__(self, window_size=20, fmt=None):
        # ``fmt`` may reference: median, avg, global_avg, max, value
        self.fmt = "{value:.4f} ({global_avg:.4f})" if fmt is None else fmt
        # a deque with maxlen drops the oldest entry automatically
        self.deque = deque(maxlen=window_size)
        self.count = 0
        self.total = 0.0

    def update(self, value, n=1):
        """Record ``value``; it counts ``n`` times towards the global average."""
        self.deque.append(value)
        self.total += value * n
        self.count += n

    def synchronize_between_processes(self):
        """Sum ``count`` and ``total`` across all processes.

        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        packed = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda")
        dist.barrier()
        dist.all_reduce(packed)
        reduced_count, reduced_total = packed.tolist()
        self.count = int(reduced_count)
        self.total = reduced_total

    @property
    def median(self):
        """Median of the values currently in the window (read-only)."""
        window = torch.tensor(list(self.deque))
        return window.median().item()

    @property
    def avg(self):
        """Mean of the values currently in the window, computed in float32."""
        window = torch.tensor(list(self.deque), dtype=torch.float32)
        return window.mean().item()

    @property
    def global_avg(self):
        """Weighted mean over every update since construction."""
        return self.total / self.count

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    @property
    def value(self):
        """Most recently recorded value."""
        return self.deque[-1]

    def __str__(self):
        fields = {
            "median": self.median,
            "avg": self.avg,
            "global_avg": self.global_avg,
            "max": self.max,
            "value": self.value,
        }
        return self.fmt.format(**fields)
def reduce_dict(input_dict, average=True):
    """Reduce every value of ``input_dict`` across all processes.

    Args:
        input_dict (dict): maps names to tensors; the values are stacked along
            a new leading dimension before being reduced.
        average (bool): divide the summed values by the world size when True,
            otherwise return the plain sum.

    Returns:
        A dict with the same keys as ``input_dict`` holding the reduced
        values. With fewer than two processes ``input_dict`` itself is
        returned untouched.
    """
    world_size = get_world_size()
    if world_size < 2:
        # single-process run: nothing to reduce
        return input_dict

    with torch.no_grad():
        # sort the keys so that every process stacks values in the same order
        names = sorted(input_dict.keys())
        stacked = torch.stack([input_dict[name] for name in names], dim=0)
        dist.all_reduce(stacked)
        if average:
            stacked /= world_size
        reduced_dict = dict(zip(names, stacked))
    return reduced_dict
def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
    """Build a ``LambdaLR`` scheduler that ramps the learning rate up linearly.

    Args:
        optimizer: optimizer whose learning rate is scaled.
        warmup_iters: number of scheduler steps the ramp lasts.
        warmup_factor: multiplier applied at step 0.

    Returns:
        A ``torch.optim.lr_scheduler.LambdaLR`` whose multiplier goes from
        ``warmup_factor`` to 1 over ``warmup_iters`` steps and stays at 1
        afterwards.
    """

    def lr_multiplier(step):
        """Return the learning-rate multiplier for the given step count."""
        if step < warmup_iters:
            # linear interpolation: warmup_factor at progress 0, 1 at progress 1
            progress = float(step) / warmup_iters
            return warmup_factor * (1 - progress) + progress
        # warmup finished: use the base learning rate unchanged
        return 1

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_multiplier)
def init_distributed_mode(args):
    """Fill in the distributed fields of ``args`` and start the process group.

    Reads ``RANK`` / ``WORLD_SIZE`` / ``LOCAL_RANK`` from the environment, or
    ``SLURM_PROCID`` as a fallback. When neither is present,
    ``args.distributed`` is set to ``False`` and the function returns without
    touching anything else.

    In the distributed case it sets ``args.rank``, ``args.gpu``,
    ``args.distributed`` and ``args.dist_backend`` (``args.world_size`` only in
    the ``RANK``/``WORLD_SIZE`` branch), selects the CUDA device, initializes
    the NCCL process group from ``args.dist_url``, waits on a barrier and then
    calls ``setup_for_distributed`` with whether this is rank 0.
    """
    env = os.environ
    has_rank_env = 'RANK' in env and 'WORLD_SIZE' in env

    if not has_rank_env and 'SLURM_PROCID' not in env:
        print('Not using distributed mode')
        args.distributed = False
        return

    if has_rank_env:
        args.rank = int(env["RANK"])
        args.world_size = int(env['WORLD_SIZE'])
        args.gpu = int(env['LOCAL_RANK'])
    else:
        # SLURM branch: world_size is not set here and must already be on args
        args.rank = int(env['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    init_message = '| distributed init (rank {}): {}'.format(args.rank, args.dist_url)
    print(init_message, flush=True)
    torch.distributed.init_process_group(
        backend=args.dist_backend,
        init_method=args.dist_url,
        world_size=args.world_size,
        rank=args.rank,
    )
    # with torch 1.9 or newer it is recommended to pass device_ids=[args.rank]
    torch.distributed.barrier()
    is_master = args.rank == 0
    setup_for_distributed(is_master)
Image def _repeat_to_at_least(iterable, n): repeat_times = math.ceil(n / len(iterable)) repeated = chain.from_iterable(repeat(iterable, repeat_times)) return list(repeated) class GroupedBatchSampler(BatchSampler): """ Wraps another sampler to yield a mini-batch of indices. It enforces that the batch only contain elements from the same group. It also tries to provide mini-batches which follows an ordering which is as close as possible to the ordering from the original sampler. Arguments: sampler (Sampler): Base sampler. group_ids (list[int]): If the sampler produces indices in range [0, N), `group_ids` must be a list of `N` ints which contains the group id of each sample. The group ids must be a continuous set of integers starting from 0, i.e. they must be in the range [0, num_groups). batch_size (int): Size of mini-batch. """ def __init__(self, sampler, group_ids, batch_size): if not isinstance(sampler, Sampler): raise ValueError( "sampler should be an instance of " "torch.utils.data.Sampler, but got sampler={}".format(sampler) ) self.sampler = sampler self.group_ids = group_ids self.batch_size = batch_size def __iter__(self): buffer_per_group = defaultdict(list) samples_per_group = defaultdict(list) num_batches = 0 for idx in self.sampler: group_id = self.group_ids[idx] buffer_per_group[group_id].append(idx) samples_per_group[group_id].append(idx) if len(buffer_per_group[group_id]) == self.batch_size: yield buffer_per_group[group_id] num_batches += 1 del buffer_per_group[group_id] assert len(buffer_per_group[group_id]) < self.batch_size # now we have run out of elements that satisfy # the group criteria, let's return the remaining # elements so that the size of the sampler is # deterministic expected_num_batches = len(self) num_remaining = expected_num_batches - num_batches if num_remaining > 0: # for the remaining batches, take first the buffers with largest number # of elements for group_id, _ in sorted(buffer_per_group.items(), key=lambda x: len(x[1]), 
reverse=True): remaining = self.batch_size - len(buffer_per_group[group_id]) samples_from_group_id = _repeat_to_at_least(samples_per_group[group_id], remaining) buffer_per_group[group_id].extend(samples_from_group_id[:remaining]) assert len(buffer_per_group[group_id]) == self.batch_size yield buffer_per_group[group_id] num_remaining -= 1 if num_remaining == 0: break assert num_remaining == 0 def __len__(self): return len(self.sampler) // self.batch_size def _compute_aspect_ratios_slow(dataset, indices=None): print("Your dataset doesn't support the fast path for " "computing the aspect ratios, so will iterate over " "the full dataset and load every image instead. " "This might take some time...") if indices is None: indices = range(len(dataset)) class SubsetSampler(Sampler): def __init__(self, indices): self.indices = indices def __iter__(self): return iter(self.indices) def __len__(self): return len(self.indices) sampler = SubsetSampler(indices) data_loader = torch.utils.data.DataLoader( dataset, batch_size=1, sampler=sampler, num_workers=14, # you might want to increase it for faster processing collate_fn=lambda x: x[0]) aspect_ratios = [] with tqdm(total=len(dataset)) as pbar: for _i, (img, _) in enumerate(data_loader): pbar.update(1) height, width = img.shape[-2:] aspect_ratio = float(width) / float(height) aspect_ratios.append(aspect_ratio) return aspect_ratios def _compute_aspect_ratios_custom_dataset(dataset, indices=None): if indices is None: indices = range(len(dataset)) aspect_ratios = [] for i in indices: height, width = dataset.get_height_and_width(i) aspect_ratio = float(width) / float(height) aspect_ratios.append(aspect_ratio) return aspect_ratios def _compute_aspect_ratios_coco_dataset(dataset, indices=None): if indices is None: indices = range(len(dataset)) aspect_ratios = [] for i in indices: img_info = dataset.coco.imgs[dataset.ids[i]] aspect_ratio = float(img_info["width"]) / float(img_info["height"]) aspect_ratios.append(aspect_ratio) return 
aspect_ratios def _compute_aspect_ratios_voc_dataset(dataset, indices=None): if indices is None: indices = range(len(dataset)) aspect_ratios = [] for i in indices: # this doesn't load the data into memory, because PIL loads it lazily width, height = Image.open(dataset.images[i]).size aspect_ratio = float(width) / float(height) aspect_ratios.append(aspect_ratio) return aspect_ratios def _compute_aspect_ratios_subset_dataset(dataset, indices=None): if indices is None: indices = range(len(dataset)) ds_indices = [dataset.indices[i] for i in indices] return compute_aspect_ratios(dataset.dataset, ds_indices) def compute_aspect_ratios(dataset, indices=None): if hasattr(dataset, "get_height_and_width"): return _compute_aspect_ratios_custom_dataset(dataset, indices) if isinstance(dataset, torchvision.datasets.CocoDetection): return _compute_aspect_ratios_coco_dataset(dataset, indices) if isinstance(dataset, torchvision.datasets.VOCDetection): return _compute_aspect_ratios_voc_dataset(dataset, indices) if isinstance(dataset, torch.utils.data.Subset): return _compute_aspect_ratios_subset_dataset(dataset, indices) # slow path return _compute_aspect_ratios_slow(dataset, indices) def _quantize(x, bins): bins = copy.deepcopy(bins) bins = sorted(bins) # bisect_right:寻找y元素按顺序应该排在bins中哪个元素的右边,返回的是索引 quantized = list(map(lambda y: bisect.bisect_right(bins, y), x)) return quantized def create_aspect_ratio_groups(dataset, k=0): # 计算所有数据集中的图片width/height比例 aspect_ratios = compute_aspect_ratios(dataset) # 将[0.5, 2]区间划分成2*k等份(2k+1个点,2k个区间) bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist() if k > 0 else [1.0] # 统计所有图像比例在bins区间中的位置索引 groups = _quantize(aspect_ratios, bins) # count number of elements per group # 统计每个区间的频次 counts = np.unique(groups, return_counts=True)[1] fbins = [0] + bins + [np.inf] print("Using {} as bins for aspect ratio quantization".format(fbins)) print("Count of instances per bin: {}".format(counts)) return groups ================================================ 
def train_one_epoch(model, optimizer, data_loader, device, epoch,
                    print_freq=50, warmup=False, scaler=None):
    """Train ``model`` for one pass over ``data_loader``.

    Args:
        model: called as ``model(images, targets)``; the result is used as a
            dict of loss tensors.
        optimizer: stepped once per batch.
        data_loader: iterable yielding ``[images, targets]`` batches; its
            length is used to size the warmup.
        device: device the images and the target tensors are moved to.
        epoch: epoch index, used for the log header and to enable warmup
            (only when it equals 0).
        print_freq: logging interval forwarded to ``MetricLogger.log_every``.
        warmup: when ``True`` and ``epoch == 0``, a warmup LR scheduler is
            created and stepped after every batch.
        scaler: optional gradient scaler; when it is not ``None`` autocast is
            enabled and the scaler drives backward / optimizer step.

    Returns:
        ``(mloss, now_lr)``: the running mean of the reduced total loss (a
        one-element tensor on ``device``) and the learning rate of the first
        parameter group after the last batch.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if epoch == 0 and warmup is True:  # warmup is only used during the first epoch (epoch=0)
        warmup_factor = 1.0 / 1000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    mloss = torch.zeros(1).to(device)  # mean losses
    for i, [images, targets] in enumerate(metric_logger.log_every(data_loader, print_freq, header)):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # mixed-precision context manager; it has no effect in a CPU environment
        with torch.cuda.amp.autocast(enabled=scaler is not None):
            loss_dict = model(images, targets)

            losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purpose
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()
        # keep a running mean of the training loss
        mloss = (mloss * i + loss_value) / (i + 1)  # update mean losses

        if not math.isfinite(loss_value):  # stop training when the loss is not finite
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        if scaler is not None:
            # scaled backward pass; the scaler performs the optimizer step
            scaler.scale(losses).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            losses.backward()
            optimizer.step()

        if lr_scheduler is not None:  # only set during the warmup (first) epoch
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        now_lr = optimizer.param_groups[0]["lr"]
        metric_logger.update(lr=now_lr)

    return mloss, now_lr
def summarize(self, catId=None):
    """Compute the 12 COCO summary metrics, optionally for a single category.

    Note this function can *only* be applied on the default parameter setting.

    Args:
        self: a ``COCOeval``-style object on which ``accumulate()`` has been
            run. The function reads ``self.params`` (``iouThrs``,
            ``areaRngLbl``, ``maxDets``) and ``self.eval['precision']`` /
            ``self.eval['recall']``.
        catId: when it is a Python ``int``, only that index along the category
            axis is summarized; any other value (including ``None``)
            summarizes all categories.

    Returns:
        ``(stats, print_info)``: a list with the 12 metric values (6 AP
        followed by 6 AR) and the matching human-readable report, one line per
        metric joined with newlines.

    Raises:
        Exception: if ``self.eval`` is empty, i.e. ``accumulate()`` has not
            been run. The check now happens before any metric is computed;
            previously it sat after the computation and was unreachable in
            that situation.
    """
    if not self.eval:
        raise Exception('Please run accumulate() first')

    def _summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):
        """Return (mean value, formatted line) for one metric configuration."""
        p = self.params
        iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
        titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
        typeStr = '(AP)' if ap == 1 else '(AR)'
        iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
            if iouThr is None else '{:0.2f}'.format(iouThr)

        # positions of the requested area range / detection budget
        aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
        mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]

        if ap == 1:
            # dimension of precision: [TxRxKxAxM]
            s = self.eval['precision']
            # restrict to a single IoU threshold when requested
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]

            if isinstance(catId, int):
                s = s[:, :, catId, aind, mind]
            else:
                s = s[:, :, :, aind, mind]
        else:
            # dimension of recall: [TxKxAxM]
            s = self.eval['recall']
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]

            if isinstance(catId, int):
                s = s[:, catId, aind, mind]
            else:
                s = s[:, :, aind, mind]

        # only entries greater than -1 take part in the mean
        valid = s[s > -1]
        if len(valid) == 0:
            mean_s = -1
        else:
            mean_s = np.mean(valid)

        print_string = iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)
        return mean_s, print_string

    max_dets = self.params.maxDets
    # same order as the standard COCO report: 6 AP entries, then 6 AR entries
    configs = [
        dict(ap=1),
        dict(ap=1, iouThr=.5, maxDets=max_dets[2]),
        dict(ap=1, iouThr=.75, maxDets=max_dets[2]),
        dict(ap=1, areaRng='small', maxDets=max_dets[2]),
        dict(ap=1, areaRng='medium', maxDets=max_dets[2]),
        dict(ap=1, areaRng='large', maxDets=max_dets[2]),
        dict(ap=0, maxDets=max_dets[0]),
        dict(ap=0, maxDets=max_dets[1]),
        dict(ap=0, maxDets=max_dets[2]),
        dict(ap=0, areaRng='small', maxDets=max_dets[2]),
        dict(ap=0, areaRng='medium', maxDets=max_dets[2]),
        dict(ap=0, areaRng='large', maxDets=max_dets[2]),
    ]

    stats, print_list = [], []
    for config in configs:
        value, line = _summarize(**config)
        stats.append(value)
        print_list.append(line)

    print_info = "\n".join(print_list)
    return stats, print_info
if __name__ == "__main__":
    # Command-line entry point: parse the options below and hand them to main().
    import argparse

    parser = argparse.ArgumentParser(
        description=__doc__)

    # device type to run on
    parser.add_argument('--device', default='cuda', help='device')

    # number of object classes to detect
    parser.add_argument('--num-classes', type=int, default='20', help='number of classes')

    # root directory of the dataset (the directory containing VOCdevkit)
    parser.add_argument('--data-path', default='/data/', help='dataset root')

    # trained weights file
    parser.add_argument('--weights-path', default='./save_weights/model.pth', type=str, help='training weights')

    # batch size
    parser.add_argument('--batch_size', default=1, type=int, metavar='N',
                        help='batch size when validation.')

    args = parser.parse_args()

    main(args)
instances_val2017.json: 对应目标检测、分割任务的验证集标注文件 ├── captions_train2017.json: 对应图像描述的训练集标注文件 ├── captions_val2017.json: 对应图像描述的验证集标注文件 ├── person_keypoints_train2017.json: 对应人体关键点检测的训练集标注文件 └── person_keypoints_val2017.json: 对应人体关键点检测的验证集标注文件 ``` ### Pascal VOC2012数据集 * 数据集下载地址: http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html#devkit * 对数据集不了解的可以看下我写的博文:https://blog.csdn.net/qq_37541097/article/details/115787033 * 解压后得到的文件夹结构如下: ``` VOCdevkit └── VOC2012 ├── Annotations 所有的图像标注信息(XML文件) ├── ImageSets │ ├── Action 人的行为动作图像信息 │ ├── Layout 人的各个部位图像信息 │ │ │ ├── Main 目标检测分类图像信息 │ │ ├── train.txt 训练集(5717) │ │ ├── val.txt 验证集(5823) │ │ └── trainval.txt 训练集+验证集(11540) │ │ │ └── Segmentation 目标分割图像信息 │ ├── train.txt 训练集(1464) │ ├── val.txt 验证集(1449) │ └── trainval.txt 训练集+验证集(2913) │ ├── JPEGImages 所有图像文件 ├── SegmentationClass 语义分割png图(基于类别) └── SegmentationObject 实例分割png图(基于目标) ``` ## 训练方法 * 确保提前准备好数据集 * 确保提前下载好对应预训练模型权重 * 确保设置好`--num-classes`和`--data-path` * 若要使用单GPU训练直接使用train.py训练脚本 * 若要使用多GPU训练,使用`torchrun --nproc_per_node=8 train_multi_GPU.py`指令,`nproc_per_node`参数为使用GPU数量 * 如果想指定使用哪些GPU设备可在指令前加上`CUDA_VISIBLE_DEVICES=0,3`(例如我只要使用设备中的第1块和第4块GPU设备) * `CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py` ## 注意事项 1. 在使用训练脚本时,注意要将`--data-path`设置为自己存放数据集的**根目录**: ``` # 假设要使用COCO数据集,启用自定义数据集读取CocoDetection并将数据集解压到/data/coco2017目录下 python train.py --data-path /data/coco2017 # 假设要使用Pascal VOC数据集,启用自定义数据集读取VOCInstances并将数据集解压到/data/VOCdevkit目录下 python train.py --data-path /data/VOCdevkit ``` 2. 如果倍增`batch_size`,建议学习率也跟着倍增。假设将`batch_size`从4设置成8,那么学习率`lr`从0.004设置成0.008 3. 使用Batch Normalization模块时,`batch_size`不能小于4,否则效果会变差。**如果显存不够,batch_size必须小于4时**,建议在创建`resnet50_fpn_backbone`时, 将`norm_layer`设置成`FrozenBatchNorm2d`或将`trainable_layers`设置成0(即冻结整个`backbone`) 4. 训练过程中保存的`det_results.txt`(目标检测任务)以及`seg_results.txt`(实例分割任务)是每个epoch在验证集上的COCO指标,前12个值是COCO指标,后面两个值是训练平均损失以及学习率 5. 在使用预测脚本时,要将`weights_path`设置为你自己生成的权重路径。 6.
使用validation文件时,注意确保你的验证集或者测试集中必须包含每个类别的目标,并且使用时需要修改`--num-classes`、`--data-path`、`--weights-path`以及 `--label-json-path`(该参数是根据训练的数据集设置的)。其他代码尽量不要改动 ## 复现结果 在COCO2017数据集上进行复现,训练过程中仅载入Resnet50的预训练权重,训练26个epochs。训练采用指令如下: ``` torchrun --nproc_per_node=8 train_multi_GPU.py --batch-size 8 --lr 0.08 --pretrain False --amp True ``` 训练得到权重下载地址: https://pan.baidu.com/s/1qpXUIsvnj8RHY-V05J-mnA 密码: 63d5 在COCO2017验证集上的mAP(目标检测任务): ``` Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.381 Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.588 Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.411 Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.215 Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.420 Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.492 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.315 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.499 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.523 Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.319 Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.565 Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.666 ``` 在COCO2017验证集上的mAP(实例分割任务): ``` Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.340 Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.552 Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.361 Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.151 Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.369 Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.500 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.290 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.449 Average Recall (AR) @[ IoU=0.50:0.95 | area= 
class IntermediateLayerGetter(nn.ModuleDict):
    """
    Wrap a model so that a forward pass returns selected intermediate outputs.

    The wrapper assumes the direct children of ``model`` were registered in
    the same order in which they are executed, and that no child is used
    twice in the forward pass. Only direct children can be queried:
    ``model.feature1`` can be returned, ``model.feature1.layer2`` cannot.

    Arguments:
        model (nn.Module): model from which the features are extracted
        return_layers (Dict[name, new_name]): maps the name of each child
            whose activation should be returned to the key under which that
            activation is stored in the output dict.

    Raises:
        ValueError: if a key of ``return_layers`` is not a direct child of
            ``model``.
    """
    __annotations__ = {
        "return_layers": Dict[str, str],
    }

    def __init__(self, model, return_layers):
        child_names = [child_name for child_name, _ in model.named_children()]
        if not set(return_layers).issubset(child_names):
            raise ValueError("return_layers are not present in model")

        # Names (compared as strings) that still have to be reached while
        # walking the children in registration order.
        pending = {str(key) for key, _ in return_layers.items()}

        # Keep every child up to and including the last requested one and
        # drop whatever comes after it (e.g. the classification head).
        kept = OrderedDict()
        for child_name, child in model.named_children():
            kept[child_name] = child
            pending.discard(child_name)
            if not pending:
                break

        super().__init__(kept)
        self.return_layers = return_layers

    def forward(self, x):
        """Run the kept children in order and collect the requested outputs."""
        collected = OrderedDict()
        for child_name, child in self.items():
            x = child(x)
            if child_name not in self.return_layers:
                continue
            collected[self.return_layers[child_name]] = x
        return collected
class FeaturePyramidNetwork(nn.Module):
    """
    Module that adds a FPN on top of a set of feature maps. This is based on
    `"Feature Pyramid Network for Object Detection" <https://arxiv.org/abs/1612.03144>`_.

    The feature maps are currently supposed to be in increasing depth order.

    The input to the model is expected to be an OrderedDict[Tensor], containing
    the feature maps on top of which the FPN will be added.

    Arguments:
        in_channels_list (list[int]): number of channels for each feature map that
            is passed to the module
        out_channels (int): number of channels of the FPN representation
        extra_blocks (ExtraFPNBlock or None): if provided, extra operations will
            be performed. It is expected to take the fpn features, the original
            features and the names of the original features as input, and returns
            a new list of feature maps and their corresponding names
    """

    def __init__(self, in_channels_list, out_channels, extra_blocks=None):
        super().__init__()
        # 1x1 convolutions that bring every backbone feature map
        # (layer1..layer4) to `out_channels` channels.
        self.inner_blocks = nn.ModuleList()
        # 3x3 convolutions applied to the merged maps to produce the final
        # prediction feature maps.
        self.layer_blocks = nn.ModuleList()
        for in_channels in in_channels_list:
            if in_channels == 0:
                continue
            inner_block_module = nn.Conv2d(in_channels, out_channels, 1)
            layer_block_module = nn.Conv2d(out_channels, out_channels, 3, padding=1)
            self.inner_blocks.append(inner_block_module)
            self.layer_blocks.append(layer_block_module)

        # Initialize parameters now (before extra_blocks is attached) to avoid
        # modifying the initialization of the extra blocks.
        # NOTE: this must walk self.modules(); self.children() only yields the
        # two ModuleList containers, so no Conv2d would ever be initialized.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_uniform_(m.weight, a=1)
                nn.init.constant_(m.bias, 0)

        self.extra_blocks = extra_blocks

    def get_result_from_inner_blocks(self, x: Tensor, idx: int) -> Tensor:
        """
        This is equivalent to self.inner_blocks[idx](x),
        but torchscript doesn't support this yet
        """
        num_blocks = len(self.inner_blocks)
        if idx < 0:
            idx += num_blocks
        i = 0
        out = x
        for module in self.inner_blocks:
            if i == idx:
                out = module(x)
            i += 1
        return out

    def get_result_from_layer_blocks(self, x: Tensor, idx: int) -> Tensor:
        """
        This is equivalent to self.layer_blocks[idx](x),
        but torchscript doesn't support this yet
        """
        num_blocks = len(self.layer_blocks)
        if idx < 0:
            idx += num_blocks
        i = 0
        out = x
        for module in self.layer_blocks:
            if i == idx:
                out = module(x)
            i += 1
        return out

    def forward(self, x: Dict[str, Tensor]) -> Dict[str, Tensor]:
        """
        Computes the FPN for a set of feature maps.

        Arguments:
            x (OrderedDict[Tensor]): feature maps for each feature level.

        Returns:
            results (OrderedDict[Tensor]): feature maps after FPN layers.
                They are ordered from highest resolution first.
        """
        # unpack OrderedDict into two lists for easier handling
        names = list(x.keys())
        x = list(x.values())

        # Bring the deepest feature map to out_channels
        # (equivalent to self.inner_blocks[-1](x[-1])).
        last_inner = self.get_result_from_inner_blocks(x[-1], -1)

        # `results` holds one prediction feature map per level; start with
        # the deepest level (equivalent to self.layer_blocks[-1](last_inner)).
        results = []
        results.append(self.get_result_from_layer_blocks(last_inner, -1))

        # Top-down pathway: upsample the coarser map to the size of the next
        # finer lateral map, add them, then smooth with the 3x3 convolution.
        for idx in range(len(x) - 2, -1, -1):
            inner_lateral = self.get_result_from_inner_blocks(x[idx], idx)
            feat_shape = inner_lateral.shape[-2:]
            inner_top_down = F.interpolate(last_inner, size=feat_shape, mode="nearest")
            last_inner = inner_lateral + inner_top_down
            results.insert(0, self.get_result_from_layer_blocks(last_inner, idx))

        # Optionally derive additional levels from the computed ones.
        if self.extra_blocks is not None:
            results, names = self.extra_blocks(results, x, names)

        # make it back an OrderedDict
        out = OrderedDict([(k, v) for k, v in zip(names, results)])

        return out
class ResNet(nn.Module):
    """
    ResNet trunk assembled from a residual ``block`` class.

    Args:
        block: residual block class; must expose an ``expansion`` attribute
            and accept ``(in_channel, out_channel, stride=, downsample=,
            norm_layer=)``.
        blocks_num: number of blocks in each of the four stages.
        num_classes: size of the classification output (only used when
            ``include_top`` is True).
        include_top: whether to append global average pooling and the fully
            connected classifier.
        norm_layer: normalization layer factory; ``nn.BatchNorm2d`` if None.
    """

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True, norm_layer=None):
        super().__init__()
        self._norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        self.include_top = include_top
        # Number of channels entering the next stage; updated by _make_layer.
        self.in_channel = 64

        # Stem: 7x7 stride-2 convolution followed by 3x3 stride-2 max pooling.
        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = self._norm_layer(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four residual stages; all but the first halve the resolution.
        self.layer1 = self._make_layer(block, 64, blocks_num[0])
        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)
        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)
        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)

        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        conv_layers = (m for m in self.modules() if isinstance(m, nn.Conv2d))
        for conv in conv_layers:
            nn.init.kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` blocks with base width ``channel``."""
        stage_width = channel * block.expansion

        # A projection shortcut is needed whenever the first block changes
        # the spatial size or the channel count of its input.
        shortcut = None
        if stride != 1 or self.in_channel != stage_width:
            shortcut = nn.Sequential(
                nn.Conv2d(self.in_channel, stage_width, kernel_size=1, stride=stride, bias=False),
                self._norm_layer(stage_width))

        stage = [block(self.in_channel,
                       channel,
                       downsample=shortcut,
                       stride=stride,
                       norm_layer=self._norm_layer)]
        self.in_channel = stage_width

        # The remaining blocks keep both resolution and width.
        for _ in range(block_num - 1):
            stage.append(block(self.in_channel, channel, norm_layer=self._norm_layer))

        return nn.Sequential(*stage)

    def forward(self, x):
        """Apply stem and stages; add pooling and classifier if ``include_top``."""
        trunk = (self.conv1, self.bn1, self.relu, self.maxpool,
                 self.layer1, self.layer2, self.layer3, self.layer4)
        for stage in trunk:
            x = stage(x)

        if not self.include_top:
            return x

        pooled = self.avgpool(x)
        return self.fc(torch.flatten(pooled, 1))
""" for module in model.modules(): if isinstance(module, FrozenBatchNorm2d): module.eps = eps def resnet50_fpn_backbone(pretrain_path="", norm_layer=nn.BatchNorm2d, trainable_layers=3, returned_layers=None, extra_blocks=None): """ 搭建resnet50_fpn——backbone Args: pretrain_path: resnet50的预训练权重,如果不使用就默认为空 norm_layer: 默认是nn.BatchNorm2d,如果GPU显存很小,batch_size不能设置很大, 建议将norm_layer设置成FrozenBatchNorm2d(默认是nn.BatchNorm2d) (https://github.com/facebookresearch/maskrcnn-benchmark/issues/267) trainable_layers: 指定训练哪些层结构 returned_layers: 指定哪些层的输出需要返回 extra_blocks: 在输出的特征层基础上额外添加的层结构 Returns: """ resnet_backbone = ResNet(Bottleneck, [3, 4, 6, 3], include_top=False, norm_layer=norm_layer) if isinstance(norm_layer, FrozenBatchNorm2d): overwrite_eps(resnet_backbone, 0.0) if pretrain_path != "": assert os.path.exists(pretrain_path), "{} is not exist.".format(pretrain_path) # 载入预训练权重 print(resnet_backbone.load_state_dict(torch.load(pretrain_path), strict=False)) # select layers that wont be frozen assert 0 <= trainable_layers <= 5 layers_to_train = ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'][:trainable_layers] # 如果要训练所有层结构的话,不要忘了conv1后还有一个bn1 if trainable_layers == 5: layers_to_train.append("bn1") # freeze layers for name, parameter in resnet_backbone.named_parameters(): # 只训练不在layers_to_train列表中的层结构 if all([not name.startswith(layer) for layer in layers_to_train]): parameter.requires_grad_(False) if extra_blocks is None: extra_blocks = LastLevelMaxPool() if returned_layers is None: returned_layers = [1, 2, 3, 4] # 返回的特征层个数肯定大于0小于5 assert min(returned_layers) > 0 and max(returned_layers) < 5 # return_layers = {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'} return_layers = {f'layer{k}': str(v) for v, k in enumerate(returned_layers)} # in_channel 为layer4的输出特征矩阵channel = 2048 in_channels_stage2 = resnet_backbone.in_channel // 8 # 256 # 记录resnet50提供给fpn的每个特征层channel in_channels_list = [in_channels_stage2 * 2 ** (i - 1) for i in returned_layers] # 通过fpn后得到的每个特征层的channel 
out_channels = 256 return BackboneWithFPN(resnet_backbone, return_layers, in_channels_list, out_channels, extra_blocks=extra_blocks) ================================================ FILE: pytorch_object_detection/mask_rcnn/coco91_indices.json ================================================ { "1": "person", "2": "bicycle", "3": "car", "4": "motorcycle", "5": "airplane", "6": "bus", "7": "train", "8": "truck", "9": "boat", "10": "traffic light", "11": "fire hydrant", "12": "N/A", "13": "stop sign", "14": "parking meter", "15": "bench", "16": "bird", "17": "cat", "18": "dog", "19": "horse", "20": "sheep", "21": "cow", "22": "elephant", "23": "bear", "24": "zebra", "25": "giraffe", "26": "N/A", "27": "backpack", "28": "umbrella", "29": "N/A", "30": "N/A", "31": "handbag", "32": "tie", "33": "suitcase", "34": "frisbee", "35": "skis", "36": "snowboard", "37": "sports ball", "38": "kite", "39": "baseball bat", "40": "baseball glove", "41": "skateboard", "42": "surfboard", "43": "tennis racket", "44": "bottle", "45": "N/A", "46": "wine glass", "47": "cup", "48": "fork", "49": "knife", "50": "spoon", "51": "bowl", "52": "banana", "53": "apple", "54": "sandwich", "55": "orange", "56": "broccoli", "57": "carrot", "58": "hot dog", "59": "pizza", "60": "donut", "61": "cake", "62": "chair", "63": "couch", "64": "potted plant", "65": "bed", "66": "N/A", "67": "dining table", "68": "N/A", "69": "N/A", "70": "toilet", "71": "N/A", "72": "tv", "73": "laptop", "74": "mouse", "75": "remote", "76": "keyboard", "77": "cell phone", "78": "microwave", "79": "oven", "80": "toaster", "81": "sink", "82": "refrigerator", "83": "N/A", "84": "book", "85": "clock", "86": "vase", "87": "scissors", "88": "teddy bear", "89": "hair drier", "90": "toothbrush" } ================================================ FILE: pytorch_object_detection/mask_rcnn/det_results20220406-141544.txt ================================================ epoch:0 0.171 0.342 0.154 0.099 0.211 0.213 0.184 0.315 0.334 0.168 
0.375 0.440 1.3826 0.08 epoch:1 0.230 0.419 0.230 0.132 0.266 0.288 0.224 0.374 0.395 0.216 0.435 0.512 1.0356 0.08 epoch:2 0.242 0.435 0.244 0.133 0.272 0.313 0.233 0.393 0.416 0.232 0.452 0.532 0.9718 0.08 epoch:3 0.261 0.456 0.269 0.145 0.284 0.326 0.248 0.415 0.440 0.260 0.475 0.550 0.9363 0.08 epoch:4 0.266 0.458 0.277 0.150 0.301 0.337 0.250 0.409 0.433 0.245 0.467 0.564 0.9145 0.08 epoch:5 0.272 0.465 0.286 0.155 0.309 0.348 0.251 0.407 0.429 0.247 0.461 0.561 0.8982 0.08 epoch:6 0.288 0.482 0.303 0.163 0.321 0.363 0.263 0.431 0.452 0.265 0.491 0.570 0.8859 0.08 epoch:7 0.287 0.483 0.302 0.164 0.320 0.363 0.268 0.432 0.454 0.268 0.483 0.584 0.8771 0.08 epoch:8 0.298 0.492 0.318 0.166 0.336 0.377 0.268 0.434 0.454 0.265 0.500 0.580 0.8685 0.08 epoch:9 0.289 0.484 0.306 0.156 0.325 0.374 0.263 0.428 0.450 0.252 0.490 0.589 0.8612 0.08 epoch:10 0.297 0.489 0.316 0.167 0.330 0.381 0.270 0.436 0.459 0.258 0.501 0.579 0.8547 0.08 epoch:11 0.299 0.494 0.317 0.171 0.335 0.382 0.272 0.439 0.461 0.276 0.501 0.586 0.8498 0.08 epoch:12 0.301 0.497 0.321 0.178 0.333 0.390 0.270 0.443 0.466 0.277 0.505 0.600 0.8461 0.08 epoch:13 0.307 0.503 0.327 0.175 0.345 0.388 0.276 0.441 0.465 0.269 0.510 0.574 0.8409 0.08 epoch:14 0.299 0.491 0.319 0.171 0.339 0.372 0.271 0.445 0.470 0.284 0.508 0.593 0.8355 0.08 epoch:15 0.306 0.503 0.324 0.166 0.342 0.396 0.278 0.443 0.468 0.271 0.511 0.598 0.8330 0.08 epoch:16 0.374 0.579 0.407 0.214 0.415 0.476 0.311 0.500 0.526 0.325 0.573 0.659 0.7421 0.008 epoch:17 0.379 0.587 0.409 0.214 0.420 0.484 0.316 0.502 0.528 0.322 0.569 0.668 0.7157 0.008 epoch:18 0.380 0.587 0.411 0.214 0.423 0.486 0.315 0.503 0.528 0.323 0.571 0.669 0.7016 0.008 epoch:19 0.381 0.588 0.413 0.216 0.422 0.490 0.317 0.508 0.532 0.332 0.574 0.676 0.6897 0.008 epoch:20 0.379 0.586 0.410 0.212 0.418 0.488 0.313 0.499 0.523 0.317 0.566 0.667 0.6802 0.008 epoch:21 0.378 0.587 0.408 0.210 0.418 0.488 0.314 0.496 0.520 0.314 0.560 0.667 0.6708 0.008 epoch:22 0.381 0.588 
0.411 0.213 0.420 0.495 0.316 0.500 0.524 0.318 0.567 0.673 0.6497 0.0008 epoch:23 0.381 0.588 0.411 0.215 0.420 0.492 0.315 0.499 0.523 0.319 0.565 0.666 0.6447 0.0008 epoch:24 0.381 0.588 0.412 0.214 0.419 0.495 0.316 0.499 0.523 0.317 0.565 0.669 0.6421 0.0008 epoch:25 0.380 0.585 0.411 0.214 0.419 0.494 0.314 0.498 0.522 0.316 0.566 0.664 0.6398 0.0008 ================================================ FILE: pytorch_object_detection/mask_rcnn/draw_box_utils.py ================================================ from PIL.Image import Image, fromarray import PIL.ImageDraw as ImageDraw import PIL.ImageFont as ImageFont from PIL import ImageColor import numpy as np STANDARD_COLORS = [ 'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque', 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite', 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan', 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange', 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet', 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite', 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod', 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki', 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue', 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey', 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue', 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime', 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid', 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen', 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin', 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed', 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed', 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 
def _text_size(font, text):
    """Return the (width, height) a font needs to render ``text``.

    ``ImageFont.getsize`` was removed in Pillow 10. ``getbbox`` is the
    replacement; its right/bottom edges are used as the size. Older Pillow
    releases without ``getbbox`` fall back to ``getsize``.
    """
    if hasattr(font, "getbbox"):
        _, _, right, bottom = font.getbbox(text)
        return right, bottom
    return font.getsize(text)


def draw_text(draw,
              box: list,
              cls: int,
              score: float,
              category_index: dict,
              color: str,
              font: str = 'arial.ttf',
              font_size: int = 24):
    """
    Draw the class name and score of one detection next to its bounding box.

    Args:
        draw: ImageDraw object of the image being annotated.
        box: [left, top, right, bottom] of the bounding box.
        cls: class index; looked up in ``category_index`` as ``str(cls)``.
        score: confidence in [0, 1]; shown as an integer percentage.
        category_index: mapping from class index (as string) to class name.
        color: fill colour of the label background.
        font: TrueType font file; the PIL default font is used if it cannot
            be loaded.
        font_size: font size passed to the TrueType loader.
    """
    try:
        font = ImageFont.truetype(font, font_size)
    except IOError:
        font = ImageFont.load_default()

    left, top, right, bottom = box
    # If the total height of the display strings added to the top of the bounding
    # box exceeds the top of the image, stack the strings below the bounding box
    # instead of above.
    display_str = f"{category_index[str(cls)]}: {int(100 * score)}%"
    # The label is rendered character by character, so measure each glyph once.
    char_sizes = [_text_size(font, ds) for ds in display_str]
    # Each display_str has a top and bottom margin of 0.05x.
    display_str_height = (1 + 2 * 0.05) * max(height for _, height in char_sizes)

    if top > display_str_height:
        text_top = top - display_str_height
        text_bottom = top
    else:
        text_top = bottom
        text_bottom = bottom + display_str_height

    for ds, (text_width, _) in zip(display_str, char_sizes):
        margin = np.ceil(0.05 * text_width)
        draw.rectangle([(left, text_top),
                        (left + text_width + 2 * margin, text_bottom)], fill=color)
        draw.text((left + margin, text_top),
                  ds,
                  fill='black',
                  font=font)
        left += text_width
class CocoDetection(data.Dataset):
    """MS COCO detection / instance segmentation dataset.

    Args:
        root (string): Root directory where images are downloaded to.
        dataset (string): train or val.
        transforms (callable, optional): A function/transform that takes input sample and its target as entry
            and returns a transformed version.
        years (string): dataset year used in the image folder and annotation file names.
    """

    def __init__(self, root, dataset="train", transforms=None, years="2017"):
        super().__init__()
        assert dataset in ["train", "val"], 'dataset must be in ["train", "val"]'
        is_train = dataset == "train"

        assert os.path.exists(root), "file '{}' does not exist.".format(root)
        self.img_root = os.path.join(root, f"{dataset}{years}")
        assert os.path.exists(self.img_root), "path '{}' does not exist.".format(self.img_root)
        self.anno_path = os.path.join(root, "annotations", f"instances_{dataset}{years}.json")
        assert os.path.exists(self.anno_path), "file '{}' does not exist.".format(self.anno_path)

        self.mode = dataset
        self.transforms = transforms
        self.coco = COCO(self.anno_path)

        # Map category id -> category name. The object80 ids are not
        # contiguous: there are only 80 classes but the ids follow the
        # stuff91 numbering, so the gaps are filled with "N/A".
        id_to_name = {cat["id"]: cat["name"] for cat in self.coco.cats.values()}
        largest_id = max(id_to_name.keys())  # 90
        coco_classes = {cat_id: id_to_name[cat_id] if cat_id in id_to_name else "N/A"
                        for cat_id in range(1, largest_id + 1)}

        if is_train:
            # Persist the mapping in the working directory.
            with open("coco91_indices.json", "w") as f:
                f.write(json.dumps(coco_classes, indent=4))

        self.coco_classes = coco_classes

        image_ids = sorted(self.coco.imgs.keys())
        if is_train:
            # Drop images that have no objects or only tiny objects.
            image_ids = coco_remove_images_without_annotations(self.coco, image_ids)
        self.ids = image_ids

    def parse_targets(self,
                      img_id: int,
                      coco_targets: list,
                      w: int = None,
                      h: int = None):
        """Convert the raw COCO annotations of one image into a target dict.

        Args:
            img_id: id of the image.
            coco_targets: annotation dicts as returned by ``coco.loadAnns``.
            w: image width, must be positive.
            h: image height, must be positive.

        Returns:
            dict with keys boxes, labels, masks, image_id, area and iscrowd.
        """
        assert w > 0
        assert h > 0

        # Only keep single-object annotations (iscrowd == 0).
        instances = [ann for ann in coco_targets if ann['iscrowd'] == 0]

        # reshape guards against the no-box case
        boxes = torch.as_tensor([ann["bbox"] for ann in instances],
                                dtype=torch.float32).reshape(-1, 4)
        # [xmin, ymin, w, h] -> [xmin, ymin, xmax, ymax], clipped to the image
        boxes[:, 2:] += boxes[:, :2]
        boxes[:, 0::2].clamp_(min=0, max=w)
        boxes[:, 1::2].clamp_(min=0, max=h)

        labels = torch.tensor([ann["category_id"] for ann in instances], dtype=torch.int64)
        area = torch.tensor([ann["area"] for ann in instances])
        iscrowd = torch.tensor([ann["iscrowd"] for ann in instances])
        masks = convert_coco_poly_mask([ann["segmentation"] for ann in instances], h, w)

        # Keep only valid boxes, i.e. x_max > x_min and y_max > y_min.
        valid = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])

        return {
            "boxes": boxes[valid],
            "labels": labels[valid],
            "masks": masks[valid],
            "image_id": torch.tensor([img_id]),
            # for conversion to coco api
            "area": area[valid],
            "iscrowd": iscrowd[valid],
        }

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: Tuple (image, target). target is the dict built by ``parse_targets``.
        """
        img_id = self.ids[index]
        annotations = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))

        file_name = self.coco.loadImgs(img_id)[0]['file_name']
        img = Image.open(os.path.join(self.img_root, file_name)).convert('RGB')

        width, height = img.size
        target = self.parse_targets(img_id, annotations, width, height)
        if self.transforms is None:
            return img, target

        img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        return len(self.ids)

    def get_height_and_width(self, index):
        """Return (height, width) of an image from the annotation metadata."""
        img_info = self.coco.loadImgs(self.ids[index])[0]
        return img_info["height"], img_info["width"]

    @staticmethod
    def collate_fn(batch):
        """Turn a list of (image, target) pairs into (images, targets) tuples."""
        return tuple(zip(*batch))
class VOCInstances(Dataset):
    """
    Pascal VOC instance segmentation dataset.

    All xml annotations and SegmentationObject masks are parsed once in the
    constructor and kept in memory. Images whose number of annotated boxes
    differs from the number of instances in the mask are skipped with a
    warning.

    Args:
        voc_root: directory that contains ``VOCdevkit`` (or a path that
            already includes it).
        year: "2007" or "2012" (an int is accepted as well).
        txt_name: split file inside ``ImageSets/Segmentation``.
        transforms: optional callable applied to ``(image, target)``.
    """

    def __init__(self, voc_root, year="2012", txt_name: str = "train.txt", transforms=None):
        super().__init__()
        if isinstance(year, int):
            year = str(year)
        assert year in ["2007", "2012"], "year must be in ['2007', '2012']"
        if "VOCdevkit" in voc_root:
            root = os.path.join(voc_root, f"VOC{year}")
        else:
            root = os.path.join(voc_root, "VOCdevkit", f"VOC{year}")
        assert os.path.exists(root), "path '{}' does not exist.".format(root)
        image_dir = os.path.join(root, 'JPEGImages')
        xml_dir = os.path.join(root, 'Annotations')
        mask_dir = os.path.join(root, 'SegmentationObject')

        txt_path = os.path.join(root, "ImageSets", "Segmentation", txt_name)
        assert os.path.exists(txt_path), "file '{}' does not exist.".format(txt_path)
        with open(os.path.join(txt_path), "r") as f:
            file_names = [x.strip() for x in f.readlines() if len(x.strip()) > 0]

        # read class_indict (index -> name) and invert it to name -> index
        json_file = 'pascal_voc_indices.json'
        assert os.path.exists(json_file), "{} file not exist.".format(json_file)
        with open(json_file, 'r') as f:
            idx2classes = json.load(f)
            self.class_dict = dict([(v, k) for k, v in idx2classes.items()])

        self.images_path = []     # image paths
        self.xmls_path = []       # xml annotation paths
        self.xmls_info = []       # parsed xml dicts
        self.masks_path = []      # SegmentationObject image paths
        self.objects_bboxes = []  # parsed boxes/labels/... per image
        self.masks = []           # SegmentationObject images as numpy arrays

        # Make sure image, xml file and mask all exist for every sample.
        images_path = [os.path.join(image_dir, x + ".jpg") for x in file_names]
        xmls_path = [os.path.join(xml_dir, x + '.xml') for x in file_names]
        masks_path = [os.path.join(mask_dir, x + ".png") for x in file_names]
        for idx, (img_path, xml_path, mask_path) in enumerate(zip(images_path, xmls_path, masks_path)):
            assert os.path.exists(img_path), f"not find {img_path}"
            assert os.path.exists(xml_path), f"not find {xml_path}"
            assert os.path.exists(mask_path), f"not find {mask_path}"

            # Parse the bounding boxes stored in the xml file.
            with open(xml_path) as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            obs_dict = parse_xml_to_dict(xml)["annotation"]  # xml -> dict
            obs_bboxes = parse_objects(obs_dict, xml_path, self.class_dict, idx)  # object info
            num_objs = obs_bboxes["boxes"].shape[0]

            # Read SegmentationObject; the context manager releases the file
            # handle as soon as the pixels have been copied into the array.
            with Image.open(mask_path) as mask_img:
                instances_mask = np.array(mask_img)
            # 255 marks background or ignored regions; treat it as background (0)
            instances_mask[instances_mask == 255] = 0

            # The number of boxes has to match the number of instances.
            num_instances = instances_mask.max()
            if num_objs != num_instances:
                print(f"warning: num_boxes:{num_objs} and num_instances:{num_instances} do not correspond. "
                      f"skip image:{img_path}")
                continue

            self.images_path.append(img_path)
            self.xmls_path.append(xml_path)
            self.xmls_info.append(obs_dict)
            self.masks_path.append(mask_path)
            self.objects_bboxes.append(obs_bboxes)
            self.masks.append(instances_mask)

        self.transforms = transforms
        self.coco = convert_to_coco_api(self)

    def parse_mask(self, idx: int):
        """Split the instance-id mask of sample ``idx`` into one binary mask per instance.

        Returns:
            uint8 tensor of shape (num_instances, H, W).
        """
        mask = self.masks[idx]
        # Instances are numbered 1..c, so the largest id equals their count.
        num_instances = int(mask.max())
        if num_instances == 0:
            # np.stack refuses an empty list; return an empty stack instead.
            masks = np.zeros((0,) + mask.shape, dtype=bool)
        else:
            # One channel per instance.
            masks = np.stack([mask == i for i in range(1, num_instances + 1)], axis=0)
        return torch.as_tensor(masks, dtype=torch.uint8)

    def _build_target(self, idx: int) -> dict:
        """Return a fresh target dict for sample ``idx``.

        The cached annotation tensors are cloned and the masks are attached
        to the copy, so neither callers nor in-place transforms can modify
        the cache, and the per-image mask tensors are not retained by it.
        """
        cached = self.objects_bboxes[idx]
        target = {key: (value.clone() if isinstance(value, torch.Tensor) else value)
                  for key, value in cached.items()}
        target["masks"] = self.parse_mask(idx)
        return target

    def __getitem__(self, idx):
        """
        Args:
            idx (int): Index

        Returns:
            tuple: (image, target) where target holds boxes, labels, iscrowd,
                image_id, area and masks.
        """
        img = Image.open(self.images_path[idx]).convert('RGB')
        target = self._build_target(idx)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.images_path)

    def get_height_and_width(self, idx):
        """Return (height, width) from the xml; handy for aspect-ratio statistics."""
        data = self.xmls_info[idx]
        data_height = int(data["size"]["height"])
        data_width = int(data["size"]["width"])
        return data_height, data_width

    def get_annotations(self, idx):
        """Return (target, height, width) without loading the image; used to build COCO()."""
        data = self.xmls_info[idx]
        h = int(data["size"]["height"])
        w = int(data["size"]["width"])
        target = self._build_target(idx)
        return target, h, w

    @staticmethod
    def collate_fn(batch):
        """Turn a list of (image, target) pairs into (images, targets) tuples."""
        return tuple(zip(*batch))
""" if len(xml) == 0: # 遍历到底层,直接返回tag对应的信息 return {xml.tag: xml.text} result = {} for child in xml: child_result = parse_xml_to_dict(child) # 递归遍历标签信息 if child.tag != 'object': result[child.tag] = child_result[child.tag] else: if child.tag not in result: # 因为object可能有多个,所以需要放入列表里 result[child.tag] = [] result[child.tag].append(child_result[child.tag]) return {xml.tag: result} def parse_objects(data: dict, xml_path: str, class_dict: dict, idx: int): """ 解析出bboxes、labels、iscrowd以及ares等信息 Args: data: 将xml解析成dict的Annotation数据 xml_path: 对应xml的文件路径 class_dict: 类别与索引对应关系 idx: 图片对应的索引 Returns: """ boxes = [] labels = [] iscrowd = [] assert "object" in data, "{} lack of object information.".format(xml_path) for obj in data["object"]: xmin = float(obj["bndbox"]["xmin"]) xmax = float(obj["bndbox"]["xmax"]) ymin = float(obj["bndbox"]["ymin"]) ymax = float(obj["bndbox"]["ymax"]) # 进一步检查数据,有的标注信息中可能有w或h为0的情况,这样的数据会导致计算回归loss为nan if xmax <= xmin or ymax <= ymin: print("Warning: in '{}' xml, there are some bbox w/h <=0".format(xml_path)) continue boxes.append([xmin, ymin, xmax, ymax]) labels.append(int(class_dict[obj["name"]])) if "difficult" in obj: iscrowd.append(int(obj["difficult"])) else: iscrowd.append(0) # convert everything into a torch.Tensor boxes = torch.as_tensor(boxes, dtype=torch.float32) labels = torch.as_tensor(labels, dtype=torch.int64) iscrowd = torch.as_tensor(iscrowd, dtype=torch.int64) image_id = torch.tensor([idx]) area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]) return {"boxes": boxes, "labels": labels, "iscrowd": iscrowd, "image_id": image_id, "area": area} if __name__ == '__main__': dataset = VOCInstances(voc_root="/data/") print(len(dataset)) d1 = dataset[0] ================================================ FILE: pytorch_object_detection/mask_rcnn/network_files/__init__.py ================================================ from .faster_rcnn_framework import FasterRCNN, FastRCNNPredictor from .rpn_function import AnchorsGenerator from 
def nms(boxes, scores, iou_threshold):
    # type: (Tensor, Tensor, float) -> Tensor
    """
    Performs non-maximum suppression (NMS) on the boxes according
    to their intersection-over-union (IoU).

    NMS iteratively removes lower scoring boxes which have an
    IoU greater than iou_threshold with another (higher scoring)
    box.

    Parameters
    ----------
    boxes : Tensor[N, 4]
        boxes to perform NMS on. They
        are expected to be in (x1, y1, x2, y2) format
    scores : Tensor[N]
        scores for each one of the boxes
    iou_threshold : float
        discards all overlapping
        boxes with IoU > iou_threshold

    Returns
    -------
    keep : Tensor
        int64 tensor with the indices
        of the elements that have been kept
        by NMS, sorted in decreasing order of scores
    """
    return torch.ops.torchvision.nms(boxes, scores, iou_threshold)


def batched_nms(boxes, scores, idxs, iou_threshold):
    # type: (Tensor, Tensor, Tensor, float) -> Tensor
    """
    Performs non-maximum suppression in a batched fashion.

    Each index value correspond to a category, and NMS
    will not be applied between elements of different categories.

    Parameters
    ----------
    boxes : Tensor[N, 4]
        boxes where NMS will be performed. They
        are expected to be in (x1, y1, x2, y2) format
    scores : Tensor[N]
        scores for each one of the boxes
    idxs : Tensor[N]
        indices of the categories for each one of the boxes.
    iou_threshold : float
        discards all overlapping boxes
        with IoU > iou_threshold

    Returns
    -------
    keep : Tensor
        int64 tensor with the indices of
        the elements that have been kept by NMS, sorted
        in decreasing order of scores
    """
    if boxes.numel() == 0:
        return torch.empty((0,), dtype=torch.int64, device=boxes.device)

    # strategy: in order to perform NMS independently per class.
    # we add an offset to all the boxes. The offset is dependent
    # only on the class idx, and is large enough so that boxes
    # from different classes do not overlap
    # largest coordinate value found in any box (xmin, ymin, xmax, ymax)
    max_coordinate = boxes.max()

    # to(): Performs Tensor dtype and/or device conversion
    # one large offset per category / feature level; to(boxes) only makes the
    # dtype and device of the offsets match those of boxes
    offsets = idxs.to(boxes) * (max_coordinate + 1)
    # after shifting, boxes of different categories / levels cannot overlap
    boxes_for_nms = boxes + offsets[:, None]
    keep = nms(boxes_for_nms, scores, iou_threshold)
    return keep


def remove_small_boxes(boxes, min_size):
    # type: (Tensor, float) -> Tensor
    """
    Remove boxes which contains at least one side smaller than min_size.

    Arguments:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format
        min_size (float): minimum size

    Returns:
        keep (Tensor[K]): indices of the boxes whose width and height
            are both greater than or equal to min_size
    """
    ws, hs = boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1]  # box widths and heights
    # True where both the width and the height reach the threshold
    keep = torch.logical_and(torch.ge(ws, min_size), torch.ge(hs, min_size))
    # torch.where on a boolean mask returns the indices of the True entries
    keep = torch.where(keep)[0]
    return keep


def clip_boxes_to_image(boxes, size):
    # type: (Tensor, Tuple[int, int]) -> Tensor
    """
    Clip boxes so that they lie inside an image of size `size`.
    Coordinates that fall outside the image are moved onto its border.

    Arguments:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format
        size (Tuple[height, width]): size of the image

    Returns:
        clipped_boxes (Tensor[N, 4])
    """
    dim = boxes.dim()
    boxes_x = boxes[..., 0::2]  # x1, x2
    boxes_y = boxes[..., 1::2]  # y1, y2
    height, width = size

    if torchvision._is_tracing():
        # while tracing, the bounds are expressed with tensor min/max
        # instead of clamp
        boxes_x = torch.max(boxes_x, torch.tensor(0, dtype=boxes.dtype, device=boxes.device))
        boxes_x = torch.min(boxes_x, torch.tensor(width, dtype=boxes.dtype, device=boxes.device))
        boxes_y = torch.max(boxes_y, torch.tensor(0, dtype=boxes.dtype, device=boxes.device))
        boxes_y = torch.min(boxes_y, torch.tensor(height, dtype=boxes.dtype, device=boxes.device))
    else:
        boxes_x = boxes_x.clamp(min=0, max=width)   # keep x inside [0, width]
        boxes_y = boxes_y.clamp(min=0, max=height)  # keep y inside [0, height]

    # interleave x and y again so the result is (x1, y1, x2, y2)
    clipped_boxes = torch.stack((boxes_x, boxes_y), dim=dim)
    return clipped_boxes.reshape(boxes.shape)


def box_area(boxes):
    """
    Computes the area of a set of bounding boxes, which are specified by its
    (x1, y1, x2, y2) coordinates.

    Arguments:
        boxes (Tensor[N, 4]): boxes for which the area will be computed. They
            are expected to be in (x1, y1, x2, y2) format

    Returns:
        area (Tensor[N]): area for each box
    """
    return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
class BalancedPositiveNegativeSampler(object):
    """
    Draws a fixed-size sample per image with a capped share of positives.
    """

    def __init__(self, batch_size_per_image, positive_fraction):
        # type: (int, float) -> None
        """
        Arguments:
            batch_size_per_image (int): number of elements to be selected per image
            positive_fraction (float): percentage of positive elements per batch
        """
        self.batch_size_per_image = batch_size_per_image
        self.positive_fraction = positive_fraction

    def __call__(self, matched_idxs):
        # type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
        """
        Arguments:
            matched idxs: list of tensors containing -1, 0 or positive values.
                Each tensor corresponds to a specific image.
                -1 values are ignored, 0 are considered as negatives and > 0 as
                positives.

        Returns:
            pos_idx (list[tensor])
            neg_idx (list[tensor])

        Two lists of uint8 masks, one entry per image. The first marks the
        sampled positive elements, the second the sampled negative elements.
        """
        pos_idx = []
        neg_idx = []
        for labels in matched_idxs:
            # candidates: values >= 1 are positives, values == 0 are negatives
            pos_candidates = torch.where(torch.ge(labels, 1))[0]
            neg_candidates = torch.where(torch.eq(labels, 0))[0]

            # wanted number of positives, capped by how many exist
            pos_quota = min(pos_candidates.numel(),
                            int(self.batch_size_per_image * self.positive_fraction))
            # negatives fill the rest of the batch, capped by how many exist
            neg_quota = min(neg_candidates.numel(),
                            self.batch_size_per_image - pos_quota)

            # random subset of each candidate set (positives are drawn first)
            pos_order = torch.randperm(pos_candidates.numel(), device=pos_candidates.device)
            neg_order = torch.randperm(neg_candidates.numel(), device=neg_candidates.device)
            pos_chosen = pos_candidates[pos_order[:pos_quota]]
            neg_chosen = neg_candidates[neg_order[:neg_quota]]

            # turn the chosen indices into binary masks
            pos_mask = torch.zeros_like(labels, dtype=torch.uint8)
            neg_mask = torch.zeros_like(labels, dtype=torch.uint8)
            pos_mask[pos_chosen] = 1
            neg_mask[neg_chosen] = 1

            pos_idx.append(pos_mask)
            neg_idx.append(neg_mask)

        return pos_idx, neg_idx
class BoxCoder(object):
    """
    Converts between absolute boxes and the (dx, dy, dw, dh) regression
    representation used to train the box regressors.
    """

    def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)):
        # type: (Tuple[float, float, float, float], float) -> None
        """
        Arguments:
            weights (4-element tuple)
            bbox_xform_clip (float)
        """
        self.weights = weights
        self.bbox_xform_clip = bbox_xform_clip

    def encode(self, reference_boxes, proposals):
        # type: (List[Tensor], List[Tensor]) -> List[Tensor]
        """
        Compute regression targets from anchors/proposals and their matched gt boxes.

        Args:
            reference_boxes: List[Tensor], gt box matched to every anchor/proposal
            proposals: List[Tensor], anchors/proposals

        Returns:
            regression parameters, split back per image
        """
        # remember the per-image counts so the concatenated result can be
        # split again; reference_boxes and proposals share the same layout
        counts = [len(b) for b in reference_boxes]
        all_references = torch.cat(reference_boxes, dim=0)
        all_proposals = torch.cat(proposals, dim=0)

        # targets_dx, targets_dy, targets_dw, targets_dh
        return self.encode_single(all_references, all_proposals).split(counts, 0)

    def encode_single(self, reference_boxes, proposals):
        """
        Encode a set of proposals with respect to some reference boxes

        Arguments:
            reference_boxes (Tensor): reference boxes
            proposals (Tensor): boxes to be encoded
        """
        # the weights are materialised with the dtype/device of the references
        weights = torch.as_tensor(self.weights,
                                  dtype=reference_boxes.dtype,
                                  device=reference_boxes.device)
        return encode_boxes(reference_boxes, proposals, weights)

    def decode(self, rel_codes, boxes):
        # type: (Tensor, List[Tensor]) -> Tensor
        """
        Apply predicted regression parameters to the anchors/proposals of a batch.

        Args:
            rel_codes: bbox regression parameters
            boxes: anchors/proposals, one tensor per image

        Returns:
            decoded boxes, reshaped to [total_boxes, -1, 4] when there is
            at least one box
        """
        assert isinstance(boxes, (list, tuple))
        assert isinstance(rel_codes, torch.Tensor)

        total = sum(b.size(0) for b in boxes)
        merged = torch.cat(boxes, dim=0)

        # apply the regression parameters to the matching anchors
        decoded = self.decode_single(rel_codes, merged)

        # skip the reshape when there are no boxes, it would raise
        if total > 0:
            decoded = decoded.reshape(total, -1, 4)

        return decoded

    def decode_single(self, rel_codes, boxes):
        """
        From a set of original boxes and encoded relative box offsets,
        get the decoded boxes.

        Arguments:
            rel_codes (Tensor): encoded boxes (bbox regression parameters)
            boxes (Tensor): reference boxes (anchors/proposals)
        """
        anchors = boxes.to(rel_codes.dtype)

        # anchors are xmin, ymin, xmax, ymax
        widths = anchors[:, 2] - anchors[:, 0]
        heights = anchors[:, 3] - anchors[:, 1]
        ctr_x = anchors[:, 0] + 0.5 * widths
        ctr_y = anchors[:, 1] + 0.5 * heights

        wx, wy, ww, wh = self.weights
        # every group of four columns holds (dx, dy, dw, dh) for one class
        dx = rel_codes[:, 0::4] / wx
        dy = rel_codes[:, 1::4] / wy
        # limit max value, prevent sending too large values into torch.exp()
        dw = torch.clamp(rel_codes[:, 2::4] / ww, max=self.bbox_xform_clip)
        dh = torch.clamp(rel_codes[:, 3::4] / wh, max=self.bbox_xform_clip)

        pred_ctr_x = dx * widths[:, None] + ctr_x[:, None]
        pred_ctr_y = dy * heights[:, None] + ctr_y[:, None]
        pred_w = torch.exp(dw) * widths[:, None]
        pred_h = torch.exp(dh) * heights[:, None]

        # half extents, with 0.5 kept as a tensor constant
        half_w = torch.tensor(0.5, dtype=pred_ctr_x.dtype, device=pred_w.device) * pred_w
        half_h = torch.tensor(0.5, dtype=pred_ctr_y.dtype, device=pred_h.device) * pred_h

        corners = (pred_ctr_x - half_w,   # xmin
                   pred_ctr_y - half_h,   # ymin
                   pred_ctr_x + half_w,   # xmax
                   pred_ctr_y + half_h)   # ymax
        return torch.stack(corners, dim=2).flatten(1)
or equal to this value are candidate matches. low_threshold (float): a lower quality threshold used to stratify matches into three levels: 1) matches >= high_threshold 2) BETWEEN_THRESHOLDS matches in [low_threshold, high_threshold) 3) BELOW_LOW_THRESHOLD matches in [0, low_threshold) allow_low_quality_matches (bool): if True, produce additional matches for predictions that have only low-quality match candidates. See set_low_quality_matches_ for more details. """ self.BELOW_LOW_THRESHOLD = -1 self.BETWEEN_THRESHOLDS = -2 assert low_threshold <= high_threshold self.high_threshold = high_threshold # 0.7 self.low_threshold = low_threshold # 0.3 self.allow_low_quality_matches = allow_low_quality_matches def __call__(self, match_quality_matrix): """ 计算anchors与每个gtboxes匹配的iou最大值,并记录索引, iou= self.low_threshold) & ( matched_vals < self.high_threshold ) # iou小于low_threshold的matches索引置为-1 matches[below_low_threshold] = self.BELOW_LOW_THRESHOLD # -1 # iou在[low_threshold, high_threshold]之间的matches索引置为-2 matches[between_thresholds] = self.BETWEEN_THRESHOLDS # -2 if self.allow_low_quality_matches: assert all_matches is not None self.set_low_quality_matches_(matches, all_matches, match_quality_matrix) return matches def set_low_quality_matches_(self, matches, all_matches, match_quality_matrix): """ Produce additional matches for predictions that have only low-quality matches. Specifically, for each ground-truth find the set of predictions that have maximum overlap with it (including ties); for each prediction in that set, if it is unmatched, then match it to the ground-truth with which it has the highest quality value. """ # For each gt, find the prediction with which it has highest quality # 对于每个gt boxes寻找与其iou最大的anchor, # highest_quality_foreach_gt为匹配到的最大iou值 highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1) # the dimension to reduce. 
def smooth_l1_loss(input, target, beta: float = 1. / 9, size_average: bool = True):
    """
    Smooth L1 loss with a configurable transition point.

    Very similar to the smooth_l1_loss from pytorch, but with
    the extra beta parameter.

    Args:
        input: predicted values
        target: target values, broadcastable with ``input``
        beta: absolute error below which the loss is quadratic; errors at or
            above it are penalised linearly. ``beta == 0`` gives the plain
            L1 loss.
        size_average: return the mean over all elements when True,
            otherwise the sum

    Returns:
        scalar tensor holding the reduced loss
    """
    n = torch.abs(input - target)
    if beta == 0:
        # With beta == 0 the quadratic branch divides by zero. torch.where
        # never selects it in the forward pass, but its inf/NaN still leaks
        # into the gradients, so the L1 form is used directly.
        loss = n
    else:
        cond = torch.lt(n, beta)
        loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta)
    if size_average:
        return loss.mean()
    return loss.sum()
Arguments: backbone (nn.Module): rpn (nn.Module): roi_heads (nn.Module): takes the features + the proposals from the RPN and computes detections / masks from it. transform (nn.Module): performs the data transformation from the inputs to feed into the model """ def __init__(self, backbone, rpn, roi_heads, transform): super(FasterRCNNBase, self).__init__() self.transform = transform self.backbone = backbone self.rpn = rpn self.roi_heads = roi_heads # used only on torchscript mode self._has_warned = False @torch.jit.unused def eager_outputs(self, losses, detections): # type: (Dict[str, Tensor], List[Dict[str, Tensor]]) -> Union[Dict[str, Tensor], List[Dict[str, Tensor]]] if self.training: return losses return detections def forward(self, images, targets=None): # type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]] """ Arguments: images (list[Tensor]): images to be processed targets (list[Dict[Tensor]]): ground-truth boxes present in the image (optional) Returns: result (list[BoxList] or dict[Tensor]): the output from the model. During training, it returns a dict[Tensor] which contains the losses. During testing, it returns list[BoxList] contains additional fields like `scores`, `labels` and `mask` (for Mask R-CNN models). 
class TwoMLPHead(nn.Module):
    """
    Standard heads for FPN-based models: two fully connected layers,
    each followed by a ReLU.

    Arguments:
        in_channels (int): number of input channels
        representation_size (int): size of the intermediate representation
    """

    def __init__(self, in_channels, representation_size):
        super().__init__()

        self.fc6 = nn.Linear(in_channels, representation_size)
        self.fc7 = nn.Linear(representation_size, representation_size)

    def forward(self, x):
        # flatten everything but the batch dimension before the linear layers
        flat = x.flatten(start_dim=1)
        hidden = F.relu(self.fc6(flat))
        return F.relu(self.fc7(hidden))


class FastRCNNPredictor(nn.Module):
    """
    Standard classification + bounding box regression layers
    for Fast R-CNN.

    Arguments:
        in_channels (int): number of input channels
        num_classes (int): number of output classes (including background)
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.cls_score = nn.Linear(in_channels, num_classes)
        # four regression values per class
        self.bbox_pred = nn.Linear(in_channels, num_classes * 4)

    def forward(self, x):
        # a 4-D input is only accepted when its two trailing dims are 1x1
        if x.dim() == 4:
            assert list(x.shape[2:]) == [1, 1]
        features = x.flatten(start_dim=1)
        return self.cls_score(features), self.bbox_pred(features)
The fields of the Dict are as follows: - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values between 0 and H and 0 and W - labels (Int64Tensor[N]): the predicted labels for each detection - scores (Tensor[N]): the scores for each prediction Arguments: backbone (nn.Module): the network used to compute the features for the model. It should contain an out_channels attribute, which indicates the number of output channels that each feature map has (and it should be the same for all feature maps). The backbone should return a single Tensor or an OrderedDict[Tensor]. num_classes (int): number of output classes of the model (including the background). If box_predictor is specified, num_classes should be None. min_size (int): minimum size of the image to be rescaled before feeding it to the backbone max_size (int): maximum size of the image to be rescaled before feeding it to the backbone image_mean (Tuple[float, float, float]): mean values used for input normalization. They are generally the mean values of the dataset on which the backbone has been trained image_std (Tuple[float, float, float]): std values used for input normalization. They are generally the std values of the dataset on which the backbone has been trained rpn_anchor_generator (AnchorsGenerator): module that generates the anchors for a set of feature maps. 
rpn_head (nn.Module): module that computes the objectness and regression deltas from the RPN rpn_pre_nms_top_n_train (int): number of proposals to keep before applying NMS during training rpn_pre_nms_top_n_test (int): number of proposals to keep before applying NMS during testing rpn_post_nms_top_n_train (int): number of proposals to keep after applying NMS during training rpn_post_nms_top_n_test (int): number of proposals to keep after applying NMS during testing rpn_nms_thresh (float): NMS threshold used for postprocessing the RPN proposals rpn_fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be considered as positive during training of the RPN. rpn_bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be considered as negative during training of the RPN. rpn_batch_size_per_image (int): number of anchors that are sampled during training of the RPN for computing the loss rpn_positive_fraction (float): proportion of positive anchors in a mini-batch during training of the RPN rpn_score_thresh (float): during inference, only return proposals with a classification score greater than rpn_score_thresh box_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in the locations indicated by the bounding boxes box_head (nn.Module): module that takes the cropped feature maps as input box_predictor (nn.Module): module that takes the output of box_head and returns the classification logits and box regression deltas. box_score_thresh (float): during inference, only return proposals with a classification score greater than box_score_thresh box_nms_thresh (float): NMS threshold for the prediction head. Used during inference box_detections_per_img (int): maximum number of detections per image, for all classes. 
box_fg_iou_thresh (float): minimum IoU between the proposals and the GT box so that they can be considered as positive during training of the classification head box_bg_iou_thresh (float): maximum IoU between the proposals and the GT box so that they can be considered as negative during training of the classification head box_batch_size_per_image (int): number of proposals that are sampled during training of the classification head box_positive_fraction (float): proportion of positive proposals in a mini-batch during training of the classification head bbox_reg_weights (Tuple[float, float, float, float]): weights for the encoding/decoding of the bounding boxes """ def __init__(self, backbone, num_classes=None, # transform parameter min_size=800, max_size=1333, # 预处理resize时限制的最小尺寸与最大尺寸 image_mean=None, image_std=None, # 预处理normalize时使用的均值和方差 # RPN parameters rpn_anchor_generator=None, rpn_head=None, rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000, # rpn中在nms处理前保留的proposal数(根据score) rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000, # rpn中在nms处理后保留的proposal数 rpn_nms_thresh=0.7, # rpn中进行nms处理时使用的iou阈值 rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3, # rpn计算损失时,采集正负样本设置的阈值 rpn_batch_size_per_image=256, rpn_positive_fraction=0.5, # rpn计算损失时采样的样本数,以及正样本占总样本的比例 rpn_score_thresh=0.0, # Box parameters box_roi_pool=None, box_head=None, box_predictor=None, # 移除低目标概率 fast rcnn中进行nms处理的阈值 对预测结果根据score排序取前100个目标 box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100, box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5, # fast rcnn计算误差时,采集正负样本设置的阈值 box_batch_size_per_image=512, box_positive_fraction=0.25, # fast rcnn计算误差时采样的样本数,以及正样本占所有样本的比例 bbox_reg_weights=None): if not hasattr(backbone, "out_channels"): raise ValueError( "backbone should contain an attribute out_channels" "specifying the number of output channels (assumed to be the" "same for all the levels" ) # assert isinstance(rpn_anchor_generator, (AnchorsGenerator, type(None))) assert 
isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None))) if num_classes is not None: if box_predictor is not None: raise ValueError("num_classes should be None when box_predictor " "is specified") else: if box_predictor is None: raise ValueError("num_classes should not be None when box_predictor " "is not specified") # 预测特征层的channels out_channels = backbone.out_channels # 若anchor生成器为空,则自动生成针对resnet50_fpn的anchor生成器 if rpn_anchor_generator is None: anchor_sizes = ((32,), (64,), (128,), (256,), (512,)) aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes) rpn_anchor_generator = AnchorsGenerator( anchor_sizes, aspect_ratios ) # 生成RPN通过滑动窗口预测网络部分 if rpn_head is None: rpn_head = RPNHead( out_channels, rpn_anchor_generator.num_anchors_per_location()[0] ) # 默认rpn_pre_nms_top_n_train = 2000, rpn_pre_nms_top_n_test = 1000, # 默认rpn_post_nms_top_n_train = 2000, rpn_post_nms_top_n_test = 1000, rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test) rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test) # 定义整个RPN框架 rpn = RegionProposalNetwork( rpn_anchor_generator, rpn_head, rpn_fg_iou_thresh, rpn_bg_iou_thresh, rpn_batch_size_per_image, rpn_positive_fraction, rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh, score_thresh=rpn_score_thresh) # Multi-scale RoIAlign pooling if box_roi_pool is None: box_roi_pool = MultiScaleRoIAlign( featmap_names=['0', '1', '2', '3'], # 在哪些特征层进行roi pooling output_size=[7, 7], sampling_ratio=2) # fast RCNN中roi pooling后的展平处理两个全连接层部分 if box_head is None: resolution = box_roi_pool.output_size[0] # 默认等于7 representation_size = 1024 box_head = TwoMLPHead( out_channels * resolution ** 2, representation_size ) # 在box_head的输出上预测部分 if box_predictor is None: representation_size = 1024 box_predictor = FastRCNNPredictor( representation_size, num_classes) # 将roi pooling, box_head以及box_predictor结合在一起 roi_heads = RoIHeads( # box box_roi_pool, box_head, box_predictor, box_fg_iou_thresh, 
class ImageList(object):
    """
    Holds a batch of images (of possibly varying sizes) as one padded tensor,
    together with the size each image had before padding.
    """

    def __init__(self, tensors, image_sizes):
        # type: (Tensor, List[Tuple[int, int]]) -> None
        """
        Arguments:
            tensors (tensor): image data after padding
            image_sizes (list[tuple[int, int]]): image sizes before padding
        """
        self.tensors = tensors
        self.image_sizes = image_sizes

    def to(self, device):
        # type: (Device) -> ImageList # noqa
        """Return a new ImageList whose tensor lives on ``device``; the sizes list is shared."""
        return ImageList(self.tensors.to(device), self.image_sizes)
During training, the model expects both the input tensors, as well as a targets (list of dictionary), containing: - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``. - labels (Int64Tensor[N]): the class label for each ground-truth box - masks (UInt8Tensor[N, H, W]): the segmentation binary masks for each instance The model returns a Dict[Tensor] during training, containing the classification and regression losses for both the RPN and the R-CNN, and the mask loss. During inference, the model requires only the input tensors, and returns the post-processed predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as follows: - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``. - labels (Int64Tensor[N]): the predicted labels for each image - scores (Tensor[N]): the scores or each prediction - masks (UInt8Tensor[N, 1, H, W]): the predicted masks for each instance, in 0-1 range. In order to obtain the final segmentation masks, the soft masks can be thresholded, generally with a value of 0.5 (mask >= 0.5) Args: backbone (nn.Module): the network used to compute the features for the model. It should contain a out_channels attribute, which indicates the number of output channels that each feature map has (and it should be the same for all feature maps). The backbone should return a single Tensor or and OrderedDict[Tensor]. num_classes (int): number of output classes of the model (including the background). If box_predictor is specified, num_classes should be None. min_size (int): minimum size of the image to be rescaled before feeding it to the backbone max_size (int): maximum size of the image to be rescaled before feeding it to the backbone image_mean (Tuple[float, float, float]): mean values used for input normalization. 
They are generally the mean values of the dataset on which the backbone has been trained on image_std (Tuple[float, float, float]): std values used for input normalization. They are generally the std values of the dataset on which the backbone has been trained on rpn_anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature maps. rpn_head (nn.Module): module that computes the objectness and regression deltas from the RPN rpn_pre_nms_top_n_train (int): number of proposals to keep before applying NMS during training rpn_pre_nms_top_n_test (int): number of proposals to keep before applying NMS during testing rpn_post_nms_top_n_train (int): number of proposals to keep after applying NMS during training rpn_post_nms_top_n_test (int): number of proposals to keep after applying NMS during testing rpn_nms_thresh (float): NMS threshold used for postprocessing the RPN proposals rpn_fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be considered as positive during training of the RPN. rpn_bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be considered as negative during training of the RPN. rpn_batch_size_per_image (int): number of anchors that are sampled during training of the RPN for computing the loss rpn_positive_fraction (float): proportion of positive anchors in a mini-batch during training of the RPN rpn_score_thresh (float): during inference, only return proposals with a classification score greater than rpn_score_thresh box_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in the locations indicated by the bounding boxes box_head (nn.Module): module that takes the cropped feature maps as input box_predictor (nn.Module): module that takes the output of box_head and returns the classification logits and box regression deltas. 
box_score_thresh (float): during inference, only return proposals with a classification score greater than box_score_thresh box_nms_thresh (float): NMS threshold for the prediction head. Used during inference box_detections_per_img (int): maximum number of detections per image, for all classes. box_fg_iou_thresh (float): minimum IoU between the proposals and the GT box so that they can be considered as positive during training of the classification head box_bg_iou_thresh (float): maximum IoU between the proposals and the GT box so that they can be considered as negative during training of the classification head box_batch_size_per_image (int): number of proposals that are sampled during training of the classification head box_positive_fraction (float): proportion of positive proposals in a mini-batch during training of the classification head bbox_reg_weights (Tuple[float, float, float, float]): weights for the encoding/decoding of the bounding boxes mask_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in the locations indicated by the bounding boxes, which will be used for the mask head. 
mask_head (nn.Module): module that takes the cropped feature maps as input mask_predictor (nn.Module): module that takes the output of the mask_head and returns the segmentation mask logits """ def __init__( self, backbone, num_classes=None, # transform parameters min_size=800, max_size=1333, image_mean=None, image_std=None, # RPN parameters rpn_anchor_generator=None, rpn_head=None, rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000, rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000, rpn_nms_thresh=0.7, rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3, rpn_batch_size_per_image=256, rpn_positive_fraction=0.5, rpn_score_thresh=0.0, # Box parameters box_roi_pool=None, box_head=None, box_predictor=None, box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100, box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5, box_batch_size_per_image=512, box_positive_fraction=0.25, bbox_reg_weights=None, # Mask parameters mask_roi_pool=None, mask_head=None, mask_predictor=None, ): if not isinstance(mask_roi_pool, (MultiScaleRoIAlign, type(None))): raise TypeError( f"mask_roi_pool should be of type MultiScaleRoIAlign or None instead of {type(mask_roi_pool)}" ) if num_classes is not None: if mask_predictor is not None: raise ValueError("num_classes should be None when mask_predictor is specified") out_channels = backbone.out_channels if mask_roi_pool is None: mask_roi_pool = MultiScaleRoIAlign(featmap_names=["0", "1", "2", "3"], output_size=14, sampling_ratio=2) if mask_head is None: mask_layers = (256, 256, 256, 256) mask_dilation = 1 mask_head = MaskRCNNHeads(out_channels, mask_layers, mask_dilation) if mask_predictor is None: mask_predictor_in_channels = 256 mask_dim_reduced = 256 mask_predictor = MaskRCNNPredictor(mask_predictor_in_channels, mask_dim_reduced, num_classes) super().__init__( backbone, num_classes, # transform parameters min_size, max_size, image_mean, image_std, # RPN-specific parameters rpn_anchor_generator, rpn_head, rpn_pre_nms_top_n_train, 
class MaskRCNNHeads(nn.Sequential):
    """Sequential stack of 3x3 convolutions, each followed by an in-place ReLU."""

    def __init__(self, in_channels, layers, dilation):
        """
        Args:
            in_channels (int): number of input channels
            layers (tuple): feature dimensions of each FCN layer
            dilation (int): dilation rate of kernel
        """
        named_modules = []
        prev_channels = in_channels
        for idx, out_channels in enumerate(layers, start=1):
            # padding equals dilation, so a 3x3 kernel with stride 1 keeps
            # the spatial size unchanged
            conv = nn.Conv2d(prev_channels,
                             out_channels,
                             kernel_size=3,
                             stride=1,
                             padding=dilation,
                             dilation=dilation)
            named_modules.append((f"mask_fcn{idx}", conv))
            named_modules.append((f"relu{idx}", nn.ReLU(inplace=True)))
            prev_channels = out_channels

        super().__init__(OrderedDict(named_modules))

        # initialise every parameter whose name contains "weight"
        for param_name, tensor in self.named_parameters():
            if "weight" not in param_name:
                continue
            nn.init.kaiming_normal_(tensor, mode="fan_out", nonlinearity="relu")
def maskrcnn_inference(x, labels):
    # type: (Tensor, List[Tensor]) -> List[Tensor]
    """
    Post-process the mask logits: apply a sigmoid and, for every detection,
    keep only the mask channel that belongs to its predicted class.

    Args:
        x (Tensor): the mask logits; dim 0 indexes detections (all images
            concatenated) and dim 1 indexes classes
        labels (list[Tensor]): predicted class index of every detection,
            one tensor per image

    Returns:
        The selected mask probabilities split back per image (the result of
        ``Tensor.split``); each piece keeps a singleton dim at position 1.
    """
    # remember how many boxes/masks belong to each image before flattening
    counts_per_image = [lbl.shape[0] for lbl in labels]

    # handle all images at once: one flat label vector, one row index per mask
    flat_labels = torch.cat(labels)
    row_index = torch.arange(x.shape[0], device=flat_labels.device)

    # squash logits into 0~1, then pick the channel of the predicted class
    probabilities = x.sigmoid()
    selected = probabilities[row_index, flat_labels]
    selected = selected[:, None]

    # finally separate the masks again according to the per-image counts
    return selected.split(counts_per_image, dim=0)
""" matched_idxs = matched_idxs.to(boxes) rois = torch.cat([matched_idxs[:, None], boxes], dim=1) gt_masks = gt_masks[:, None].to(rois) return roi_align(gt_masks, rois, (M, M), 1.0)[:, 0] def maskrcnn_loss(mask_logits, proposals, gt_masks, gt_labels, mask_matched_idxs): # type: (Tensor, List[Tensor], List[Tensor], List[Tensor], List[Tensor]) -> Tensor """ Args: mask_logits: proposals: gt_masks: gt_labels: mask_matched_idxs: Returns: mask_loss (Tensor): scalar tensor containing the loss """ # 28(FCN分支输出mask的大小) discretization_size = mask_logits.shape[-1] # 获取每个Proposal(全部为正样本)对应的gt类别 labels = [gt_label[idxs] for gt_label, idxs in zip(gt_labels, mask_matched_idxs)] # 根据Proposal信息在gt_masks上裁剪对应区域做为计算loss时的真正gt_mask mask_targets = [ project_masks_on_boxes(m, p, i, discretization_size) for m, p, i in zip(gt_masks, proposals, mask_matched_idxs) ] # 将一个batch中所有的Proposal对应信息拼接在一起(统一处理提高并行度) labels = torch.cat(labels, dim=0) mask_targets = torch.cat(mask_targets, dim=0) # torch.mean (in binary_cross_entropy_with_logits) doesn't # accept empty tensors, so handle it separately if mask_targets.numel() == 0: return mask_logits.sum() * 0 # 计算预测mask与真实gt_mask之间的BCELoss mask_loss = F.binary_cross_entropy_with_logits( mask_logits[torch.arange(labels.shape[0], device=labels.device), labels], mask_targets ) return mask_loss class RoIHeads(torch.nn.Module): __annotations__ = { 'box_coder': det_utils.BoxCoder, 'proposal_matcher': det_utils.Matcher, 'fg_bg_sampler': det_utils.BalancedPositiveNegativeSampler, } def __init__(self, box_roi_pool, # Multi-scale RoIAlign pooling box_head, # TwoMLPHead box_predictor, # FastRCNNPredictor # Faster R-CNN training fg_iou_thresh, bg_iou_thresh, # default: 0.5, 0.5 batch_size_per_image, positive_fraction, # default: 512, 0.25 bbox_reg_weights, # None # Faster R-CNN inference score_thresh, # default: 0.05 nms_thresh, # default: 0.5 detection_per_img, # default: 100 # Mask mask_roi_pool=None, mask_head=None, mask_predictor=None, ): super(RoIHeads, 
self).__init__() self.box_similarity = box_ops.box_iou # assign ground-truth boxes for each proposal self.proposal_matcher = det_utils.Matcher( fg_iou_thresh, # default: 0.5 bg_iou_thresh, # default: 0.5 allow_low_quality_matches=False) self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler( batch_size_per_image, # default: 512 positive_fraction) # default: 0.25 if bbox_reg_weights is None: bbox_reg_weights = (10., 10., 5., 5.) self.box_coder = det_utils.BoxCoder(bbox_reg_weights) self.box_roi_pool = box_roi_pool # Multi-scale RoIAlign pooling self.box_head = box_head # TwoMLPHead self.box_predictor = box_predictor # FastRCNNPredictor self.score_thresh = score_thresh # default: 0.05 self.nms_thresh = nms_thresh # default: 0.5 self.detection_per_img = detection_per_img # default: 100 self.mask_roi_pool = mask_roi_pool self.mask_head = mask_head self.mask_predictor = mask_predictor def has_mask(self): if self.mask_roi_pool is None: return False if self.mask_head is None: return False if self.mask_predictor is None: return False return True def assign_targets_to_proposals(self, proposals, gt_boxes, gt_labels): # type: (List[Tensor], List[Tensor], List[Tensor]) -> Tuple[List[Tensor], List[Tensor]] """ 为每个proposal匹配对应的gt_box,并划分到正负样本中 Args: proposals: gt_boxes: gt_labels: Returns: """ matched_idxs = [] labels = [] # 遍历每张图像的proposals, gt_boxes, gt_labels信息 for proposals_in_image, gt_boxes_in_image, gt_labels_in_image in zip(proposals, gt_boxes, gt_labels): if gt_boxes_in_image.numel() == 0: # 该张图像中没有gt框,为背景 # background image device = proposals_in_image.device clamped_matched_idxs_in_image = torch.zeros( (proposals_in_image.shape[0],), dtype=torch.int64, device=device ) labels_in_image = torch.zeros( (proposals_in_image.shape[0],), dtype=torch.int64, device=device ) else: # set to self.box_similarity when https://github.com/pytorch/pytorch/issues/27495 lands # 计算proposal与每个gt_box的iou重合度 match_quality_matrix = box_ops.box_iou(gt_boxes_in_image, proposals_in_image) 
# 计算proposal与每个gt_box匹配的iou最大值,并记录索引, # iou < low_threshold索引值为 -1, low_threshold <= iou < high_threshold索引值为 -2 matched_idxs_in_image = self.proposal_matcher(match_quality_matrix) # 限制最小值,防止匹配标签时出现越界的情况 # 注意-1, -2对应的gt索引会调整到0,获取的标签类别为第0个gt的类别(实际上并不是),后续会进一步处理 clamped_matched_idxs_in_image = matched_idxs_in_image.clamp(min=0) # 获取proposal匹配到的gt对应标签 labels_in_image = gt_labels_in_image[clamped_matched_idxs_in_image] labels_in_image = labels_in_image.to(dtype=torch.int64) # label background (below the low threshold) # 将gt索引为-1的类别设置为0,即背景,负样本 bg_inds = matched_idxs_in_image == self.proposal_matcher.BELOW_LOW_THRESHOLD # -1 labels_in_image[bg_inds] = 0 # label ignore proposals (between low and high threshold) # 将gt索引为-2的类别设置为-1, 即废弃样本 ignore_inds = matched_idxs_in_image == self.proposal_matcher.BETWEEN_THRESHOLDS # -2 labels_in_image[ignore_inds] = -1 # -1 is ignored by sampler matched_idxs.append(clamped_matched_idxs_in_image) labels.append(labels_in_image) return matched_idxs, labels def subsample(self, labels): # type: (List[Tensor]) -> List[Tensor] # BalancedPositiveNegativeSampler sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels) sampled_inds = [] # 遍历每张图片的正负样本索引 for img_idx, (pos_inds_img, neg_inds_img) in enumerate(zip(sampled_pos_inds, sampled_neg_inds)): # 记录所有采集样本索引(包括正样本和负样本) # img_sampled_inds = torch.nonzero(pos_inds_img | neg_inds_img).squeeze(1) img_sampled_inds = torch.where(pos_inds_img | neg_inds_img)[0] sampled_inds.append(img_sampled_inds) return sampled_inds def add_gt_proposals(self, proposals, gt_boxes): # type: (List[Tensor], List[Tensor]) -> List[Tensor] """ 将gt_boxes拼接到proposal后面 Args: proposals: 一个batch中每张图像rpn预测的boxes gt_boxes: 一个batch中每张图像对应的真实目标边界框 Returns: """ proposals = [ torch.cat((proposal, gt_box)) for proposal, gt_box in zip(proposals, gt_boxes) ] return proposals def check_targets(self, targets): # type: (Optional[List[Dict[str, Tensor]]]) -> None assert targets is not None assert all(["boxes" in t for t in targets]) 
assert all(["labels" in t for t in targets]) def select_training_samples(self, proposals, # type: List[Tensor] targets # type: Optional[List[Dict[str, Tensor]]] ): # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor], List[Tensor]] """ 划分正负样本,统计对应gt的标签以及边界框回归信息 list元素个数为batch_size Args: proposals: rpn预测的boxes targets: Returns: """ # 检查target数据是否为空 self.check_targets(targets) if targets is None: raise ValueError("target should not be None.") dtype = proposals[0].dtype device = proposals[0].device # 获取标注好的boxes以及labels信息 gt_boxes = [t["boxes"].to(dtype) for t in targets] gt_labels = [t["labels"] for t in targets] # append ground-truth bboxes to proposal # 将gt_boxes拼接到proposal后面 proposals = self.add_gt_proposals(proposals, gt_boxes) # get matching gt indices for each proposal # 为每个proposal匹配对应的gt_box,并划分到正负样本中 matched_idxs, labels = self.assign_targets_to_proposals(proposals, gt_boxes, gt_labels) # sample a fixed proportion of positive-negative proposals # 按给定数量和比例采样正负样本 sampled_inds = self.subsample(labels) matched_gt_boxes = [] num_images = len(proposals) # 遍历每张图像 for img_id in range(num_images): # 获取每张图像的正负样本索引 img_sampled_inds = sampled_inds[img_id] # 获取对应正负样本的proposals信息 proposals[img_id] = proposals[img_id][img_sampled_inds] # 获取对应正负样本的真实类别信息 labels[img_id] = labels[img_id][img_sampled_inds] # 获取对应正负样本的gt索引信息 matched_idxs[img_id] = matched_idxs[img_id][img_sampled_inds] gt_boxes_in_image = gt_boxes[img_id] if gt_boxes_in_image.numel() == 0: gt_boxes_in_image = torch.zeros((1, 4), dtype=dtype, device=device) # 获取对应正负样本的gt box信息 matched_gt_boxes.append(gt_boxes_in_image[matched_idxs[img_id]]) # 根据gt和proposal计算边框回归参数(针对gt的) regression_targets = self.box_coder.encode(matched_gt_boxes, proposals) return proposals, matched_idxs, labels, regression_targets def postprocess_detections(self, class_logits, # type: Tensor box_regression, # type: Tensor proposals, # type: List[Tensor] image_shapes # type: List[Tuple[int, int]] ): # type: (...) 
-> Tuple[List[Tensor], List[Tensor], List[Tensor]] """ 对网络的预测数据进行后处理,包括 (1)根据proposal以及预测的回归参数计算出最终bbox坐标 (2)对预测类别结果进行softmax处理 (3)裁剪预测的boxes信息,将越界的坐标调整到图片边界上 (4)移除所有背景信息 (5)移除低概率目标 (6)移除小尺寸目标 (7)执行nms处理,并按scores进行排序 (8)根据scores排序返回前topk个目标 Args: class_logits: 网络预测类别概率信息 box_regression: 网络预测的边界框回归参数 proposals: rpn输出的proposal image_shapes: 打包成batch前每张图像的宽高 Returns: """ device = class_logits.device # 预测目标类别数 num_classes = class_logits.shape[-1] # 获取每张图像的预测bbox数量 boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals] # 根据proposal以及预测的回归参数计算出最终bbox坐标 pred_boxes = self.box_coder.decode(box_regression, proposals) # 对预测类别结果进行softmax处理 pred_scores = F.softmax(class_logits, -1) # split boxes and scores per image # 根据每张图像的预测bbox数量分割结果 pred_boxes_list = pred_boxes.split(boxes_per_image, 0) pred_scores_list = pred_scores.split(boxes_per_image, 0) all_boxes = [] all_scores = [] all_labels = [] # 遍历每张图像预测信息 for boxes, scores, image_shape in zip(pred_boxes_list, pred_scores_list, image_shapes): # 裁剪预测的boxes信息,将越界的坐标调整到图片边界上 boxes = box_ops.clip_boxes_to_image(boxes, image_shape) # create labels for each prediction labels = torch.arange(num_classes, device=device) labels = labels.view(1, -1).expand_as(scores) # remove prediction with the background label # 移除索引为0的所有信息(0代表背景) boxes = boxes[:, 1:] scores = scores[:, 1:] labels = labels[:, 1:] # batch everything, by making every class prediction be a separate instance boxes = boxes.reshape(-1, 4) scores = scores.reshape(-1) labels = labels.reshape(-1) # remove low scoring boxes # 移除低概率目标,self.scores_thresh=0.05 # gt: Computes input > other element-wise. # inds = torch.nonzero(torch.gt(scores, self.score_thresh)).squeeze(1) inds = torch.where(torch.gt(scores, self.score_thresh))[0] boxes, scores, labels = boxes[inds], scores[inds], labels[inds] # remove empty boxes # 移除小目标 keep = box_ops.remove_small_boxes(boxes, min_size=1.) 
    def forward(self,
                features,       # type: Dict[str, Tensor]
                proposals,      # type: List[Tensor]
                image_shapes,   # type: List[Tuple[int, int]]
                targets=None    # type: Optional[List[Dict[str, Tensor]]]
                ):
        # type: (...) -> Tuple[List[Dict[str, Tensor]], Dict[str, Tensor]]
        """
        Run the box branch and, when all mask modules are set, the mask branch.

        Arguments:
            features (Dict[str, Tensor]): feature maps passed to the RoI poolers
            proposals (List[Tensor[N, 4]]): proposal boxes, one tensor per image
            image_shapes (List[Tuple[H, W]]): image sizes, one per image
            targets (List[Dict]): per-image dicts; "boxes" and "labels" are
                dtype-checked here and "masks" is read when training with masks

        Returns:
            result: in eval mode, one dict per image with "boxes", "labels",
                "scores" (plus "masks" when the mask branch runs); left empty
                in training mode
            losses: in training mode, "loss_classifier" and "loss_box_reg"
                (plus "loss_mask" when the mask branch runs); left empty in
                eval mode
        """

        # check that the targets carry the expected dtypes
        if targets is not None:
            for t in targets:
                floating_point_types = (torch.float, torch.double, torch.half)
                assert t["boxes"].dtype in floating_point_types, "target boxes must of float type"
                assert t["labels"].dtype == torch.int64, "target labels must of int64 type"

        if self.training:
            # split into positive/negative samples and gather the matching
            # gt labels and box-regression targets
            proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets)
        else:
            labels = None
            regression_targets = None
            matched_idxs = None

        # pass the (sampled) proposals through the Multi-scale RoIAlign pooling layer
        # box_features_shape: [num_proposals, channel, height, width]
        box_features = self.box_roi_pool(features, proposals, image_shapes)

        # the two fully connected layers that follow roi pooling
        # box_features_shape: [num_proposals, representation_size]
        box_features = self.box_head(box_features)

        # then predict the class scores and the box-regression parameters
        class_logits, box_regression = self.box_predictor(box_features)

        result: List[Dict[str, torch.Tensor]] = []
        losses = {}
        if self.training:
            assert labels is not None and regression_targets is not None
            loss_classifier, loss_box_reg = fastrcnn_loss(
                class_logits, box_regression, labels, regression_targets)
            losses = {
                "loss_classifier": loss_classifier,
                "loss_box_reg": loss_box_reg
            }
        else:
            # NOTE: `labels` is rebound here to the predicted labels per image
            boxes, scores, labels = self.postprocess_detections(class_logits, box_regression, proposals, image_shapes)
            num_images = len(boxes)
            for i in range(num_images):
                result.append(
                    {
                        "boxes": boxes[i],
                        "labels": labels[i],
                        "scores": scores[i],
                    }
                )

        if self.has_mask():
            # take the final predicted boxes (empty list in training mode,
            # where it is replaced just below)
            mask_proposals = [p["boxes"] for p in result]
            if self.training:
                # matched_idxs holds, for every proposal, the gt index obtained
                # during positive/negative matching (background proposals also
                # carry gt index 0 by default)
                if matched_idxs is None:
                    raise ValueError("if in training, matched_idxs should not be None")

                # during training, only focus on positive boxes
                num_images = len(proposals)
                mask_proposals = []
                pos_matched_idxs = []
                for img_id in range(num_images):
                    # entries whose gt class is > 0, i.e. positive samples
                    pos = torch.where(labels[img_id] > 0)[0]
                    mask_proposals.append(proposals[img_id][pos])
                    pos_matched_idxs.append(matched_idxs[img_id][pos])
            else:
                pos_matched_idxs = None

            mask_features = self.mask_roi_pool(features, mask_proposals, image_shapes)
            mask_features = self.mask_head(mask_features)
            mask_logits = self.mask_predictor(mask_features)

            loss_mask = {}
            if self.training:
                if targets is None or pos_matched_idxs is None or mask_logits is None:
                    raise ValueError("targets, pos_matched_idxs, mask_logits cannot be None when training")

                gt_masks = [t["masks"] for t in targets]
                gt_labels = [t["labels"] for t in targets]
                rcnn_loss_mask = maskrcnn_loss(mask_logits, mask_proposals, gt_masks, gt_labels, pos_matched_idxs)
                loss_mask = {"loss_mask": rcnn_loss_mask}
            else:
                # attach the per-class-selected mask probabilities to each image's result
                labels = [r["labels"] for r in result]
                mask_probs = maskrcnn_inference(mask_logits, labels)
                for mask_prob, r in zip(mask_probs, result):
                    r["masks"] = mask_prob

            losses.update(loss_mask)

        return result, losses
from torch.nn import functional as F import torchvision from . import det_utils from . import boxes as box_ops from .image_list import ImageList @torch.jit.unused def _onnx_get_num_anchors_and_pre_nms_top_n(ob, orig_pre_nms_top_n): # type: (Tensor, int) -> Tuple[int, int] from torch.onnx import operators num_anchors = operators.shape_as_tensor(ob)[1].unsqueeze(0) pre_nms_top_n = torch.min(torch.cat( (torch.tensor([orig_pre_nms_top_n], dtype=num_anchors.dtype), num_anchors), 0)) return num_anchors, pre_nms_top_n class AnchorsGenerator(nn.Module): __annotations__ = { "cell_anchors": Optional[List[torch.Tensor]], "_cache": Dict[str, List[torch.Tensor]] } """ anchors生成器 Module that generates anchors for a set of feature maps and image sizes. The module support computing anchors at multiple sizes and aspect ratios per feature map. sizes and aspect_ratios should have the same number of elements, and it should correspond to the number of feature maps. sizes[i] and aspect_ratios[i] can have an arbitrary number of elements, and AnchorGenerator will output a set of sizes[i] * aspect_ratios[i] anchors per spatial location for feature map i. 
Arguments: sizes (Tuple[Tuple[int]]): aspect_ratios (Tuple[Tuple[float]]): """ def __init__(self, sizes=(128, 256, 512), aspect_ratios=(0.5, 1.0, 2.0)): super(AnchorsGenerator, self).__init__() if not isinstance(sizes[0], (list, tuple)): # TODO change this sizes = tuple((s,) for s in sizes) if not isinstance(aspect_ratios[0], (list, tuple)): aspect_ratios = (aspect_ratios,) * len(sizes) assert len(sizes) == len(aspect_ratios) self.sizes = sizes self.aspect_ratios = aspect_ratios self.cell_anchors = None self._cache = {} def generate_anchors(self, scales, aspect_ratios, dtype=torch.float32, device=torch.device("cpu")): # type: (List[int], List[float], torch.dtype, torch.device) -> Tensor """ compute anchor sizes Arguments: scales: sqrt(anchor_area) aspect_ratios: h/w ratios dtype: float32 device: cpu/gpu """ scales = torch.as_tensor(scales, dtype=dtype, device=device) aspect_ratios = torch.as_tensor(aspect_ratios, dtype=dtype, device=device) h_ratios = torch.sqrt(aspect_ratios) w_ratios = 1.0 / h_ratios # [r1, r2, r3]' * [s1, s2, s3] # number of elements is len(ratios)*len(scales) ws = (w_ratios[:, None] * scales[None, :]).view(-1) hs = (h_ratios[:, None] * scales[None, :]).view(-1) # left-top, right-bottom coordinate relative to anchor center(0, 0) # 生成的anchors模板都是以(0, 0)为中心的, shape [len(ratios)*len(scales), 4] base_anchors = torch.stack([-ws, -hs, ws, hs], dim=1) / 2 return base_anchors.round() # round 四舍五入 def set_cell_anchors(self, dtype, device): # type: (torch.dtype, torch.device) -> None if self.cell_anchors is not None: cell_anchors = self.cell_anchors assert cell_anchors is not None # suppose that all anchors have the same device # which is a valid assumption in the current state of the codebase if cell_anchors[0].device == device: return # 根据提供的sizes和aspect_ratios生成anchors模板 # anchors模板都是以(0, 0)为中心的anchor cell_anchors = [ self.generate_anchors(sizes, aspect_ratios, dtype, device) for sizes, aspect_ratios in zip(self.sizes, self.aspect_ratios) ] 
self.cell_anchors = cell_anchors def num_anchors_per_location(self): # 计算每个预测特征层上每个滑动窗口的预测目标数 return [len(s) * len(a) for s, a in zip(self.sizes, self.aspect_ratios)] # For every combination of (a, (g, s), i) in (self.cell_anchors, zip(grid_sizes, strides), 0:2), # output g[i] anchors that are s[i] distance apart in direction i, with the same dimensions as a. def grid_anchors(self, grid_sizes, strides): # type: (List[List[int]], List[List[Tensor]]) -> List[Tensor] """ anchors position in grid coordinate axis map into origin image 计算预测特征图对应原始图像上的所有anchors的坐标 Args: grid_sizes: 预测特征矩阵的height和width strides: 预测特征矩阵上一步对应原始图像上的步距 """ anchors = [] cell_anchors = self.cell_anchors assert cell_anchors is not None # 遍历每个预测特征层的grid_size,strides和cell_anchors for size, stride, base_anchors in zip(grid_sizes, strides, cell_anchors): grid_height, grid_width = size stride_height, stride_width = stride device = base_anchors.device # For output anchor, compute [x_center, y_center, x_center, y_center] # shape: [grid_width] 对应原图上的x坐标(列) shifts_x = torch.arange(0, grid_width, dtype=torch.float32, device=device) * stride_width # shape: [grid_height] 对应原图上的y坐标(行) shifts_y = torch.arange(0, grid_height, dtype=torch.float32, device=device) * stride_height # 计算预测特征矩阵上每个点对应原图上的坐标(anchors模板的坐标偏移量) # torch.meshgrid函数分别传入行坐标和列坐标,生成网格行坐标矩阵和网格列坐标矩阵 # shape: [grid_height, grid_width] shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) shift_x = shift_x.reshape(-1) shift_y = shift_y.reshape(-1) # 计算anchors坐标(xmin, ymin, xmax, ymax)在原图上的坐标偏移量 # shape: [grid_width*grid_height, 4] shifts = torch.stack([shift_x, shift_y, shift_x, shift_y], dim=1) # For every (base anchor, output anchor) pair, # offset each zero-centered base anchor by the center of the output anchor. 
# 将anchors模板与原图上的坐标偏移量相加得到原图上所有anchors的坐标信息(shape不同时会使用广播机制) shifts_anchor = shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4) anchors.append(shifts_anchor.reshape(-1, 4)) return anchors # List[Tensor(all_num_anchors, 4)] def cached_grid_anchors(self, grid_sizes, strides): # type: (List[List[int]], List[List[Tensor]]) -> List[Tensor] """将计算得到的所有anchors信息进行缓存""" key = str(grid_sizes) + str(strides) # self._cache是字典类型 if key in self._cache: return self._cache[key] anchors = self.grid_anchors(grid_sizes, strides) self._cache[key] = anchors return anchors def forward(self, image_list, feature_maps): # type: (ImageList, List[Tensor]) -> List[Tensor] # 获取每个预测特征层的尺寸(height, width) grid_sizes = list([feature_map.shape[-2:] for feature_map in feature_maps]) # 获取输入图像的height和width image_size = image_list.tensors.shape[-2:] # 获取变量类型和设备类型 dtype, device = feature_maps[0].dtype, feature_maps[0].device # one step in feature map equate n pixel stride in origin image # 计算特征层上的一步等于原始图像上的步长 strides = [[torch.tensor(image_size[0] // g[0], dtype=torch.int64, device=device), torch.tensor(image_size[1] // g[1], dtype=torch.int64, device=device)] for g in grid_sizes] # 根据提供的sizes和aspect_ratios生成anchors模板 self.set_cell_anchors(dtype, device) # 计算/读取所有anchors的坐标信息(这里的anchors信息是映射到原图上的所有anchors信息,不是anchors模板) # 得到的是一个list列表,对应每张预测特征图映射回原图的anchors坐标信息 anchors_over_all_feature_maps = self.cached_grid_anchors(grid_sizes, strides) anchors = torch.jit.annotate(List[List[torch.Tensor]], []) # 遍历一个batch中的每张图像 for i, (image_height, image_width) in enumerate(image_list.image_sizes): anchors_in_image = [] # 遍历每张预测特征图映射回原图的anchors坐标信息 for anchors_per_feature_map in anchors_over_all_feature_maps: anchors_in_image.append(anchors_per_feature_map) anchors.append(anchors_in_image) # 将每一张图像的所有预测特征层的anchors坐标信息拼接在一起 # anchors是个list,每个元素为一张图像的所有anchors信息 anchors = [torch.cat(anchors_per_image) for anchors_per_image in anchors] # Clear the cache in case that memory leaks. 
class RPNHead(nn.Module):
    """
    RPN head: a shared 3x3 convolution followed by two sibling 1x1
    convolutions, one producing objectness logits and one producing
    box-regression parameters.

    Arguments:
        in_channels: number of channels of the input feature
        num_anchors: number of anchors to be predicted
    """

    def __init__(self, in_channels, num_anchors):
        super().__init__()
        # 3x3 sliding window; keeps the channel count and spatial size
        self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
        # one objectness logit per anchor
        self.cls_logits = nn.Conv2d(in_channels, num_anchors, kernel_size=1, stride=1)
        # four regression parameters per anchor
        self.bbox_pred = nn.Conv2d(in_channels, num_anchors * 4, kernel_size=1, stride=1)

        # Re-initialise every direct conv child: N(0, 0.01) weights, zero bias.
        conv_layers = [module for module in self.children() if isinstance(module, nn.Conv2d)]
        for conv_layer in conv_layers:
            torch.nn.init.normal_(conv_layer.weight, std=0.01)
            torch.nn.init.constant_(conv_layer.bias, 0)

    def forward(self, x):
        # type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
        # Apply the shared conv + ReLU to each feature level, then both heads.
        hidden = [F.relu(self.conv(feature)) for feature in x]
        logits = [self.cls_logits(t) for t in hidden]
        bbox_reg = [self.bbox_pred(t) for t in hidden]
        return logits, bbox_reg
class RegionProposalNetwork(torch.nn.Module):
    """
    Implements Region Proposal Network (RPN).

    Arguments:
        anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature
            maps.
        head (nn.Module): module that computes the objectness and regression deltas
        fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be
            considered as positive during training of the RPN.
        bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be
            considered as negative during training of the RPN.
        batch_size_per_image (int): number of anchors that are sampled during training of the RPN
            for computing the loss
        positive_fraction (float): proportion of positive anchors in a mini-batch during training
            of the RPN
        pre_nms_top_n (Dict[str]): number of proposals to keep before applying NMS. It should
            contain two fields: training and testing, to allow for different values depending
            on training or evaluation
        post_nms_top_n (Dict[str]): number of proposals to keep after applying NMS. It should
            contain two fields: training and testing, to allow for different values depending
            on training or evaluation
        nms_thresh (float): NMS threshold used for postprocessing the RPN proposals
        score_thresh (float): proposals whose sigmoid objectness is below this value are
            dropped in ``filter_proposals``
    """
    __annotations__ = {
        'box_coder': det_utils.BoxCoder,
        'proposal_matcher': det_utils.Matcher,
        'fg_bg_sampler': det_utils.BalancedPositiveNegativeSampler,
        'pre_nms_top_n': Dict[str, int],
        'post_nms_top_n': Dict[str, int],
    }

    def __init__(self, anchor_generator, head,
                 fg_iou_thresh, bg_iou_thresh,
                 batch_size_per_image, positive_fraction,
                 pre_nms_top_n, post_nms_top_n, nms_thresh, score_thresh=0.0):
        super(RegionProposalNetwork, self).__init__()
        self.anchor_generator = anchor_generator
        self.head = head
        self.box_coder = det_utils.BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))

        # use during training
        # IoU between anchors and ground-truth boxes
        self.box_similarity = box_ops.box_iou

        self.proposal_matcher = det_utils.Matcher(
            fg_iou_thresh,  # anchors above this IoU are positives
            bg_iou_thresh,  # anchors below this IoU are negatives
            allow_low_quality_matches=True
        )

        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image, positive_fraction
        )

        # use during testing
        self._pre_nms_top_n = pre_nms_top_n
        self._post_nms_top_n = post_nms_top_n
        self.nms_thresh = nms_thresh
        self.score_thresh = score_thresh
        self.min_size = 1.

    def pre_nms_top_n(self):
        """Return the pre-NMS proposal budget for the current train/eval mode."""
        if self.training:
            return self._pre_nms_top_n['training']
        return self._pre_nms_top_n['testing']

    def post_nms_top_n(self):
        """Return the post-NMS proposal budget for the current train/eval mode."""
        if self.training:
            return self._post_nms_top_n['training']
        return self._post_nms_top_n['testing']

    def assign_targets_to_anchors(self, anchors, targets):
        # type: (List[Tensor], List[Dict[str, Tensor]]) -> Tuple[List[Tensor], List[Tensor]]
        """
        Match every anchor to a ground-truth box and label it.

        Args:
            anchors: (List[Tensor]) anchors of each image
            targets: (List[Dict[Tensor]) targets of each image; ``"boxes"`` is read

        Returns:
            labels: per image, a float tensor holding 1 for positive anchors,
                0 for background and -1 for anchors to be ignored.
            matched_gt_boxes: per image, the ground-truth box assigned to each anchor.
        """
        labels = []
        matched_gt_boxes = []
        # One (anchors, targets) pair per image.
        for anchors_per_image, targets_per_image in zip(anchors, targets):
            gt_boxes = targets_per_image["boxes"]
            if gt_boxes.numel() == 0:
                # No ground truth: every anchor is background with a dummy box.
                device = anchors_per_image.device
                matched_gt_boxes_per_image = torch.zeros(anchors_per_image.shape, dtype=torch.float32, device=device)
                labels_per_image = torch.zeros((anchors_per_image.shape[0],), dtype=torch.float32, device=device)
            else:
                # IoU between ground-truth boxes and anchors.
                # set to self.box_similarity when https://github.com/pytorch/pytorch/issues/27495 lands
                match_quality_matrix = box_ops.box_iou(gt_boxes, anchors_per_image)
                # Best ground-truth index per anchor; negative values are the
                # matcher's BELOW_LOW_THRESHOLD / BETWEEN_THRESHOLDS markers.
                matched_idxs = self.proposal_matcher(match_quality_matrix)
                # Clamp so the negative markers still index a valid box. Those
                # rows are placeholders; the box loss only uses positives.
                matched_gt_boxes_per_image = gt_boxes[matched_idxs.clamp(min=0)]

                # Positives are the anchors with a real (non-negative) match.
                labels_per_image = matched_idxs >= 0
                labels_per_image = labels_per_image.to(dtype=torch.float32)

                # background (negative examples)
                bg_indices = matched_idxs == self.proposal_matcher.BELOW_LOW_THRESHOLD  # -1
                labels_per_image[bg_indices] = 0.0

                # discard indices that are between thresholds
                inds_to_discard = matched_idxs == self.proposal_matcher.BETWEEN_THRESHOLDS  # -2
                labels_per_image[inds_to_discard] = -1.0

            labels.append(labels_per_image)
            matched_gt_boxes.append(matched_gt_boxes_per_image)
        return labels, matched_gt_boxes

    def _get_top_n_idx(self, objectness, num_anchors_per_level):
        # type: (Tensor, List[int]) -> Tensor
        """
        Pick, per feature level, the indices of the top-scoring anchors.

        Args:
            objectness: Tensor of objectness values, split along dim 1 by level
            num_anchors_per_level: List with the number of anchors of each level

        Returns:
            Indices into dim 1 of ``objectness``, concatenated over all levels.
        """
        r = []
        offset = 0
        for ob in objectness.split(num_anchors_per_level, 1):
            if torchvision._is_tracing():
                num_anchors, pre_nms_top_n = _onnx_get_num_anchors_and_pre_nms_top_n(ob, self.pre_nms_top_n())
            else:
                num_anchors = ob.shape[1]
                # Never ask topk for more entries than the level has.
                pre_nms_top_n = min(self.pre_nms_top_n(), num_anchors)

            # Returns the k largest elements of the given input tensor along a given dimension
            _, top_n_idx = ob.topk(pre_nms_top_n, dim=1)
            # Shift level-local indices to positions in the concatenated tensor.
            r.append(top_n_idx + offset)
            offset += num_anchors
        return torch.cat(r, dim=1)

    def filter_proposals(self, proposals, objectness, image_shapes, num_anchors_per_level):
        # type: (Tensor, Tensor, List[Tuple[int, int]], List[int]) -> Tuple[List[Tensor], List[Tensor]]
        """
        Reduce the raw proposals: per-level top-k, clipping, small-box and
        low-score removal, per-level NMS, then a final top-k.

        Args:
            proposals: predicted box coordinates
            objectness: predicted objectness logits
            image_shapes: size of every image in the batch
            num_anchors_per_level: number of anchors of each feature level

        Returns:
            final_boxes: per image, the kept boxes
            final_scores: per image, the sigmoid objectness of the kept boxes
        """
        num_images = proposals.shape[0]
        device = proposals.device

        # do not backprop through objectness
        objectness = objectness.detach()
        objectness = objectness.reshape(num_images, -1)

        # levels[i] is the feature-level index of anchor i, so that NMS can
        # later be run independently per level.
        levels = [torch.full((n, ), idx, dtype=torch.int64, device=device)
                  for idx, n in enumerate(num_anchors_per_level)]
        levels = torch.cat(levels, 0)

        # Expand this tensor to the same size as objectness
        levels = levels.reshape(1, -1).expand_as(objectness)

        # select top_n boxes independently per level before applying nms
        top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)

        image_range = torch.arange(num_images, device=device)
        batch_idx = image_range[:, None]  # [batch_size, 1]

        # Gather scores, level ids and boxes of the selected anchors.
        objectness = objectness[batch_idx, top_n_idx]
        levels = levels[batch_idx, top_n_idx]
        proposals = proposals[batch_idx, top_n_idx]

        objectness_prob = torch.sigmoid(objectness)

        final_boxes = []
        final_scores = []
        # Post-process each image separately.
        for boxes, scores, lvl, img_shape in zip(proposals, objectness_prob, levels, image_shapes):
            # Clip boxes that stick out of the image.
            boxes = box_ops.clip_boxes_to_image(boxes, img_shape)

            # Drop boxes smaller than min_size.
            keep = box_ops.remove_small_boxes(boxes, self.min_size)
            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]

            # Drop low-scoring boxes, see
            # https://github.com/pytorch/vision/pull/3205
            keep = torch.where(torch.ge(scores, self.score_thresh))[0]  # ge: >=
            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]

            # non-maximum suppression, independently done per level
            keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)

            # keep only topk scoring predictions
            keep = keep[: self.post_nms_top_n()]
            boxes, scores = boxes[keep], scores[keep]

            final_boxes.append(boxes)
            final_scores.append(scores)
        return final_boxes, final_scores

    def compute_loss(self, objectness, pred_bbox_deltas, labels, regression_targets):
        # type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]
        """
        Compute the RPN objectness loss and box-regression loss.

        Arguments:
            objectness (Tensor): predicted objectness logits
            pred_bbox_deltas (Tensor): predicted box-regression parameters
            labels (List[Tensor]): per image, the 1 / 0 / -1 anchor labels
            regression_targets (List[Tensor]): per image, the regression targets

        Returns:
            objectness_loss (Tensor): binary cross-entropy over the sampled anchors
            box_loss (Tensor): smooth-L1 loss over the sampled positives, divided
                by the number of sampled anchors
        """
        # Sample positives and negatives; the results are used as per-image masks.
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        # Concatenate the masks over the batch and turn them into indices.
        sampled_pos_inds = torch.where(torch.cat(sampled_pos_inds, dim=0))[0]
        sampled_neg_inds = torch.where(torch.cat(sampled_neg_inds, dim=0))[0]

        # All anchors that contribute to the objectness loss.
        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

        objectness = objectness.flatten()

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)

        # Box-regression loss: positives only.
        box_loss = det_utils.smooth_l1_loss(
            pred_bbox_deltas[sampled_pos_inds],
            regression_targets[sampled_pos_inds],
            beta=1 / 9,
            size_average=False,
        ) / (sampled_inds.numel())

        # Objectness loss: positives and negatives.
        objectness_loss = F.binary_cross_entropy_with_logits(
            objectness[sampled_inds], labels[sampled_inds]
        )

        return objectness_loss, box_loss

    def forward(self,
                images,        # type: ImageList
                features,      # type: Dict[str, Tensor]
                targets=None   # type: Optional[List[Dict[str, Tensor]]]
                ):
        # type: (...) -> Tuple[List[Tensor], Dict[str, Tensor]]
        """
        Arguments:
            images (ImageList): images for which we want to compute the predictions
            features (Dict[Tensor]): features computed from the images that are
                used for computing the predictions. Each tensor in the list
                correspond to different feature levels
            targets (List[Dict[Tensor]): ground-truth boxes present in the image (optional).
                If provided, each element in the dict should contain a field `boxes`,
                with the locations of the ground-truth boxes.

        Returns:
            boxes (List[Tensor]): the predicted boxes from the RPN, one Tensor per
                image.
            losses (Dict[Tensor]): the losses for the model during training. During
                testing, it is an empty dict.
        """
        # RPN uses all feature maps that are available
        features = list(features.values())

        # Per-level objectness logits and box-regression parameters (both lists).
        objectness, pred_bbox_deltas = self.head(features)

        # One anchors tensor per image in the batch.
        anchors = self.anchor_generator(images, features)

        # batch_size
        num_images = len(anchors)

        # Anchors per level = product of the three non-batch dims of the
        # level's objectness map.
        num_anchors_per_level_shape_tensors = [o[0].shape for o in objectness]
        num_anchors_per_level = [s[0] * s[1] * s[2] for s in num_anchors_per_level_shape_tensors]

        # Flatten and concatenate the per-level predictions.
        objectness, pred_bbox_deltas = concat_box_prediction_layers(objectness,
                                                                    pred_bbox_deltas)

        # apply pred_bbox_deltas to anchors to obtain the decoded proposals
        # note that we detach the deltas because Faster R-CNN do not backprop through
        # the proposals
        proposals = self.box_coder.decode(pred_bbox_deltas.detach(), anchors)
        proposals = proposals.view(num_images, -1, 4)

        # Small-box removal, NMS and top-k selection.
        boxes, scores = self.filter_proposals(proposals, objectness, images.image_sizes, num_anchors_per_level)

        losses = {}
        if self.training:
            assert targets is not None
            # Label every anchor and fetch its matched ground-truth box.
            labels, matched_gt_boxes = self.assign_targets_to_anchors(anchors, targets)
            # Regression targets from anchors and their matched boxes.
            regression_targets = self.box_coder.encode(matched_gt_boxes, anchors)
            loss_objectness, loss_rpn_box_reg = self.compute_loss(
                objectness, pred_bbox_deltas, labels, regression_targets
            )
            losses = {
                "loss_objectness": loss_objectness,
                "loss_rpn_box_reg": loss_rpn_box_reg
            }
        return boxes, losses
@torch.jit._script_if_tracing
def _onnx_paste_mask_in_image_loop(masks, boxes, im_h, im_w):
    """
    Paste every mask into its own (im_h, im_w) canvas and concatenate them.

    For each index ``i`` in ``range(masks.size(0))`` the slice ``masks[i][0]``
    is pasted with ``boxes[i]`` through ``_onnx_paste_mask_in_image``.

    Returns:
        The pasted masks concatenated along dim 0. With zero masks this is
        the initial empty tensor of shape [0, im_h, im_w].
    """
    # Empty accumulator, so a zero-length input still yields a tensor.
    res_append = torch.zeros(0, im_h, im_w)
    for i in range(masks.size(0)):
        mask_res = _onnx_paste_mask_in_image(masks[i][0], boxes[i], im_h, im_w)
        # Add a leading dim so the result can be concatenated onto the accumulator.
        mask_res = mask_res.unsqueeze(0)
        res_append = torch.cat((res_append, mask_res))
    return res_append
def _resize_image_and_masks(image: Tensor,
                            self_min_size: float,
                            self_max_size: float,
                            target: Optional[Dict[str, Tensor]] = None,
                            fixed_size: Optional[Tuple[int, int]] = None
                            ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
    """
    Resize an image, and the ``"masks"`` entry of its target if present.

    When ``fixed_size`` is given the output size is fixed. Otherwise the
    scale factor is the smaller of ``self_min_size / min(H, W)`` and
    ``self_max_size / max(H, W)``.

    Args:
        image: image tensor; the last two dims are read as its spatial size.
        self_min_size: desired length of the shorter side.
        self_max_size: upper bound for the longer side.
        target: optional target dict; only ``"masks"`` is touched here.
        fixed_size: optional explicit output size.

    Returns:
        The resized image and the (possibly updated) target.
    """
    # Under tracing the shape must stay a tensor so it is recorded in the graph.
    if torchvision._is_tracing():
        im_shape = _get_shape_onnx(image)
    else:
        im_shape = torch.tensor(image.shape[-2:])

    size: Optional[List[int]] = None
    scale_factor: Optional[float] = None
    recompute_scale_factor: Optional[bool] = None
    if fixed_size is not None:
        # NOTE(review): the two entries are swapped before being passed as `size`;
        # presumably fixed_size is given as (width, height) - confirm with callers.
        size = [fixed_size[1], fixed_size[0]]
    else:
        min_size = torch.min(im_shape).to(dtype=torch.float32)  # shorter side
        max_size = torch.max(im_shape).to(dtype=torch.float32)  # longer side
        # Take the smaller of the two candidate scales.
        scale = torch.min(self_min_size / min_size, self_max_size / max_size)

        if torchvision._is_tracing():
            scale_factor = _fake_cast_onnx(scale)
        else:
            scale_factor = scale.item()
        recompute_scale_factor = True

    # image[None] adds a batch dim, [C, H, W] -> [1, C, H, W]; [0] removes it again.
    image = torch.nn.functional.interpolate(
        image[None],
        size=size,
        scale_factor=scale_factor,
        mode="bilinear",
        recompute_scale_factor=recompute_scale_factor,
        align_corners=False)[0]

    if target is None:
        return image, target

    if "masks" in target:
        mask = target["masks"]
        # No `mode` is passed here, so interpolate's default mode is used.
        mask = torch.nn.functional.interpolate(
            mask[:, None].float(), size=size, scale_factor=scale_factor,
            recompute_scale_factor=recompute_scale_factor
        )[:, 0].byte()  # self.byte() is equivalent to self.to(torch.uint8).
        target["masks"] = mask

    return image, target
class GeneralizedRCNNTransform(nn.Module):
    """
    Performs input / target transformation before feeding the data to a GeneralizedRCNN
    model.

    The transformations it perform are:
        - input normalization (mean subtraction and std division)
        - input / target resizing to match min_size / max_size

    It returns a ImageList for the inputs, and a List[Dict[Tensor]] for the targets
    """

    def __init__(self,
                 min_size: int,
                 max_size: int,
                 image_mean: List[float],
                 image_std: List[float],
                 size_divisible: int = 32,
                 fixed_size: Optional[Tuple[int, int]] = None):
        super().__init__()
        # A scalar min_size is wrapped into a 1-tuple; a list or tuple is kept as is.
        if not isinstance(min_size, (list, tuple)):
            min_size = (min_size,)
        self.min_size = min_size      # candidate lengths for the shorter image side
        self.max_size = max_size      # upper bound for the longer image side
        self.image_mean = image_mean  # per-channel mean subtracted in normalize()
        self.image_std = image_std    # per-channel divisor used in normalize()
        self.size_divisible = size_divisible
        self.fixed_size = fixed_size

    def normalize(self, image):
        """Subtract the per-channel mean and divide by the per-channel std."""
        dtype, device = image.dtype, image.device
        mean = torch.as_tensor(self.image_mean, dtype=dtype, device=device)
        std = torch.as_tensor(self.image_std, dtype=dtype, device=device)
        # [:, None, None] turns shape [C] into [C, 1, 1] so it broadcasts over H and W
        return (image - mean[:, None, None]) / std[:, None, None]

    def torch_choice(self, k):
        # type: (List[int]) -> int
        """
        Implements `random.choice` via torch ops so it can be compiled with
        TorchScript. Remove if https://github.com/pytorch/pytorch/issues/25803
        is fixed.
        """
        index = int(torch.empty(1).uniform_(0., float(len(k))).item())
        return k[index]

    def resize(self, image, target):
        # type: (Tensor, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]
        """
        Resize the image and rescale the target boxes accordingly.

        Args:
            image: input image
            target: optional target dict; ``"boxes"`` is rescaled

        Returns:
            image: the resized image
            target: the target with rescaled boxes, or None if none was given
        """
        # image shape is [channel, height, width]
        h, w = image.shape[-2:]

        if self.training:
            # pick one of the configured sizes at random (self.min_size, not a local)
            size = float(self.torch_choice(self.min_size))
        else:
            # FIXME assume for now that testing uses the largest scale
            size = float(self.min_size[-1])

        image, target = _resize_image_and_masks(image, size, float(self.max_size), target, self.fixed_size)

        if target is None:
            return image, target

        bbox = target["boxes"]
        # scale the boxes by the same ratio as the image
        bbox = resize_boxes(bbox, [h, w], image.shape[-2:])
        target["boxes"] = bbox

        return image, target

    # _onnx_batch_images() is an implementation of
    # batch_images() that is supported by ONNX tracing.
    @torch.jit.unused
    def _onnx_batch_images(self, images, size_divisible=32):
        # type: (List[Tensor], int) -> Tensor
        # Largest extent along every dimension, kept as tensors.
        max_size = []
        for i in range(images[0].dim()):
            max_size_i = torch.max(torch.stack([img.shape[i] for img in images]).to(torch.float32)).to(torch.int64)
            max_size.append(max_size_i)
        stride = size_divisible
        # Round dims 1 and 2 up to a multiple of stride.
        max_size[1] = (torch.ceil((max_size[1].to(torch.float32)) / stride) * stride).to(torch.int64)
        max_size[2] = (torch.ceil((max_size[2].to(torch.float32)) / stride) * stride).to(torch.int64)
        max_size = tuple(max_size)

        # work around for
        # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
        # which is not yet supported in onnx
        padded_imgs = []
        for img in images:
            padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))]
            # Pad only at the end of each dimension.
            padded_img = torch.nn.functional.pad(img, [0, padding[2], 0, padding[1], 0, padding[0]])
            padded_imgs.append(padded_img)

        return torch.stack(padded_imgs)

    def max_by_axis(self, the_list):
        # type: (List[List[int]]) -> List[int]
        """Return the element-wise maximum over the sub-lists (the first sub-list is updated in place)."""
        maxes = the_list[0]
        for sublist in the_list[1:]:
            for index, item in enumerate(sublist):
                maxes[index] = max(maxes[index], item)
        return maxes

    def batch_images(self, images, size_divisible=32):
        # type: (List[Tensor], int) -> Tensor
        """
        Pack a list of images into one zero-padded batch tensor.

        Args:
            images: the input images
            size_divisible: height and width are rounded up to a multiple of this

        Returns:
            batched_imgs: the batch tensor holding all images
        """

        if torchvision._is_tracing():
            # batch_images() does not export well to ONNX
            # call _onnx_batch_images() instead
            return self._onnx_batch_images(images, size_divisible)

        # largest channel, height and width over the batch
        max_size = self.max_by_axis([list(img.shape) for img in images])

        stride = float(size_divisible)
        # max_size = list(max_size)
        # round the height up to a multiple of stride
        max_size[1] = int(math.ceil(float(max_size[1]) / stride) * stride)
        # round the width up to a multiple of stride
        max_size[2] = int(math.ceil(float(max_size[2]) / stride) * stride)

        # [batch, channel, height, width]
        batch_shape = [len(images)] + max_size

        # zero-filled tensor of shape batch_shape
        batched_imgs = images[0].new_full(batch_shape, 0)
        for img, pad_img in zip(images, batched_imgs):
            # Copy each image into the leading corner of its slot; the rest
            # stays zero, so box coordinates remain valid.
            # copy_: Copies the elements from src into self tensor and returns self
            pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)

        return batched_imgs

    def postprocess(self,
                    result,                # type: List[Dict[str, Tensor]]
                    image_shapes,          # type: List[Tuple[int, int]]
                    original_image_sizes   # type: List[Tuple[int, int]]
                    ):
        # type: (...) -> List[Dict[str, Tensor]]
        """
        Map predictions back to the original image scale.

        Args:
            result: list(dict), the predictions, one dict per image
            image_shapes: list(torch.Size), image sizes after resizing
            original_image_sizes: list(torch.Size), original image sizes

        Returns:
            ``result`` itself: untouched in training mode, otherwise with
            ``"boxes"`` (and ``"masks"`` when present) replaced in place.
        """
        if self.training:
            return result

        # Rescale the boxes of every image back to its original size.
        for i, (pred, im_s, o_im_s) in enumerate(zip(result, image_shapes, original_image_sizes)):
            boxes = pred["boxes"]
            boxes = resize_boxes(boxes, im_s, o_im_s)
            result[i]["boxes"] = boxes
            if "masks" in pred:
                masks = pred["masks"]
                # paste the masks into the original image frame
                masks = paste_masks_in_image(masks, boxes, o_im_s)
                result[i]["masks"] = masks

        return result

    def __repr__(self):
        """Describe the normalisation and resize settings, for printing."""
        format_string = self.__class__.__name__ + '('
        _indent = '\n '
        format_string += "{0}Normalize(mean={1}, std={2})".format(_indent, self.image_mean, self.image_std)
        format_string += "{0}Resize(min_size={1}, max_size={2}, mode='bilinear')".format(_indent,
                                                                                         self.min_size,
                                                                                         self.max_size)
        format_string += '\n)'
        return format_string

    def forward(self,
                images,       # type: List[Tensor]
                targets=None  # type: Optional[List[Dict[str, Tensor]]]
                ):
        # type: (...) -> Tuple[ImageList, Optional[List[Dict[str, Tensor]]]]
        # Copy the list so the caller's list object is not modified.
        images = [img for img in images]
        for i in range(len(images)):
            image = images[i]
            target_index = targets[i] if targets is not None else None

            if image.dim() != 3:
                raise ValueError("images is expected to be a list of 3d tensors "
                                 "of shape [C, H, W], got {}".format(image.shape))
            image = self.normalize(image)  # normalise the image
            image, target_index = self.resize(image, target_index)  # resize image and boxes
            images[i] = image
            if targets is not None and target_index is not None:
                targets[i] = target_index

        # sizes after resizing, before padding
        image_sizes = [img.shape[-2:] for img in images]
        images = self.batch_images(images, self.size_divisible)  # pack into one batch
        image_sizes_list = torch.jit.annotate(List[Tuple[int, int]], [])

        for image_size in image_sizes:
            assert len(image_size) == 2
            image_sizes_list.append((image_size[0], image_size[1]))

        image_list = ImageList(images, image_sizes_list)
        return image_list, targets
def plot_loss_and_lr(train_loss, learning_rate):
    """
    Plot training loss and learning rate on a shared x axis and save the figure.

    The loss is drawn in red on the left y axis and the learning rate on a
    twin right y axis; both appear in one combined legend. The image is
    written to ``./loss_and_lr<timestamp>.png``.

    Plotting is best effort: any exception is printed instead of raised, so
    a failure here does not abort the caller.

    Args:
        train_loss: sequence of loss values, one per step.
        learning_rate: sequence of learning rates, plotted against the same
            x positions as ``train_loss``.
    """
    fig = None
    try:
        x = list(range(len(train_loss)))
        fig, ax1 = plt.subplots(1, 1)
        ax1.plot(x, train_loss, 'r', label='loss')
        ax1.set_xlabel("step")
        ax1.set_ylabel("loss")
        ax1.set_title("Train Loss and lr")

        ax2 = ax1.twinx()
        ax2.plot(x, learning_rate, label='lr')
        ax2.set_ylabel("learning rate")
        ax2.set_xlim(0, len(train_loss))  # x axis spans exactly the recorded steps

        # One legend holding both curves. Creating a legend per axes first
        # would leave a second, duplicate "loss" legend on ax1.
        handles1, labels1 = ax1.get_legend_handles_labels()
        handles2, labels2 = ax2.get_legend_handles_labels()
        ax2.legend(handles1 + handles2, labels1 + labels2, loc='upper right')

        fig.subplots_adjust(right=0.8)  # keep the right-hand axis label inside the saved image
        fig.savefig('./loss_and_lr{}.png'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")))
        print("successful save loss curve! ")
    except Exception as e:
        print(e)
    finally:
        # Close the figure on failure as well, so repeated calls do not
        # accumulate open figures.
        if fig is not None:
            plt.close(fig)
does not exits." original_img = Image.open(img_path).convert('RGB') # from pil image to tensor, do not normalize image data_transform = transforms.Compose([transforms.ToTensor()]) img = data_transform(original_img) # expand batch dimension img = torch.unsqueeze(img, dim=0) model.eval() # 进入验证模式 with torch.no_grad(): # init img_height, img_width = img.shape[-2:] init_img = torch.zeros((1, 3, img_height, img_width), device=device) model(init_img) t_start = time_synchronized() predictions = model(img.to(device))[0] t_end = time_synchronized() print("inference+NMS time: {}".format(t_end - t_start)) predict_boxes = predictions["boxes"].to("cpu").numpy() predict_classes = predictions["labels"].to("cpu").numpy() predict_scores = predictions["scores"].to("cpu").numpy() predict_mask = predictions["masks"].to("cpu").numpy() predict_mask = np.squeeze(predict_mask, axis=1) # [batch, 1, h, w] -> [batch, h, w] if len(predict_boxes) == 0: print("没有检测到任何目标!") return plot_img = draw_objs(original_img, boxes=predict_boxes, classes=predict_classes, scores=predict_scores, masks=predict_mask, category_index=category_index, line_thickness=3, font='arial.ttf', font_size=20) plt.imshow(plot_img) plt.show() # 保存预测的图片结果 plot_img.save("test_result.jpg") if __name__ == '__main__': main() ================================================ FILE: pytorch_object_detection/mask_rcnn/requirements.txt ================================================ lxml matplotlib numpy tqdm pycocotools Pillow torch==1.13.1 torchvision==0.11.1 ================================================ FILE: pytorch_object_detection/mask_rcnn/seg_results20220406-141544.txt ================================================ epoch:0 0.172 0.321 0.167 0.065 0.195 0.250 0.188 0.307 0.324 0.147 0.366 0.440 1.3826 0.08 epoch:1 0.223 0.395 0.225 0.092 0.249 0.322 0.222 0.354 0.372 0.186 0.413 0.499 1.0356 0.08 epoch:2 0.235 0.408 0.241 0.100 0.258 0.350 0.230 0.372 0.392 0.204 0.429 0.517 0.9718 0.08 epoch:3 0.246 0.426 0.252 0.103 
0.267 0.357 0.241 0.386 0.408 0.225 0.448 0.521 0.9363 0.08 epoch:4 0.250 0.424 0.257 0.106 0.272 0.367 0.242 0.381 0.400 0.210 0.438 0.530 0.9145 0.08 epoch:5 0.255 0.434 0.262 0.109 0.279 0.375 0.242 0.379 0.398 0.209 0.433 0.534 0.8982 0.08 epoch:6 0.270 0.456 0.283 0.120 0.293 0.392 0.254 0.403 0.421 0.229 0.462 0.551 0.8859 0.08 epoch:7 0.269 0.455 0.280 0.118 0.296 0.388 0.257 0.402 0.421 0.228 0.454 0.564 0.8771 0.08 epoch:8 0.276 0.465 0.290 0.120 0.301 0.398 0.255 0.401 0.418 0.227 0.461 0.553 0.8685 0.08 epoch:9 0.271 0.458 0.282 0.113 0.297 0.404 0.253 0.398 0.417 0.211 0.460 0.570 0.8612 0.08 epoch:10 0.277 0.463 0.289 0.119 0.299 0.410 0.258 0.405 0.425 0.221 0.466 0.558 0.8547 0.08 epoch:11 0.276 0.463 0.287 0.122 0.304 0.405 0.259 0.406 0.425 0.236 0.466 0.559 0.8498 0.08 epoch:12 0.276 0.464 0.288 0.127 0.294 0.409 0.257 0.406 0.425 0.236 0.459 0.563 0.8461 0.08 epoch:13 0.284 0.477 0.296 0.124 0.311 0.412 0.262 0.407 0.429 0.229 0.474 0.555 0.8409 0.08 epoch:14 0.277 0.464 0.292 0.121 0.304 0.397 0.257 0.410 0.431 0.238 0.473 0.565 0.8355 0.08 epoch:15 0.282 0.474 0.296 0.121 0.308 0.413 0.264 0.411 0.432 0.231 0.473 0.575 0.833 0.08 epoch:16 0.336 0.549 0.356 0.149 0.367 0.491 0.288 0.451 0.473 0.269 0.519 0.620 0.7421 0.008 epoch:17 0.339 0.553 0.360 0.153 0.371 0.496 0.292 0.454 0.475 0.271 0.518 0.624 0.7157 0.008 epoch:18 0.340 0.553 0.361 0.150 0.371 0.494 0.290 0.453 0.473 0.269 0.516 0.620 0.7016 0.008 epoch:19 0.341 0.555 0.363 0.154 0.372 0.500 0.293 0.458 0.478 0.273 0.522 0.630 0.6897 0.008 epoch:20 0.340 0.554 0.361 0.154 0.370 0.496 0.289 0.450 0.471 0.266 0.514 0.622 0.6802 0.008 epoch:21 0.338 0.552 0.358 0.151 0.367 0.500 0.289 0.447 0.467 0.262 0.507 0.622 0.6708 0.008 epoch:22 0.340 0.553 0.360 0.151 0.370 0.500 0.290 0.450 0.470 0.267 0.513 0.623 0.6497 0.0008 epoch:23 0.340 0.552 0.361 0.151 0.369 0.500 0.290 0.449 0.468 0.266 0.509 0.619 0.6447 0.0008 epoch:24 0.339 0.552 0.359 0.150 0.369 0.500 0.290 0.448 0.468 0.264 0.510 
def create_model(num_classes, load_pretrain_weights=True):
    """Build a Mask R-CNN model on a ResNet50-FPN backbone.

    Args:
        num_classes: value forwarded to ``MaskRCNN(num_classes=...)``.
        load_pretrain_weights: when true, load
            ``./maskrcnn_resnet50_fpn_coco.pth`` (non-strict) after dropping
            every entry whose key contains ``box_predictor`` or
            ``mask_fcn_logits``.

    Returns:
        The constructed ``MaskRCNN`` instance.
    """
    # Note from the original author: if GPU memory is small and batch_size
    # cannot be large, consider setting norm_layer to FrozenBatchNorm2d (the
    # default is nn.BatchNorm2d). FrozenBatchNorm2d works like BatchNorm2d but
    # its parameters cannot be updated.
    # trainable_layers covers ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'];
    # 5 means train all of them.
    # backbone = resnet50_fpn_backbone(norm_layer=FrozenBatchNorm2d,
    #                                  trainable_layers=3)
    # resnet50 imagenet weights url: https://download.pytorch.org/models/resnet50-0676ba61.pth
    backbone = resnet50_fpn_backbone(pretrain_path="resnet50.pth", trainable_layers=3)
    model = MaskRCNN(backbone, num_classes=num_classes)

    if not load_pretrain_weights:
        return model

    # coco weights url: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth"
    weights_dict = torch.load("./maskrcnn_resnet50_fpn_coco.pth", map_location="cpu")

    # Remove the prediction-head entries in place (presumably because their
    # shapes depend on the number of classes -- confirm against MaskRCNN).
    # The keys are snapshotted first so the mapping can be mutated while looping.
    head_markers = ("box_predictor", "mask_fcn_logits")
    for key in tuple(weights_dict.keys()):
        if any(marker in key for marker in head_markers):
            del weights_dict[key]

    # strict=False tolerates the removed keys; the load report is printed.
    load_report = model.load_state_dict(weights_dict, strict=False)
    print(load_report)

    return model
transforms.RandomHorizontalFlip(0.5)]), "val": transforms.Compose([transforms.ToTensor()]) } data_root = args.data_path # load train data set # coco2017 -> annotations -> instances_train2017.json train_dataset = CocoDetection(data_root, "train", data_transform["train"]) # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt # train_dataset = VOCInstances(data_root, year="2012", txt_name="train.txt", transforms=data_transform["train"]) train_sampler = None # 是否按图片相似高宽比采样图片组成batch # 使用的话能够减小训练时所需GPU显存,默认使用 if args.aspect_ratio_group_factor >= 0: train_sampler = torch.utils.data.RandomSampler(train_dataset) # 统计所有图像高宽比例在bins区间中的位置索引 group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor) # 每个batch图片从同一高宽比例区间中取 train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size) # 注意这里的collate_fn是自定义的,因为读取的数据包括image和targets,不能直接使用默认的方法合成batch batch_size = args.batch_size nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]) # number of workers print('Using %g dataloader workers' % nw) if train_sampler: # 如果按照图片高宽比采样图片,dataloader中需要使用batch_sampler train_data_loader = torch.utils.data.DataLoader(train_dataset, batch_sampler=train_batch_sampler, pin_memory=True, num_workers=nw, collate_fn=train_dataset.collate_fn) else: train_data_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=nw, collate_fn=train_dataset.collate_fn) # load validation data set # coco2017 -> annotations -> instances_val2017.json val_dataset = CocoDetection(data_root, "val", data_transform["val"]) # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt # val_dataset = VOCInstances(data_root, year="2012", txt_name="val.txt", transforms=data_transform["val"]) val_data_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, shuffle=False, pin_memory=True, num_workers=nw, collate_fn=train_dataset.collate_fn) # create model num_classes equal background + classes model = 
create_model(num_classes=args.num_classes + 1, load_pretrain_weights=args.pretrain) model.to(device) train_loss = [] learning_rate = [] val_map = [] # define optimizer params = [p for p in model.parameters() if p.requires_grad] optimizer = torch.optim.SGD(params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) scaler = torch.cuda.amp.GradScaler() if args.amp else None # learning rate scheduler lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma) # 如果传入resume参数,即上次训练的权重地址,则接着上次的参数训练 if args.resume: # If map_location is missing, torch.load will first load the module to CPU # and then copy each parameter to where it was saved, # which would result in all processes on the same machine using the same set of devices. checkpoint = torch.load(args.resume, map_location='cpu') # 读取之前保存的权重文件(包括优化器以及学习率策略) model.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) args.start_epoch = checkpoint['epoch'] + 1 if args.amp and "scaler" in checkpoint: scaler.load_state_dict(checkpoint["scaler"]) for epoch in range(args.start_epoch, args.epochs): # train for one epoch, printing every 50 iterations mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=50, warmup=True, scaler=scaler) train_loss.append(mean_loss.item()) learning_rate.append(lr) # update the learning rate lr_scheduler.step() # evaluate on the test dataset det_info, seg_info = utils.evaluate(model, val_data_loader, device=device) # write detection into txt with open(det_results_file, "a") as f: # 写入的数据包括coco指标还有loss和learning rate result_info = [f"{i:.4f}" for i in det_info + [mean_loss.item()]] + [f"{lr:.6f}"] txt = "epoch:{} {}".format(epoch, ' '.join(result_info)) f.write(txt + "\n") # write seg into txt with open(seg_results_file, "a") as f: # 写入的数据包括coco指标还有loss和learning rate result_info = [f"{i:.4f}" for 
if __name__ == "__main__":
    import argparse

    def _str2bool(value):
        """Convert a command-line token such as ``True``/``false``/``1``/``0`` to bool.

        ``type=bool`` must not be used with argparse: ``bool("False")`` is
        ``True`` because any non-empty string is truthy.
        """
        if isinstance(value, bool):
            return value
        token = value.strip().lower()
        if token in ("true", "t", "yes", "y", "on", "1"):
            return True
        if token in ("false", "f", "no", "n", "off", "0"):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser(
        description=__doc__)

    # training device type
    parser.add_argument('--device', default='cuda:0', help='device')
    # root directory of the training data set
    parser.add_argument('--data-path', default='/data/coco2017', help='dataset')
    # number of object classes (background excluded)
    parser.add_argument('--num-classes', default=90, type=int, help='num_classes')
    # directory in which files are saved
    parser.add_argument('--output-dir', default='./save_weights', help='path where to save')
    # to continue a previous run, give the path of its saved checkpoint
    parser.add_argument('--resume', default='', type=str, help='resume from checkpoint')
    # epoch index from which training (re)starts
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # total number of training epochs
    parser.add_argument('--epochs', default=26, type=int, metavar='N',
                        help='number of total epochs to run')
    # learning rate
    parser.add_argument('--lr', default=0.004, type=float,
                        help='initial learning rate, 0.02 is the default value for training '
                             'on 8 gpus and 2 images_per_gpu')
    # SGD momentum
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # SGD weight decay
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # milestones for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int,
                        help='decrease lr every step-size epochs')
    # gamma for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')
    # training batch size (use a larger value if memory / GPU memory allows)
    parser.add_argument('--batch_size', default=2, type=int, metavar='N',
                        help='batch size when training.')
    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
    # Boolean options accept an explicit value ("--pretrain False") or no
    # value at all ("--pretrain"), in which case const=True is used.
    parser.add_argument("--pretrain", type=_str2bool, nargs="?", const=True, default=True,
                        help="load COCO pretrain weights.")
    # whether to use mixed-precision training (needs a GPU that supports it)
    parser.add_argument("--amp", type=_str2bool, nargs="?", const=True, default=False,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()
    print(args)

    # create the weight output directory if it does not exist yet
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    main(args)
load_pretrain_weights: # coco weights url: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth" weights_dict = torch.load("./maskrcnn_resnet50_fpn_coco.pth", map_location="cpu") for k in list(weights_dict.keys()): if ("box_predictor" in k) or ("mask_fcn_logits" in k): del weights_dict[k] print(model.load_state_dict(weights_dict, strict=False)) return model def main(args): init_distributed_mode(args) print(args) device = torch.device(args.device) # 用来保存coco_info的文件 now = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") det_results_file = f"det_results{now}.txt" seg_results_file = f"seg_results{now}.txt" # Data loading code print("Loading data") data_transform = { "train": transforms.Compose([transforms.ToTensor(), transforms.RandomHorizontalFlip(0.5)]), "val": transforms.Compose([transforms.ToTensor()]) } COCO_root = args.data_path # load train data set # coco2017 -> annotations -> instances_train2017.json train_dataset = CocoDetection(COCO_root, "train", data_transform["train"]) # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt # train_dataset = VOCInstances(data_root, year="2012", txt_name="train.txt") # load validation data set # coco2017 -> annotations -> instances_val2017.json val_dataset = CocoDetection(COCO_root, "val", data_transform["val"]) # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt # val_dataset = VOCInstances(data_root, year="2012", txt_name="val.txt") print("Creating data loaders") if args.distributed: train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) test_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset) else: train_sampler = torch.utils.data.RandomSampler(train_dataset) test_sampler = torch.utils.data.SequentialSampler(val_dataset) if args.aspect_ratio_group_factor >= 0: # 统计所有图像比例在bins区间中的位置索引 group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor) train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, 
args.batch_size) else: train_batch_sampler = torch.utils.data.BatchSampler( train_sampler, args.batch_size, drop_last=True) data_loader = torch.utils.data.DataLoader( train_dataset, batch_sampler=train_batch_sampler, num_workers=args.workers, collate_fn=train_dataset.collate_fn) data_loader_test = torch.utils.data.DataLoader( val_dataset, batch_size=1, sampler=test_sampler, num_workers=args.workers, collate_fn=train_dataset.collate_fn) print("Creating model") # create model num_classes equal background + classes model = create_model(num_classes=args.num_classes + 1, load_pretrain_weights=args.pretrain) model.to(device) if args.distributed and args.sync_bn: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) model_without_ddp = model if args.distributed: model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) model_without_ddp = model.module params = [p for p in model.parameters() if p.requires_grad] optimizer = torch.optim.SGD( params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) scaler = torch.cuda.amp.GradScaler() if args.amp else None # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma) lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma) # 如果传入resume参数,即上次训练的权重地址,则接着上次的参数训练 if args.resume: # If map_location is missing, torch.load will first load the module to CPU # and then copy each parameter to where it was saved, # which would result in all processes on the same machine using the same set of devices. 
checkpoint = torch.load(args.resume, map_location='cpu') # 读取之前保存的权重文件(包括优化器以及学习率策略) model_without_ddp.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) args.start_epoch = checkpoint['epoch'] + 1 if args.amp and "scaler" in checkpoint: scaler.load_state_dict(checkpoint["scaler"]) if args.test_only: utils.evaluate(model, data_loader_test, device=device) return train_loss = [] learning_rate = [] val_map = [] print("Start training") start_time = time.time() for epoch in range(args.start_epoch, args.epochs): if args.distributed: train_sampler.set_epoch(epoch) mean_loss, lr = utils.train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq, warmup=True, scaler=scaler) # update learning rate lr_scheduler.step() # evaluate after every epoch det_info, seg_info = utils.evaluate(model, data_loader_test, device=device) # 只在主进程上进行写操作 if args.rank in [-1, 0]: train_loss.append(mean_loss.item()) learning_rate.append(lr) val_map.append(det_info[1]) # pascal mAP # write into txt with open(det_results_file, "a") as f: # 写入的数据包括coco指标还有loss和learning rate result_info = [f"{i:.4f}" for i in det_info + [mean_loss.item()]] + [f"{lr:.6f}"] txt = "epoch:{} {}".format(epoch, ' '.join(result_info)) f.write(txt + "\n") with open(seg_results_file, "a") as f: # 写入的数据包括coco指标还有loss和learning rate result_info = [f"{i:.4f}" for i in seg_info + [mean_loss.item()]] + [f"{lr:.6f}"] txt = "epoch:{} {}".format(epoch, ' '.join(result_info)) f.write(txt + "\n") if args.output_dir: # 只在主进程上执行保存权重操作 save_files = {'model': model_without_ddp.state_dict(), 'optimizer': optimizer.state_dict(), 'lr_scheduler': lr_scheduler.state_dict(), 'args': args, 'epoch': epoch} if args.amp: save_files["scaler"] = scaler.state_dict() save_on_master(save_files, os.path.join(args.output_dir, f'model_{epoch}.pth')) total_time = time.time() - start_time total_time_str = 
if __name__ == "__main__":
    import argparse

    def _str2bool(value):
        """Convert a command-line token such as ``True``/``false``/``1``/``0`` to bool.

        ``type=bool`` must not be used with argparse: ``bool("False")`` is
        ``True`` because any non-empty string is truthy.
        """
        if isinstance(value, bool):
            return value
        token = value.strip().lower()
        if token in ("true", "t", "yes", "y", "on", "1"):
            return True
        if token in ("false", "f", "no", "n", "off", "0"):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser(
        description=__doc__)

    # root directory of the training files (coco2017)
    parser.add_argument('--data-path', default='/data/coco2017', help='dataset')
    # training device type
    parser.add_argument('--device', default='cuda', help='device')
    # number of object classes (background excluded)
    parser.add_argument('--num-classes', default=90, type=int, help='num_classes')
    # batch size on each GPU
    parser.add_argument('-b', '--batch-size', default=4, type=int,
                        help='images per gpu, the total batch size is $NGPU x batch_size')
    # epoch index from which training (re)starts
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # total number of training epochs
    parser.add_argument('--epochs', default=26, type=int, metavar='N',
                        help='number of total epochs to run')
    # number of data loading / preprocessing workers
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    # learning rate; set it according to the GPU count and batch size:
    # 0.02 / bs * num_GPU
    parser.add_argument('--lr', default=0.005, type=float,
                        help='initial learning rate, 0.02 is the default value for training '
                             'on 8 gpus and 2 images_per_gpu')
    # SGD momentum
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # SGD weight decay
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # step size for torch.optim.lr_scheduler.StepLR
    parser.add_argument('--lr-step-size', default=8, type=int, help='decrease lr every step-size epochs')
    # milestones for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int,
                        help='decrease lr every step-size epochs')
    # gamma for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')
    # how often progress is printed during training
    parser.add_argument('--print-freq', default=50, type=int, help='print frequency')
    # directory in which files are saved
    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')
    # continue training from a previous result
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
    parser.add_argument('--test-only', action="store_true", help="test only")

    # number of processes to start (processes, not threads)
    parser.add_argument('--world-size', default=4, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
    # Boolean options accept an explicit value ("--sync-bn False") or no
    # value at all ("--sync-bn"), in which case const=True is used.
    parser.add_argument("--sync-bn", dest="sync_bn", help="Use sync batch norm",
                        type=_str2bool, nargs="?", const=True, default=False)
    parser.add_argument("--pretrain", type=_str2bool, nargs="?", const=True, default=True,
                        help="load COCO pretrain weights.")
    # whether to use mixed-precision training (needs a GPU that supports it)
    parser.add_argument("--amp", type=_str2bool, nargs="?", const=True, default=False,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()

    # if an output directory is given, create it when it does not exist yet
    if args.output_dir:
        mkdir(args.output_dir)

    main(args)
def merge(img_ids, eval_results):
    """Collect image ids and results from all processes and drop duplicate images.

    Args:
        img_ids: image ids recorded by the calling process.
        eval_results: results recorded by the calling process; entry ``i``
            is paired with ``img_ids[i]`` by position.

    Returns:
        A ``(ids, results)`` tuple: the unique image ids in sorted order (as a
        list) and, for each of them, the result stored at the first position
        where that id occurred in the gathered data.
    """
    gathered_ids = all_gather(img_ids)
    gathered_results = all_gather(eval_results)

    # Flatten the per-process lists, keeping process order.
    flat_ids = [img_id for per_process in gathered_ids for img_id in per_process]
    flat_results = [res for per_process in gathered_results for res in per_process]

    # Keep only unique (and sorted) image ids. The original author notes that
    # with multi-GPU runs one image may be handed to several processes so that
    # every process gets the same number of images.
    unique_ids, first_positions = np.unique(np.array(flat_ids), return_index=True)
    unique_results = [flat_results[pos] for pos in first_positions]

    return list(unique_ids), unique_results
class_idx = int(self.classes_mapping[str(class_idx)]) # We recommend rounding coordinates to the nearest tenth of a pixel # to reduce resulting JSON file size. object_box = [round(b, 2) for b in object_box.tolist()] res = {"image_id": img_id, "category_id": class_idx, "bbox": object_box, "score": round(object_score, 3)} res_list.append(res) self.results.append(res_list) def prepare_for_coco_segmentation(self, targets, outputs): """将预测的结果转换成COCOeval指定的格式,针对实例分割任务""" # 遍历每张图像的预测结果 for target, output in zip(targets, outputs): if len(output) == 0: continue img_id = int(target["image_id"]) if img_id in self.img_ids: # 防止出现重复的数据 continue self.img_ids.append(img_id) per_image_masks = output["masks"] per_image_classes = output["labels"].tolist() per_image_scores = output["scores"].tolist() masks = per_image_masks > 0.5 res_list = [] # 遍历每个目标的信息 for mask, label, score in zip(masks, per_image_classes, per_image_scores): rle = mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0] rle["counts"] = rle["counts"].decode("utf-8") class_idx = int(label) if self.classes_mapping is not None: class_idx = int(self.classes_mapping[str(class_idx)]) res = {"image_id": img_id, "category_id": class_idx, "segmentation": rle, "score": round(score, 3)} res_list.append(res) self.results.append(res_list) def update(self, targets, outputs): if self.iou_type == "bbox": self.prepare_for_coco_detection(targets, outputs) elif self.iou_type == "segm": self.prepare_for_coco_segmentation(targets, outputs) else: raise KeyError(f"not support iou_type: {self.iou_type}") def synchronize_results(self): # 同步所有进程中的数据 eval_ids, eval_results = merge(self.img_ids, self.results) self.aggregation_results = {"img_ids": eval_ids, "results": eval_results} # 主进程上保存即可 if is_main_process(): results = [] [results.extend(i) for i in eval_results] # write predict results into json file json_str = json.dumps(results, indent=4) with open(self.results_file_name, 'w') as json_file: 
def coco_remove_images_without_annotations(dataset, ids):
    """Return the image ids that have at least one usable annotation.

    An image is dropped when it has no annotations at all, or when every one
    of its annotations has a bounding box whose width or height is <= 1.
    refer to:
    https://github.com/pytorch/vision/blob/master/references/detection/coco_utils.py

    :param dataset: object providing ``getAnnIds(imgIds=..., iscrowd=None)``
        and ``loadAnns(ann_ids)``; each loaded annotation is indexed with
        ``["bbox"]``, whose elements from index 2 on are the box sizes.
    :param ids: iterable of image ids to check.
    :return: list of the ids that passed the check, in input order.
    """

    def _is_degenerate(obj):
        # True when any size component of the box is at most one pixel.
        return any(side <= 1 for side in obj["bbox"][2:])

    kept_ids = []
    for img_id in ids:
        annotations = dataset.loadAnns(dataset.getAnnIds(imgIds=img_id, iscrowd=None))
        if len(annotations) == 0:
            # no annotation at all
            continue
        if all(_is_degenerate(obj) for obj in annotations):
            # every box has close to zero area
            continue
        kept_ids.append(img_id)

    return kept_ids
class SmoothedValue(object):
    """Track a series of values and provide access to smoothed values over a
    window or the global series average.

    The most recent ``window_size`` values are kept in a bounded deque and
    feed ``median``, ``avg``, ``max`` and ``value``; ``total`` and ``count``
    accumulate over the whole series and feed ``global_avg``.
    """

    def __init__(self, window_size=20, fmt=None):
        """
        Args:
            window_size: maximum number of recent values kept in the window.
            fmt: ``str.format`` template used by ``__str__``; it may refer to
                ``median``, ``avg``, ``global_avg``, ``max`` and ``value``.
                Defaults to ``"{value:.4f} ({global_avg:.4f})"``.
        """
        if fmt is None:
            fmt = "{value:.4f} ({global_avg:.4f})"
        self.deque = deque(maxlen=window_size)  # bounded: old values fall out automatically
        self.total = 0.0
        self.count = 0
        self.fmt = fmt

    def update(self, value, n=1):
        """Record ``value``; it counts ``n`` times towards the global average."""
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """Reduce ``count`` and ``total`` across all processes.

        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        # Use the GPU when there is one, but fall back to the CPU so that
        # CPU-only process groups do not fail on a hard-coded "cuda" device.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        t = torch.tensor([self.count, self.total], dtype=torch.float64, device=device)
        dist.barrier()
        dist.all_reduce(t)
        t = t.tolist()
        self.count = int(t[0])
        self.total = t[1]

    @property
    def median(self):
        """Median of the values currently in the window."""
        d = torch.tensor(list(self.deque))
        return d.median().item()

    @property
    def avg(self):
        """Mean of the values currently in the window (computed in float32)."""
        d = torch.tensor(list(self.deque), dtype=torch.float32)
        return d.mean().item()

    @property
    def global_avg(self):
        """Average over every update so far: ``total / count``."""
        return self.total / self.count

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    @property
    def value(self):
        """Most recently recorded value."""
        return self.deque[-1]

    def __str__(self):
        return self.fmt.format(
            median=self.median,
            avg=self.avg,
            global_avg=self.global_avg,
            max=self.max,
            value=self.value)
class MetricLogger(object):
    """Hold a set of named ``SmoothedValue`` meters and print progress lines.

    Args:
        delimiter: string placed between the fields of a progress line and
            between meters in ``str(self)``.
    """

    def __init__(self, delimiter="\t"):
        # Meters are created on first use with SmoothedValue's defaults.
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Feed one scalar per keyword into the meter of the same name.

        Tensors are converted with ``.item()``; every value must end up as
        a ``float`` or ``int``.
        """
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        """Expose meters as attributes, e.g. ``logger.loss``.

        ``__getattr__`` only runs after normal lookup has failed.  ``meters``
        is read through ``__dict__`` so that a lookup on an instance whose
        ``__init__`` has not run (as happens during ``copy``/unpickling)
        raises ``AttributeError`` instead of recursing without bound.
        """
        meters = self.__dict__.get("meters")
        if meters is not None and attr in meters:
            return meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        loss_str = [
            "{}: {}".format(name, str(meter))
            for name, meter in self.meters.items()
        ]
        return self.delimiter.join(loss_str)

    def synchronize_between_processes(self):
        """Synchronize every meter's global count/total across processes."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register (or replace) the meter stored under ``name``."""
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield the items of ``iterable`` while printing timing/progress lines.

        A line is printed every ``print_freq`` iterations and on the last
        one; a total-time summary is printed once the iterable is exhausted.

        Args:
            iterable: sized iterable (``len()`` is required).
            print_freq: number of iterations between progress lines.
            header: optional prefix for every printed line.
        """
        if not header:
            header = ""
        num_iters = len(iterable)
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        # Pad the iteration counter to the width of the total count.
        space_fmt = ":" + str(len(str(num_iters))) + "d"
        fields = [header,
                  '[{0' + space_fmt + '}/{1}]',
                  'eta: {eta}',
                  '{meters}',
                  'time: {time}',
                  'data: {data}']
        use_cuda = torch.cuda.is_available()
        if use_cuda:
            fields.append('max mem: {memory:.0f}')
        log_msg = self.delimiter.join(fields)
        MB = 1024.0 * 1024.0
        for i, obj in enumerate(iterable):
            # Time spent waiting for the item vs. the full iteration.
            data_time.update(time.time() - end)
            yield obj
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == num_iters - 1:
                eta_second = iter_time.global_avg * (num_iters - i)
                eta_string = str(datetime.timedelta(seconds=eta_second))
                fmt_kwargs = dict(eta=eta_string,
                                  meters=str(self),
                                  time=str(iter_time),
                                  data=str(data_time))
                if use_cuda:
                    fmt_kwargs["memory"] = torch.cuda.max_memory_allocated() / MB
                print(log_msg.format(i, num_iters, **fmt_kwargs))
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        # max(..., 1) keeps an empty iterable from raising ZeroDivisionError.
        print('{} Total time: {} ({:.4f} s / it)'.format(header,
                                                         total_time_str,
                                                         total_time / max(num_iters, 1)))
class GroupedBatchSampler(BatchSampler):
    """
    Wraps another sampler to yield a mini-batch of indices.
    It enforces that the batch only contain elements from the same group.
    It also tries to provide mini-batches which follows an ordering which is
    as close as possible to the ordering from the original sampler.
    Arguments:
        sampler (Sampler): Base sampler.
        group_ids (list[int]): If the sampler produces indices in range [0, N),
            `group_ids` must be a list of `N` ints which contains the group id of each sample.
            The group ids must be a continuous set of integers starting from
            0, i.e. they must be in the range [0, num_groups).
        batch_size (int): Size of mini-batch.
    Raises:
        ValueError: if `sampler` is not a `torch.utils.data.Sampler` instance.
    """
    def __init__(self, sampler, group_ids, batch_size):
        if not isinstance(sampler, Sampler):
            raise ValueError(
                "sampler should be an instance of "
                "torch.utils.data.Sampler, but got sampler={}".format(sampler)
            )
        self.sampler = sampler
        self.group_ids = group_ids
        self.batch_size = batch_size

    def __iter__(self):
        # buffer_per_group: indices collected for the batch currently being
        # filled in each group (reset once a batch is yielded).
        # samples_per_group: every index seen so far in each group; used
        # below to pad incomplete batches.
        buffer_per_group = defaultdict(list)
        samples_per_group = defaultdict(list)

        num_batches = 0
        for idx in self.sampler:
            group_id = self.group_ids[idx]
            buffer_per_group[group_id].append(idx)
            samples_per_group[group_id].append(idx)
            if len(buffer_per_group[group_id]) == self.batch_size:
                yield buffer_per_group[group_id]
                num_batches += 1
                # Drop the full buffer; the defaultdict recreates an empty
                # list on the next access.
                del buffer_per_group[group_id]
            assert len(buffer_per_group[group_id]) < self.batch_size

        # now we have run out of elements that satisfy
        # the group criteria, let's return the remaining
        # elements so that the size of the sampler is
        # deterministic
        expected_num_batches = len(self)
        num_remaining = expected_num_batches - num_batches
        if num_remaining > 0:
            # for the remaining batches, take first the buffers with largest number
            # of elements
            for group_id, _ in sorted(buffer_per_group.items(),
                                      key=lambda x: len(x[1]), reverse=True):
                # Pad the partial buffer with indices already seen in the
                # same group (repeating them if there are too few).
                remaining = self.batch_size - len(buffer_per_group[group_id])
                samples_from_group_id = _repeat_to_at_least(samples_per_group[group_id], remaining)
                buffer_per_group[group_id].extend(samples_from_group_id[:remaining])
                assert len(buffer_per_group[group_id]) == self.batch_size
                yield buffer_per_group[group_id]
                num_remaining -= 1
                if num_remaining == 0:
                    break
        assert num_remaining == 0

    def __len__(self):
        # Number of full batches the base sampler can supply (floor division).
        return len(self.sampler) // self.batch_size
def _compute_aspect_ratios_voc_dataset(dataset, indices=None):
    """Return width/height ratios for a dataset exposing image paths in ``dataset.images``.

    Args:
        dataset: object with ``__len__`` and an ``images`` sequence whose
            entries can be passed to ``PIL.Image.open``.
        indices: iterable of sample indices; defaults to every sample.

    Returns:
        list[float]: one ``width / height`` ratio per requested index.
    """
    if indices is None:
        indices = range(len(dataset))
    aspect_ratios = []
    for i in indices:
        # Only ``size`` is read, so the pixel data is not loaded into memory.
        # The context manager closes the file handle right away instead of
        # leaving it open until the image object is garbage-collected.
        with Image.open(dataset.images[i]) as img:
            width, height = img.size
        aspect_ratios.append(float(width) / float(height))
    return aspect_ratios
def train_one_epoch(model, optimizer, data_loader, device, epoch,
                    print_freq=50, warmup=False, scaler=None):
    """Run one training epoch and return the mean loss and last learning rate.

    Args:
        model: called as ``model(images, targets)``; must return a dict of losses.
        optimizer: optimizer stepped once per batch.
        data_loader: yields ``(images, targets)`` pairs; ``len()`` is used.
        device: device the images and target tensors are moved to.
        epoch: epoch index; warmup is only applied when it is 0.
        print_freq: iterations between progress log lines.
        warmup: enable the linear learning-rate warmup during epoch 0.
        scaler: gradient scaler; when not ``None`` autocast is enabled and the
            scaler drives ``backward``/``step``.

    Returns:
        tuple: ``(mloss, now_lr)`` - a 1-element tensor holding the running
        mean of the reduced total loss, and the learning rate of the first
        parameter group after the last step.

    NOTE(review): ``now_lr`` is only assigned inside the loop, so an empty
    ``data_loader`` would fail at the ``return`` statement.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if epoch == 0 and warmup is True:  # use a warmup schedule during the first epoch (epoch=0)
        warmup_factor = 1.0 / 1000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    mloss = torch.zeros(1).to(device)  # mean losses
    for i, [images, targets] in enumerate(metric_logger.log_every(data_loader, print_freq, header)):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # mixed-precision context manager; it has no effect in a CPU environment
        with torch.cuda.amp.autocast(enabled=scaler is not None):
            loss_dict = model(images, targets)

            losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purpose
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()
        # record the training loss
        mloss = (mloss * i + loss_value) / (i + 1)  # update mean losses

        if not math.isfinite(loss_value):  # stop training when the loss is not finite
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        if scaler is not None:
            # scaled backward pass; the scaler steps the optimizer and
            # then updates its own scale factor
            scaler.scale(losses).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            losses.backward()
            optimizer.step()

        if lr_scheduler is not None:  # warmup schedule is active during the first epoch
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        now_lr = optimizer.param_groups[0]["lr"]
        metric_logger.update(lr=now_lr)

    return mloss, now_lr
class RandomHorizontalFlip(object):
    """Randomly mirror an image left-right together with its boxes and masks.

    Args:
        prob: probability of applying the flip on each call.
    """

    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, image, target):
        # Guard clause: leave the sample untouched when the draw says so.
        if random.random() >= self.prob:
            return image, target

        _, img_width = image.shape[-2:]
        flipped = image.flip(-1)  # mirror along the last (width) axis

        # Boxes are (xmin, ymin, xmax, ymax): the mirrored xmin comes from the
        # old xmax and vice versa.  The tensor is updated in place.
        boxes = target["boxes"]
        boxes[:, [0, 2]] = img_width - boxes[:, [2, 0]]
        target["boxes"] = boxes

        if "masks" in target:
            target["masks"] = target["masks"].flip(-1)
        return flipped, target
def summarize(self, catId=None):
    """
    Compute and display summary metrics for evaluation results.
    Note this function can *only* be applied on the default parameter setting.

    Args:
        self: evaluator object exposing ``params`` (with ``iouThrs``,
            ``areaRngLbl`` and ``maxDets``) and an ``eval`` dict holding the
            ``'precision'`` and ``'recall'`` arrays.
        catId: when an ``int``, restrict the metrics to that index along the
            category axis; otherwise all categories are used.

    Returns:
        tuple: ``(stats, print_info)`` - a list of 12 metric values and the
        12 formatted lines joined with newlines.

    Raises:
        Exception: if ``self.eval`` is empty, i.e. accumulate() was not run.
    """
    # Check before anything touches self.eval; otherwise an empty result
    # would fail with an unrelated KeyError and this message would never show.
    if not self.eval:
        raise Exception('Please run accumulate() first')

    def _summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):
        p = self.params
        iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
        titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
        typeStr = '(AP)' if ap == 1 else '(AR)'
        iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
            if iouThr is None else '{:0.2f}'.format(iouThr)

        # Positions of the requested area range / detection limit.
        aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
        mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]

        if ap == 1:
            # dimension of precision: [TxRxKxAxM]
            s = self.eval['precision']
            # IoU
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]

            if isinstance(catId, int):
                s = s[:, :, catId, aind, mind]
            else:
                s = s[:, :, :, aind, mind]
        else:
            # dimension of recall: [TxKxAxM]
            s = self.eval['recall']
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]

            if isinstance(catId, int):
                s = s[:, catId, aind, mind]
            else:
                s = s[:, :, aind, mind]

        # Entries of -1 are excluded from the mean; if nothing is left the
        # metric itself is reported as -1.
        if len(s[s > -1]) == 0:
            mean_s = -1
        else:
            mean_s = np.mean(s[s > -1])

        print_string = iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)
        return mean_s, print_string

    max_dets = self.params.maxDets
    # The 12 metric settings, in output order.  The first entry keeps
    # _summarize's own default for maxDets.
    queries = (
        dict(ap=1),
        dict(ap=1, iouThr=.5, maxDets=max_dets[2]),
        dict(ap=1, iouThr=.75, maxDets=max_dets[2]),
        dict(ap=1, areaRng='small', maxDets=max_dets[2]),
        dict(ap=1, areaRng='medium', maxDets=max_dets[2]),
        dict(ap=1, areaRng='large', maxDets=max_dets[2]),
        dict(ap=0, maxDets=max_dets[0]),
        dict(ap=0, maxDets=max_dets[1]),
        dict(ap=0, maxDets=max_dets[2]),
        dict(ap=0, areaRng='small', maxDets=max_dets[2]),
        dict(ap=0, areaRng='medium', maxDets=max_dets[2]),
        dict(ap=0, areaRng='large', maxDets=max_dets[2]),
    )

    stats, print_list = [], []
    for query in queries:
        mean_s, line = _summarize(**query)
        stats.append(mean_s)
        print_list.append(line)

    print_info = "\n".join(print_list)

    return stats, print_info
# Script entry point: parse the command-line options and run the validation.
# NOTE(review): main() reads the module-level name `args` (for num_classes)
# in addition to its own parameter, so the variable below must keep this name.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description=__doc__)

    # device type to run on
    parser.add_argument('--device', default='cuda', help='device')

    # number of object classes (background not included)
    parser.add_argument('--num-classes', type=int, default=90, help='number of classes')

    # root directory of the dataset
    parser.add_argument('--data-path', default='/data/coco2017', help='dataset root')

    # trained weights file
    parser.add_argument('--weights-path', default='./save_weights/model_25.pth', type=str, help='training weights')

    # batch size(set to 1, don't change)
    parser.add_argument('--batch-size', default=1, type=int, metavar='N',
                        help='batch size when validation.')

    # JSON file mapping class indices to class names
    parser.add_argument('--label-json-path', type=str, default="coco91_indices.json")

    args = parser.parse_args()

    main(args)
IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.563 Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.798 Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.616 Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.236 Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.434 Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.626 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.486 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.688 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.707 Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.421 Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.604 Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.758 ``` ## 训练方法 * 确保提前准备好数据集 * 确保提前下载好对应预训练模型权重 * 若要单GPU训练,直接使用train.py训练脚本 * 若要使用多GPU训练,使用`python -m torch.distributed.launch --nproc_per_node=8 --use_env train_multi_GPU.py`指令,`nproc_per_node`参数为使用GPU数量 * 如果想指定使用哪些GPU设备可在指令前加上`CUDA_VISIBLE_DEVICES=0,3`(例如我只要使用设备中的第1块和第4块GPU设备) * `CUDA_VISIBLE_DEVICES=0,3 python -m torch.distributed.launch --nproc_per_node=2 --use_env train_multi_GPU.py` ## 注意事项 * 在使用训练脚本时,注意要将`--data-path`(VOC_root)设置为自己存放`VOCdevkit`文件夹所在的**根目录** * 由于带有FPN结构的Faster RCNN很吃显存,如果GPU的显存不够(如果batch_size小于8的话)建议在create_model函数中使用默认的norm_layer, 即不传递norm_layer变量,默认去使用FrozenBatchNorm2d(即不会去更新参数的bn层),使用中发现效果也很好。 * 训练过程中保存的`results.txt`是每个epoch在验证集上的COCO指标,前12个值是COCO指标,后面两个值是训练平均损失以及学习率 * 在使用预测脚本时,要将`weights_path`设置为你自己生成的权重路径。 * 使用validation文件时,注意确保你的验证集或者测试集中必须包含每个类别的目标,并且使用时只需要修改`--num-classes`、`--data-path`和`--weights-path`即可,其他代码尽量不要改动 ================================================ FILE: pytorch_object_detection/retinaNet/backbone/__init__.py ================================================ from .feature_pyramid_network import FeaturePyramidNetwork, LastLevelP6P7, LastLevelMaxPool from 
class IntermediateLayerGetter(nn.ModuleDict):
    """
    Module wrapper that runs a model's direct children in registration order
    and returns the outputs of selected ones.

    The children are assumed to be registered in the order they are used,
    and each child must be used only once in the forward pass.  Only direct
    children can be selected (``model.feature1``, not
    ``model.feature1.layer2``).

    Arguments:
        model (nn.Module): model on which we will extract the features
        return_layers (Dict[name, new_name]): maps the name of each child
            whose activation should be returned to the key under which it
            appears in the output.

    Raises:
        ValueError: if a key of ``return_layers`` is not a direct child of ``model``.
    """
    __annotations__ = {
        "return_layers": Dict[str, str],
    }

    def __init__(self, model, return_layers):
        child_names = [child_name for child_name, _ in model.named_children()]
        if not set(return_layers).issubset(child_names):
            raise ValueError("return_layers are not present in model")

        # Work on a stringified copy so the caller's mapping is left intact.
        pending = {str(key): str(alias) for key, alias in return_layers.items()}

        # Keep children in order up to (and including) the last requested
        # one; everything after it is dropped.
        kept = OrderedDict()
        for child_name, child in model.named_children():
            kept[child_name] = child
            pending.pop(child_name, None)
            if len(pending) == 0:
                break

        super().__init__(kept)
        self.return_layers = return_layers

    def forward(self, x):
        # Feed x through the kept children one after another and collect
        # the activations of the requested ones under their new names.
        collected = OrderedDict()
        for child_name, child in self.items():
            x = child(x)
            if child_name not in self.return_layers:
                continue
            collected[self.return_layers[child_name]] = x
        return collected
class LastLevelMaxPool(torch.nn.Module):
    """
    Appends one extra level obtained by max-pooling the last feature map
    (kernel size 1, stride 2, no padding), named "pool".

    Both ``x`` and ``names`` are extended in place and returned; ``y`` is
    not used.
    """
    def forward(self, x: List[Tensor], y: List[Tensor], names: List[str]) -> Tuple[List[Tensor], List[str]]:
        names.append("pool")
        # Kernel size 1 with stride 2 keeps every second element along each
        # spatial axis.
        pooled = F.max_pool2d(x[-1], kernel_size=1, stride=2, padding=0)
        x.append(pooled)
        return x, names
class FeaturePyramidNetwork(nn.Module):
    """
    Module that adds a FPN from on top of a set of feature maps. This is based on
    "Feature Pyramid Network for Object Detection".
    The feature maps are currently supposed to be in increasing depth
    order.
    The input to the model is expected to be an OrderedDict[Tensor], containing
    the feature maps on top of which the FPN will be added.
    Arguments:
        in_channels_list (list[int]): number of channels for each feature map that
            is passed to the module
        out_channels (int): number of channels of the FPN representation
        extra_blocks (ExtraFPNBlock or None): if provided, extra operations will
            be performed. It is expected to take the fpn features, the original
            features and the names of the original features as input, and returns
            a new list of feature maps and their corresponding names
    """

    def __init__(self, in_channels_list, out_channels, extra_blocks=None):
        super().__init__()
        # 1x1 convolutions that bring each input feature map to out_channels
        self.inner_blocks = nn.ModuleList()
        # 3x3 convolutions applied to the merged maps to produce the outputs
        self.layer_blocks = nn.ModuleList()
        for in_channels in in_channels_list:
            if in_channels == 0:
                continue
            inner_block_module = nn.Conv2d(in_channels, out_channels, 1)
            layer_block_module = nn.Conv2d(out_channels, out_channels, 3, padding=1)
            self.inner_blocks.append(inner_block_module)
            self.layer_blocks.append(layer_block_module)

        # initialize parameters now to avoid modifying the initialization of top_blocks
        # modules() walks into the ModuleLists; children() would only yield
        # the two containers, so no Conv2d would ever be initialized.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_uniform_(m.weight, a=1)
                nn.init.constant_(m.bias, 0)

        self.extra_blocks = extra_blocks

    def get_result_from_inner_blocks(self, x: Tensor, idx: int) -> Tensor:
        """
        This is equivalent to self.inner_blocks[idx](x),
        but torchscript doesn't support this yet
        """
        num_blocks = len(self.inner_blocks)
        if idx < 0:
            idx += num_blocks
        i = 0
        out = x
        for module in self.inner_blocks:
            if i == idx:
                out = module(x)
            i += 1
        return out

    def get_result_from_layer_blocks(self, x: Tensor, idx: int) -> Tensor:
        """
        This is equivalent to self.layer_blocks[idx](x),
        but torchscript doesn't support this yet
        """
        num_blocks = len(self.layer_blocks)
        if idx < 0:
            idx += num_blocks
        i = 0
        out = x
        for module in self.layer_blocks:
            if i == idx:
                out = module(x)
            i += 1
        return out

    def forward(self, x: Dict[str, Tensor]) -> Dict[str, Tensor]:
        """
        Computes the FPN for a set of feature maps.
        Arguments:
            x (OrderedDict[Tensor]): feature maps for each feature level.
        Returns:
            results (OrderedDict[Tensor]): feature maps after FPN layers.
                They are ordered from highest resolution first.
        """
        # unpack OrderedDict into two lists for easier handling
        names = list(x.keys())
        x = list(x.values())

        # adjust the channels of the last (deepest) input map to out_channels
        # last_inner = self.inner_blocks[-1](x[-1])
        last_inner = self.get_result_from_inner_blocks(x[-1], -1)

        # results holds one output feature map per level
        results = []
        # the deepest output map: 3x3 convolution on the channel-adjusted map
        # results.append(self.layer_blocks[-1](last_inner))
        results.append(self.get_result_from_layer_blocks(last_inner, -1))

        # Walk from the second-deepest level up to the first one, adding the
        # upsampled coarser map to each lateral connection.
        for idx in range(len(x) - 2, -1, -1):
            inner_lateral = self.get_result_from_inner_blocks(x[idx], idx)
            feat_shape = inner_lateral.shape[-2:]
            inner_top_down = F.interpolate(last_inner, size=feat_shape, mode="nearest")
            last_inner = inner_lateral + inner_top_down
            results.insert(0, self.get_result_from_layer_blocks(last_inner, idx))

        # let the optional extra block extend the outputs and their names
        if self.extra_blocks is not None:
            results, names = self.extra_blocks(results, x, names)

        # make it back an OrderedDict
        out = OrderedDict([(k, v) for k, v in zip(names, results)])

        return out
out_channels=out_channel * self.expansion, kernel_size=1, stride=1, bias=False) # unsqueeze channels self.bn3 = norm_layer(out_channel * self.expansion) self.relu = nn.ReLU(inplace=True) self.downsample = downsample def forward(self, x): identity = x if self.downsample is not None: identity = self.downsample(x) out = self.conv1(x) out = self.bn1(out) out = self.relu(out) out = self.conv2(out) out = self.bn2(out) out = self.relu(out) out = self.conv3(out) out = self.bn3(out) out += identity out = self.relu(out) return out class ResNet(nn.Module): def __init__(self, block, blocks_num, num_classes=1000, include_top=True, norm_layer=None): super().__init__() if norm_layer is None: norm_layer = nn.BatchNorm2d self._norm_layer = norm_layer self.include_top = include_top self.in_channel = 64 self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2, padding=3, bias=False) self.bn1 = norm_layer(self.in_channel) self.relu = nn.ReLU(inplace=True) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) self.layer1 = self._make_layer(block, 64, blocks_num[0]) self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2) self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2) self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2) if self.include_top: self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) # output size = (1, 1) self.fc = nn.Linear(512 * block.expansion, num_classes) for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') def _make_layer(self, block, channel, block_num, stride=1): norm_layer = self._norm_layer downsample = None if stride != 1 or self.in_channel != channel * block.expansion: downsample = nn.Sequential( nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False), norm_layer(channel * block.expansion)) layers = [] layers.append(block(self.in_channel, channel, downsample=downsample, stride=stride, 
def resnet50_fpn_backbone(pretrain_path="",
                          norm_layer=FrozenBatchNorm2d,
                          trainable_layers=3,
                          returned_layers=None,
                          extra_blocks=None):
    """
    Build a ResNet-50 + FPN backbone.

    Args:
        pretrain_path: path to ResNet-50 pretrained weights; an empty string
            (the default) skips loading.
        norm_layer: normalization layer class. The default FrozenBatchNorm2d
            behaves like BatchNorm2d but its parameters are never updated,
            which helps when the batch size is very small. Pass a regular
            BatchNorm2d if a large batch size is affordable
            (https://github.com/facebookresearch/maskrcnn-benchmark/issues/267).
        trainable_layers: how many stages, counted from the deepest
            (layer4, layer3, layer2, layer1, conv1), stay trainable; 0..5.
        returned_layers: which ResNet stages (1..4) feed the FPN; defaults to
            [1, 2, 3, 4].
        extra_blocks: extra block applied on top of the FPN outputs; defaults
            to LastLevelMaxPool().

    Returns:
        A BackboneWithFPN wrapping the configured ResNet-50.
    """
    resnet_backbone = ResNet(Bottleneck, [3, 4, 6, 3],
                             include_top=False,
                             norm_layer=norm_layer)

    # norm_layer is a class (layer factory), not an instance, so the check has
    # to be issubclass(); an isinstance() test here can never be true.
    if isinstance(norm_layer, type) and issubclass(norm_layer, FrozenBatchNorm2d):
        overwrite_eps(resnet_backbone, 0.0)

    if pretrain_path != "":
        assert os.path.exists(pretrain_path), "{} is not exist.".format(pretrain_path)
        # Load on CPU so checkpoints saved on a GPU also work on CPU-only hosts;
        # load_state_dict copies the values into the existing parameters.
        state_dict = torch.load(pretrain_path, map_location="cpu")
        print(resnet_backbone.load_state_dict(state_dict, strict=False))

    # Select the layers that will NOT be frozen.
    assert 0 <= trainable_layers <= 5
    layers_to_train = ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'][:trainable_layers]

    # When everything is trainable, remember bn1 that follows conv1.
    if trainable_layers == 5:
        layers_to_train.append("bn1")

    # Freeze every parameter whose name does not start with a trainable layer.
    for name, parameter in resnet_backbone.named_parameters():
        if all(not name.startswith(layer) for layer in layers_to_train):
            parameter.requires_grad_(False)

    if extra_blocks is None:
        extra_blocks = LastLevelMaxPool()

    if returned_layers is None:
        returned_layers = [1, 2, 3, 4]
    # Returned stage indices must lie in 1..4.
    assert min(returned_layers) > 0 and max(returned_layers) < 5

    # e.g. {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'}
    return_layers = {f'layer{k}': str(v) for v, k in enumerate(returned_layers)}

    # in_channel is the channel count of layer4's output (2048); layer1 has 256.
    in_channels_stage2 = resnet_backbone.in_channel // 8  # 256
    # Channel count of each ResNet stage handed to the FPN.
    in_channels_list = [in_channels_stage2 * 2 ** (i - 1) for i in returned_layers]
    # Channel count of every FPN output map.
    out_channels = 256
    return BackboneWithFPN(resnet_backbone, return_layers, in_channels_list, out_channels, extra_blocks=extra_blocks)
def _text_size(font, text):
    """Return (width, height) of *text*, matching the legacy ``font.getsize``."""
    if hasattr(font, "getbbox"):
        # getsize() was removed in Pillow 10; the right/bottom edges of the
        # bounding box are the values getsize() used to return.
        _, _, right, bottom = font.getbbox(text)
        return right, bottom
    # Older Pillow releases without getbbox().
    return font.getsize(text)


def draw_text(draw,
              box: list,
              cls: int,
              score: float,
              category_index: dict,
              color: str,
              font: str = 'arial.ttf',
              font_size: int = 24):
    """
    Draw the class name and score label of one bounding box.

    Args:
        draw: drawing context providing ``rectangle`` and ``text``.
        box: [left, top, right, bottom] of the bounding box.
        cls: class id; looked up in category_index as ``str(cls)``.
        score: confidence, rendered as an integer percentage.
        category_index: mapping from class-id string to class name.
        color: fill color of the label background.
        font: TrueType font file; falls back to the default font when it
            cannot be loaded.
        font_size: font size used for the TrueType font.
    """
    try:
        font = ImageFont.truetype(font, font_size)
    except IOError:
        font = ImageFont.load_default()

    left, top, right, bottom = box
    display_str = f"{category_index[str(cls)]}: {int(100 * score)}%"

    # The label is laid out character by character; measure each one once.
    char_sizes = [_text_size(font, ch) for ch in display_str]

    # The label gets a top and bottom margin of 0.05x its height.
    display_str_height = (1 + 2 * 0.05) * max(height for _, height in char_sizes)

    # Put the label above the box when it fits inside the image, otherwise
    # stack it below the box.
    if top > display_str_height:
        text_top = top - display_str_height
        text_bottom = top
    else:
        text_top = bottom
        text_bottom = bottom + display_str_height

    for ch, (text_width, _) in zip(display_str, char_sizes):
        margin = np.ceil(0.05 * text_width)
        draw.rectangle([(left, text_top),
                        (left + text_width + 2 * margin, text_bottom)], fill=color)
        draw.text((left + margin, text_top),
                  ch,
                  fill='black',
                  font=font)
        left += text_width
class VOCDataSet(Dataset):
    """Read and parse the PASCAL VOC2007/2012 detection dataset."""

    def __init__(self, voc_root, year="2012", transforms=None, txt_name: str = "train.txt"):
        """
        Args:
            voc_root: dataset root; either the VOCdevkit directory itself or
                its parent directory.
            year: "2007" or "2012".
            transforms: optional callable applied as
                ``image, target = transforms(image, target)``.
            txt_name: split file under ImageSets/Main (e.g. train.txt, val.txt).
        """
        assert year in ["2007", "2012"], "year must be in ['2007', '2012']"
        # Be tolerant about whether voc_root already contains VOCdevkit.
        if "VOCdevkit" in voc_root:
            self.root = os.path.join(voc_root, f"VOC{year}")
        else:
            self.root = os.path.join(voc_root, "VOCdevkit", f"VOC{year}")
        self.img_root = os.path.join(self.root, "JPEGImages")
        self.annotations_root = os.path.join(self.root, "Annotations")

        # Read the split file (train.txt or val.txt).
        txt_path = os.path.join(self.root, "ImageSets", "Main", txt_name)
        assert os.path.exists(txt_path), "not found {} file.".format(txt_name)

        with open(txt_path) as read:
            self.xml_list = [os.path.join(self.annotations_root, line.strip() + ".xml")
                             for line in read.readlines() if len(line.strip()) > 0]

        # Make sure every listed annotation file exists.
        assert len(self.xml_list) > 0, "in '{}' file does not find any information.".format(txt_path)
        for xml_path in self.xml_list:
            assert os.path.exists(xml_path), "not found '{}' file.".format(xml_path)

        # Class-name -> class-id mapping, read relative to the working directory.
        json_file = './pascal_voc_classes.json'
        assert os.path.exists(json_file), "{} file not exist.".format(json_file)
        with open(json_file, 'r') as f:
            self.class_dict = json.load(f)

        self.transforms = transforms

    def __len__(self):
        return len(self.xml_list)

    def _load_annotation(self, idx):
        """Return (xml_path, annotation dict) for sample *idx*."""
        xml_path = self.xml_list[idx]
        with open(xml_path) as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        return xml_path, self.parse_xml_to_dict(xml)["annotation"]

    def __getitem__(self, idx):
        """
        Load image *idx* and its target dict
        (boxes, labels, image_id, area, iscrowd), applying transforms if set.

        Raises:
            ValueError: if the image file is not a JPEG.
        """
        xml_path, data = self._load_annotation(idx)
        img_path = os.path.join(self.img_root, data["filename"])
        image = Image.open(img_path)
        if image.format != "JPEG":
            raise ValueError("Image '{}' format not JPEG".format(img_path))

        boxes = []
        labels = []
        iscrowd = []
        assert "object" in data, "{} lack of object information.".format(xml_path)
        for obj in data["object"]:
            xmin = float(obj["bndbox"]["xmin"])
            xmax = float(obj["bndbox"]["xmax"])
            ymin = float(obj["bndbox"]["ymin"])
            ymax = float(obj["bndbox"]["ymax"])
            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(self.class_dict[obj["name"]])
            # The VOC "difficult" flag is stored in the iscrowd field.
            if "difficult" in obj:
                iscrowd.append(int(obj["difficult"]))
            else:
                iscrowd.append(0)

        # Convert everything into torch.Tensor.
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        iscrowd = torch.as_tensor(iscrowd, dtype=torch.int64)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        return image, target

    def get_height_and_width(self, idx):
        """Return (height, width) of image *idx* as recorded in its XML file."""
        _, data = self._load_annotation(idx)
        data_height = int(data["size"]["height"])
        data_width = int(data["size"]["width"])
        return data_height, data_width

    def parse_xml_to_dict(self, xml):
        """
        Recursively convert an XML tree into nested dicts (modelled on
        TensorFlow's recursive_parse_xml_to_dict).

        Args:
            xml: xml tree obtained by parsing XML file contents using lxml.etree

        Returns:
            Python dictionary holding XML contents.
        """
        if len(xml) == 0:  # leaf node: return its tag and text directly
            return {xml.tag: xml.text}

        result = {}
        for child in xml:
            child_result = self.parse_xml_to_dict(child)
            if child.tag != 'object':
                result[child.tag] = child_result[child.tag]
            else:
                # There may be several <object> entries, so collect them in a list.
                if child.tag not in result:
                    result[child.tag] = []
                result[child.tag].append(child_result[child.tag])
        return {xml.tag: result}

    def coco_index(self, idx):
        """
        Return the annotation of sample *idx* for pycocotools statistics.

        The image itself is never opened, which makes collecting the label
        statistics much faster. Boxes with non-positive width or height are
        skipped with a warning, because they would turn the regression loss
        into NaN.

        Args:
            idx: index of the sample.

        Returns:
            ((height, width), target) with the same target keys as __getitem__.
        """
        xml_path, data = self._load_annotation(idx)
        data_height = int(data["size"]["height"])
        data_width = int(data["size"]["width"])

        boxes = []
        labels = []
        iscrowd = []
        # An annotation without any <object> simply yields an empty target.
        for obj in data.get("object", []):
            xmin = float(obj["bndbox"]["xmin"])
            xmax = float(obj["bndbox"]["xmax"])
            ymin = float(obj["bndbox"]["ymin"])
            ymax = float(obj["bndbox"]["ymax"])

            if xmax <= xmin or ymax <= ymin:
                print("Warning: in '{}' xml, there are some bbox w/h <=0".format(xml_path))
                continue

            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(self.class_dict[obj["name"]])
            # "difficult" is optional, exactly as handled in __getitem__.
            iscrowd.append(int(obj.get("difficult", 0)))

        # Convert everything into torch.Tensor; reshape keeps the (N, 4) layout
        # even when no box survived, so the column indexing below stays valid.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        iscrowd = torch.as_tensor(iscrowd, dtype=torch.int64)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        return (data_height, data_width), target

    @staticmethod
    def collate_fn(batch):
        """Regroup a list of (image, target) samples into (images, targets)."""
        return tuple(zip(*batch))
class AnchorsGenerator(nn.Module):
    __annotations__ = {
        "cell_anchors": Optional[List[torch.Tensor]],
        "_cache": Dict[str, List[torch.Tensor]]
    }

    """
    Anchor generator.

    Produces anchors for a set of feature maps and image sizes, supporting
    several sizes and aspect ratios per feature map.

    sizes and aspect_ratios must have one entry per feature map. sizes[i] and
    aspect_ratios[i] may each hold any number of elements; the generator then
    emits len(sizes[i]) * len(aspect_ratios[i]) anchors per spatial location
    of feature map i.

    Arguments:
        sizes (Tuple[Tuple[int]]):
        aspect_ratios (Tuple[Tuple[float]]):
    """

    def __init__(self, sizes=(128, 256, 512), aspect_ratios=(0.5, 1.0, 2.0)):
        super(AnchorsGenerator, self).__init__()

        # Flat inputs are promoted to the per-feature-map nested form.
        if not isinstance(sizes[0], (list, tuple)):
            # TODO change this
            sizes = tuple((size,) for size in sizes)
        if not isinstance(aspect_ratios[0], (list, tuple)):
            aspect_ratios = (aspect_ratios,) * len(sizes)

        assert len(sizes) == len(aspect_ratios)

        self.sizes = sizes
        self.aspect_ratios = aspect_ratios
        self.cell_anchors = None
        self._cache = {}

    def generate_anchors(self, scales, aspect_ratios, dtype=torch.float32, device=torch.device("cpu")):
        # type: (List[int], List[float], torch.dtype, torch.device) -> Tensor
        """
        Build the zero-centred anchor templates for one feature map.

        Arguments:
            scales: sqrt(anchor_area)
            aspect_ratios: h/w ratios
            dtype: dtype of the returned tensor
            device: device of the returned tensor

        Returns:
            Tensor of shape [len(aspect_ratios) * len(scales), 4] holding
            rounded (xmin, ymin, xmax, ymax) offsets around (0, 0).
        """
        scale_values = torch.as_tensor(scales, dtype=dtype, device=device)
        ratio_values = torch.as_tensor(aspect_ratios, dtype=dtype, device=device)
        height_factors = torch.sqrt(ratio_values)
        width_factors = 1.0 / height_factors

        # Outer product ratios x scales, flattened ratio-major.
        widths = (width_factors[:, None] * scale_values[None, :]).view(-1)
        heights = (height_factors[:, None] * scale_values[None, :]).view(-1)

        # Left-top and right-bottom corners relative to the anchor centre.
        corners = torch.stack([-widths, -heights, widths, heights], dim=1) / 2

        return corners.round()

    def set_cell_anchors(self, dtype, device):
        # type: (torch.dtype, torch.device) -> None
        """Create the anchor templates unless they already live on *device*."""
        existing = self.cell_anchors
        # All templates are assumed to share one device, so checking the
        # first is enough.
        if existing is not None and existing[0].device == device:
            return

        # One zero-centred template tensor per feature map.
        self.cell_anchors = [
            self.generate_anchors(level_sizes, level_ratios, dtype, device)
            for level_sizes, level_ratios in zip(self.sizes, self.aspect_ratios)
        ]

    def num_anchors_per_location(self):
        """Return, per feature map, the number of anchors at each location."""
        return [len(level_sizes) * len(level_ratios)
                for level_sizes, level_ratios in zip(self.sizes, self.aspect_ratios)]

    # For every combination of (a, (g, s), i) in (self.cell_anchors, zip(grid_sizes, strides), 0:2),
    # output g[i] anchors that are s[i] distance apart in direction i, with the same dimensions as a.
    def grid_anchors(self, grid_sizes, strides):
        # type: (List[List[int]], List[List[Tensor]]) -> List[Tensor]
        """
        Map the anchor templates onto every grid cell, in image coordinates.

        Args:
            grid_sizes: (height, width) of each feature map
            strides: (stride_height, stride_width) of each feature map, i.e.
                how many image pixels one feature-map step covers
        """
        all_level_anchors = []
        templates = self.cell_anchors
        assert templates is not None

        for grid_size, stride, template in zip(grid_sizes, strides, templates):
            rows, cols = grid_size
            step_y, step_x = stride
            device = template.device

            # Image-space x coordinate of every column, shape [cols].
            xs = torch.arange(0, cols, dtype=torch.float32, device=device) * step_x
            # Image-space y coordinate of every row, shape [rows].
            ys = torch.arange(0, rows, dtype=torch.float32, device=device) * step_y

            # Row/column coordinate grids, each of shape [rows, cols].
            grid_y, grid_x = torch.meshgrid(ys, xs)
            flat_x = grid_x.reshape(-1)
            flat_y = grid_y.reshape(-1)

            # Offsets for (xmin, ymin, xmax, ymax), shape [rows * cols, 4].
            offsets = torch.stack([flat_x, flat_y, flat_x, flat_y], dim=1)

            # Broadcast-add every template to every offset.
            placed = offsets.view(-1, 1, 4) + template.view(1, -1, 4)
            all_level_anchors.append(placed.reshape(-1, 4))

        return all_level_anchors  # List[Tensor(all_num_anchors, 4)]

    def cached_grid_anchors(self, grid_sizes, strides):
        # type: (List[List[int]], List[List[Tensor]]) -> List[Tensor]
        """Return grid_anchors(grid_sizes, strides), memoised in self._cache."""
        cache_key = str(grid_sizes) + str(strides)
        if cache_key not in self._cache:
            self._cache[cache_key] = self.grid_anchors(grid_sizes, strides)
        return self._cache[cache_key]

    def forward(self, image_list, feature_maps):
        # type: (ImageList, List[Tensor]) -> List[Tensor]
        """Return one [num_anchors, 4] tensor per image in the batch."""
        # (height, width) of every feature map.
        grid_sizes = [fmap.shape[-2:] for fmap in feature_maps]

        # Height and width of the batched input tensor.
        image_size = image_list.tensors.shape[-2:]

        dtype, device = feature_maps[0].dtype, feature_maps[0].device

        # One feature-map step corresponds to this many image pixels.
        strides = [[torch.tensor(image_size[0] // grid[0], dtype=torch.int64, device=device),
                    torch.tensor(image_size[1] // grid[1], dtype=torch.int64, device=device)]
                   for grid in grid_sizes]

        # Make sure the templates exist on the right device.
        self.set_cell_anchors(dtype, device)

        # Anchors in image coordinates, one tensor per feature map.
        per_level_anchors = self.cached_grid_anchors(grid_sizes, strides)

        anchors = torch.jit.annotate(List[List[torch.Tensor]], [])
        # Every image in the batch receives the same per-level anchors.
        for (image_height, image_width) in image_list.image_sizes:
            anchors_in_image = []
            for level_anchors in per_level_anchors:
                anchors_in_image.append(level_anchors)
            anchors.append(anchors_in_image)

        # Concatenate the levels so each image ends up with a single tensor.
        anchors = [torch.cat(levels) for levels in anchors]

        # Clear the cache to keep memory from growing.
        self._cache.clear()
        return anchors
def remove_small_boxes(boxes, min_size):
    # type: (Tensor, float) -> Tensor
    """
    Find the boxes whose width and height are both at least min_size.

    Arguments:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format
        min_size (float): minimum side length

    Returns:
        keep (Tensor[K]): indices of the boxes whose two sides are both
            greater than or equal to min_size
    """
    wide_enough = torch.ge(boxes[:, 2] - boxes[:, 0], min_size)
    tall_enough = torch.ge(boxes[:, 3] - boxes[:, 1], min_size)
    # torch.where on a boolean mask yields a tuple of index tensors;
    # the first entry holds the row indices.
    return torch.where(torch.logical_and(wide_enough, tall_enough))[0]
def box_area(boxes):
    """
    Compute the area of each bounding box given as (x1, y1, x2, y2).

    Arguments:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format

    Returns:
        area (Tensor[N]): area of each box
    """
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    return widths * heights
class BalancedPositiveNegativeSampler(object):
    """
    Randomly samples a fixed-size batch of elements per image while capping
    the share of positives in that batch.
    """

    def __init__(self, batch_size_per_image, positive_fraction):
        # type: (int, float) -> None
        """
        Arguments:
            batch_size_per_image (int): number of elements to select per image
            positive_fraction (float): target fraction of positives in the batch
        """
        self.batch_size_per_image = batch_size_per_image
        self.positive_fraction = positive_fraction

    def __call__(self, matched_idxs):
        # type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
        """
        Arguments:
            matched_idxs: list of tensors, one per image, holding -1, 0 or
                positive values. Values >= 1 are positives, values == 0 are
                negatives, everything else is ignored.

        Returns:
            pos_idx (list[tensor])
            neg_idx (list[tensor])

        Two lists of uint8 masks (one mask per image): the first marks the
        sampled positives, the second marks the sampled negatives.
        """
        pos_masks = []
        neg_masks = []

        for labels in matched_idxs:
            # candidate indices; torch.where is used instead of nonzero
            pos_candidates = torch.where(torch.ge(labels, 1))[0]
            neg_candidates = torch.where(torch.eq(labels, 0))[0]

            # quota of positives, capped by how many are actually available
            pos_quota = min(pos_candidates.numel(),
                            int(self.batch_size_per_image * self.positive_fraction))
            # the remainder of the batch goes to negatives, capped the same way
            neg_quota = min(neg_candidates.numel(),
                            self.batch_size_per_image - pos_quota)

            # shuffle the candidates and keep the first `quota` of each kind
            pos_order = torch.randperm(pos_candidates.numel(), device=pos_candidates.device)
            neg_order = torch.randperm(neg_candidates.numel(), device=neg_candidates.device)
            chosen_pos = pos_candidates[pos_order[:pos_quota]]
            chosen_neg = neg_candidates[neg_order[:neg_quota]]

            # turn the chosen indices into binary masks over all elements
            pos_mask = torch.zeros_like(labels, dtype=torch.uint8)
            neg_mask = torch.zeros_like(labels, dtype=torch.uint8)
            pos_mask[chosen_pos] = 1
            neg_mask[chosen_neg] = 1

            pos_masks.append(pos_mask)
            neg_masks.append(neg_mask)

        return pos_masks, neg_masks
class BoxCoder(object):
    """
    Converts between absolute (x1, y1, x2, y2) boxes and the
    (dx, dy, dw, dh) parameterization used to train the box regressors.
    """

    def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)):
        # type: (Tuple[float, float, float, float], float) -> None
        """
        Arguments:
            weights (4-element tuple): scaling applied to (dx, dy, dw, dh)
            bbox_xform_clip (float): upper bound applied to dw/dh before exp()
        """
        self.weights = weights
        self.bbox_xform_clip = bbox_xform_clip

    def encode(self, reference_boxes, proposals):
        # type: (List[Tensor], List[Tensor]) -> List[Tensor]
        """
        Compute regression targets for anchors/proposals and their matched gt boxes.

        Args:
            reference_boxes: List[Tensor], gt box matched to every proposal/anchor
            proposals: List[Tensor], anchors/proposals

        Returns:
            regression parameters, split back into one tensor per image
        """
        # remember the per-image counts so the flat result can be split again
        counts = [len(per_image) for per_image in reference_boxes]
        flat_references = torch.cat(reference_boxes, dim=0)
        flat_proposals = torch.cat(proposals, dim=0)

        # columns are targets_dx, targets_dy, targets_dw, targets_dh
        flat_targets = self.encode_single(flat_references, flat_proposals)
        return flat_targets.split(counts, 0)

    def encode_single(self, reference_boxes, proposals):
        """
        Encode a set of proposals with respect to some reference boxes.

        Arguments:
            reference_boxes (Tensor): reference boxes
            proposals (Tensor): boxes to be encoded
        """
        # the weights follow the dtype/device of the reference boxes
        weight_tensor = torch.as_tensor(self.weights,
                                        dtype=reference_boxes.dtype,
                                        device=reference_boxes.device)
        return encode_boxes(reference_boxes, proposals, weight_tensor)

    def decode(self, rel_codes, boxes):
        # type: (Tensor, List[Tensor]) -> Tensor
        """
        Apply predicted regression parameters to per-image anchors/proposals.

        Args:
            rel_codes: bbox regression parameters
            boxes: anchors/proposals, one tensor per image

        Returns:
            decoded boxes; reshaped to [total_boxes, -1, 4] when at least one
            box is present
        """
        assert isinstance(boxes, (list, tuple))
        assert isinstance(rel_codes, torch.Tensor)

        # explicit accumulation loop for the total number of boxes
        total_boxes = 0
        for per_image in boxes:
            total_boxes += per_image.size(0)

        decoded = self.decode_single(rel_codes, torch.cat(boxes, dim=0))
        if total_boxes > 0:
            decoded = decoded.reshape(total_boxes, -1, 4)
        return decoded

    def decode_single(self, rel_codes, boxes):
        """
        From a set of original boxes and encoded relative box offsets,
        get the decoded boxes.

        Arguments:
            rel_codes (Tensor): encoded boxes (bbox regression parameters)
            boxes (Tensor): reference boxes (anchors/proposals)
        """
        boxes = boxes.to(rel_codes.dtype)

        # reference box geometry: size and center
        widths = boxes[:, 2] - boxes[:, 0]
        heights = boxes[:, 3] - boxes[:, 1]
        ctr_x = boxes[:, 0] + 0.5 * widths
        ctr_y = boxes[:, 1] + 0.5 * heights

        # undo the per-component weighting; every 4th column belongs together
        wx, wy, ww, wh = self.weights
        dx = rel_codes[:, 0::4] / wx
        dy = rel_codes[:, 1::4] / wy
        # cap dw/dh so torch.exp() below cannot receive overly large values
        dw = torch.clamp(rel_codes[:, 2::4] / ww, max=self.bbox_xform_clip)
        dh = torch.clamp(rel_codes[:, 3::4] / wh, max=self.bbox_xform_clip)

        center_x = dx * widths[:, None] + ctr_x[:, None]
        center_y = dy * heights[:, None] + ctr_y[:, None]
        out_w = torch.exp(dw) * widths[:, None]
        out_h = torch.exp(dh) * heights[:, None]

        # half extents, built from a 0.5 tensor on the matching dtype/device
        half_w = torch.tensor(0.5, dtype=center_x.dtype, device=out_w.device) * out_w
        half_h = torch.tensor(0.5, dtype=center_y.dtype, device=out_h.device) * out_h

        x_min = center_x - half_w
        y_min = center_y - half_h
        x_max = center_x + half_w
        y_max = center_y + half_h

        corners = torch.stack((x_min, y_min, x_max, y_max), dim=2)
        return corners.flatten(1)
or equal to this value are candidate matches. low_threshold (float): a lower quality threshold used to stratify matches into three levels: 1) matches >= high_threshold 2) BETWEEN_THRESHOLDS matches in [low_threshold, high_threshold) 3) BELOW_LOW_THRESHOLD matches in [0, low_threshold) allow_low_quality_matches (bool): if True, produce additional matches for predictions that have only low-quality match candidates. See set_low_quality_matches_ for more details. """ self.BELOW_LOW_THRESHOLD = -1 self.BETWEEN_THRESHOLDS = -2 assert low_threshold <= high_threshold self.high_threshold = high_threshold # 0.7 self.low_threshold = low_threshold # 0.3 self.allow_low_quality_matches = allow_low_quality_matches def __call__(self, match_quality_matrix): """ 计算anchors与每个gtboxes匹配的iou最大值,并记录索引, iou= self.low_threshold) & ( matched_vals < self.high_threshold ) # iou小于low_threshold的matches索引置为-1 matches[below_low_threshold] = self.BELOW_LOW_THRESHOLD # -1 # iou在[low_threshold, high_threshold]之间的matches索引置为-2 matches[between_thresholds] = self.BETWEEN_THRESHOLDS # -2 if self.allow_low_quality_matches: assert all_matches is not None self.set_low_quality_matches_(matches, all_matches, match_quality_matrix) return matches def set_low_quality_matches_(self, matches, all_matches, match_quality_matrix): """ Produce additional matches for predictions that have only low-quality matches. Specifically, for each ground-truth find the set of predictions that have maximum overlap with it (including ties); for each prediction in that set, if it is unmatched, then match it to the ground-truth with which it has the highest quality value. """ # For each gt, find the prediction with which it has highest quality # 对于每个gt boxes寻找与其iou最大的anchor, # highest_quality_foreach_gt为匹配到的最大iou值 highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1) # the dimension to reduce. 
def smooth_l1_loss(input, target, beta: float = 1. / 9, size_average: bool = True):
    """
    Smooth L1 loss with a configurable transition point ``beta``.

    Very similar to the smooth_l1_loss from pytorch, but with the extra
    beta parameter. For an absolute error ``n = |input - target|`` the
    per-element loss is ``0.5 * n ** 2 / beta`` when ``n < beta`` and
    ``n - 0.5 * beta`` otherwise.

    Args:
        input (Tensor): predictions
        target (Tensor): regression targets, broadcastable with ``input``
        beta (float): error below which the loss is quadratic. Values
            below 1e-5 (including 0) select a plain L1 loss.
        size_average (bool): return the mean over all elements when True,
            the sum otherwise

    Returns:
        Tensor: scalar loss
    """
    n = torch.abs(input - target)
    if beta < 1e-5:
        # With beta == 0 the quadratic branch divides by zero. torch.where
        # would hide the inf/nan in the forward value, but the gradient of the
        # unselected branch still becomes nan, so fall back to pure L1.
        loss = n
    else:
        cond = torch.lt(n, beta)
        loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta)
    if size_average:
        return loss.mean()
    return loss.sum()
def sigmoid_focal_loss(
    inputs: torch.Tensor,
    targets: torch.Tensor,
    alpha: float = 0.25,
    gamma: float = 2,
    reduction: str = "none",
):
    """
    Original implementation from https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/focal_loss.py .
    Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002.

    Args:
        inputs: A float tensor of arbitrary shape.
                The predictions (logits) for each example.
        targets: A float tensor with the same shape as inputs. Stores the binary
                 classification label for each element in inputs
                (0 for the negative class and 1 for the positive class).
        alpha: (optional) Weighting factor in range (0,1) to balance
                positive vs negative examples or -1 for ignore. Default = 0.25
        gamma: Exponent of the modulating factor (1 - p_t) to
               balance easy vs hard examples.
        reduction: 'none' | 'mean' | 'sum'
                 'none': No reduction will be applied to the output.
                 'mean': The output will be averaged.
                 'sum': The output will be summed.

    Returns:
        Loss tensor with the reduction option applied.

    Raises:
        ValueError: if ``reduction`` is not one of 'none', 'mean' or 'sum'.
    """
    # Reject unknown modes up front; previously a misspelled reduction silently
    # returned the unreduced per-element loss.
    if reduction not in ("none", "mean", "sum"):
        raise ValueError(
            "Invalid value for arg 'reduction': '{}'. "
            "Supported reduction modes: 'none', 'mean', 'sum'".format(reduction)
        )

    p = torch.sigmoid(inputs)
    ce_loss = F.binary_cross_entropy_with_logits(
        inputs, targets, reduction="none"
    )
    # p_t is the predicted probability of the true class of each element
    p_t = p * targets + (1 - p) * (1 - targets)
    loss = ce_loss * ((1 - p_t) ** gamma)

    # a negative alpha disables the positive/negative balancing term
    if alpha >= 0:
        alpha_t = alpha * targets + (1 - alpha) * (1 - targets)
        loss = alpha_t * loss

    if reduction == "mean":
        loss = loss.mean()
    elif reduction == "sum":
        loss = loss.sum()

    return loss
Args: in_channels (int): number of channels of the input feature num_anchors (int): number of anchors to be predicted num_classes (int): number of classes to be predicted """ def __init__(self, in_channels, num_anchors, num_classes, prior_probability=0.01): super(RetinaNetClassificationHead, self).__init__() # class subnet是由四个3x3的卷积层(激活函数为ReLU) + 一个3x3的卷积层(分类器) conv = [] for _ in range(4): conv.append(nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)) conv.append(nn.ReLU(inplace=True)) self.conv = nn.Sequential(*conv) self.cls_logits = nn.Conv2d(in_channels, num_anchors * num_classes, kernel_size=3, stride=1, padding=1) # initial weights for layer in self.conv.children(): if isinstance(layer, nn.Conv2d): torch.nn.init.normal_(layer.weight, std=0.01) torch.nn.init.constant_(layer.bias, 0) torch.nn.init.normal_(self.cls_logits.weight, std=0.01) torch.nn.init.constant_(self.cls_logits.bias, -math.log((1 - prior_probability) / prior_probability)) self.num_classes = num_classes self.num_anchors = num_anchors self.BETWEEN_THRESHOLDS = det_utils.Matcher.BETWEEN_THRESHOLDS def compute_loss(self, targets: List[Dict[str, Tensor]], head_outputs: Dict[str, Tensor], matched_idxs: List[Tensor]) -> Tensor: losses = [] cls_logits = head_outputs["cls_logits"] for targets_per_img, cls_logits_per_img, matched_idxs_per_img in zip(targets, cls_logits, matched_idxs): # determine only the foreground # 找出所有前景目标 foreground_idxs_per_img = torch.ge(matched_idxs_per_img, 0) # ge: >= num_foreground = foreground_idxs_per_img.sum() # create the target classification gt_classes_target = torch.zeros_like(cls_logits_per_img) gt_classes_target[ foreground_idxs_per_img, targets_per_img["labels"][matched_idxs_per_img[foreground_idxs_per_img]] ] = 1.0 # find indices for which anchors should be ignored # 忽略iou在[0.4, 0.5)之间的anchors valid_idxs_per_img = torch.ne(matched_idxs_per_img, self.BETWEEN_THRESHOLDS) # ne: != # compute the classification loss losses.append(sigmoid_focal_loss( 
class RetinaNetRegressionHead(nn.Module):
    """
    A regression head for use in RetinaNet.

    Args:
        in_channels (int): number of channels of the input feature
        num_anchors (int): number of anchors to be predicted
    """
    # NOTE(review): the key is 'box_coder' while the attribute set in __init__
    # is 'bbox_coder' - confirm which name is intended.
    __annotations__ = {
        'box_coder': det_utils.BoxCoder,
    }

    def __init__(self, in_channels, num_anchors):
        super(RetinaNetRegressionHead, self).__init__()

        # the box subnet is four 3x3 conv layers (each followed by ReLU)
        # plus one 3x3 conv layer acting as the box regressor
        conv = []
        for _ in range(4):
            conv.append(nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1))
            conv.append(nn.ReLU(inplace=True))
        self.conv = nn.Sequential(*conv)

        self.bbox_reg = nn.Conv2d(in_channels, num_anchors * 4, kernel_size=3, stride=1, padding=1)

        # initial weights of the tower
        for layer in self.conv.children():
            if isinstance(layer, nn.Conv2d):
                torch.nn.init.normal_(layer.weight, std=0.01)
                torch.nn.init.zeros_(layer.bias)

        # The final regressor was previously left at the default Conv2d
        # initialisation; give it the same N(0, 0.01) / zero-bias init as the
        # tower. Done after the tower so the tower's weights for a given seed
        # are unchanged.
        torch.nn.init.normal_(self.bbox_reg.weight, std=0.01)
        torch.nn.init.zeros_(self.bbox_reg.bias)

        self.bbox_coder = det_utils.BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))

    def compute_loss(self,
                     targets: List[Dict[str, Tensor]],
                     head_outputs: Dict[str, Tensor],
                     anchors: List[Tensor],
                     matched_idxs: List[Tensor]) -> Tensor:
        """
        Compute the L1 box regression loss over the foreground anchors.

        Args:
            targets: per-image dicts; the "boxes" entry is read here
            head_outputs: head outputs; the "bbox_regression" entry is read here
            anchors: per-image anchors
            matched_idxs: per-image index of the gt box matched to each anchor;
                negative values mark anchors that are not foreground

        Returns:
            scalar loss: per-image sums normalised by the number of foreground
            anchors (at least 1), averaged over the images
        """
        losses = []

        bbox_regression = head_outputs["bbox_regression"]

        for targets_per_img, bbox_regression_per_img, anchors_per_img, matched_idxs_per_img in \
                zip(targets, bbox_regression, anchors, matched_idxs):
            # determine only the foreground indices, ignore the rest
            foreground_idxs_per_img = torch.where(torch.ge(matched_idxs_per_img, 0))[0]  # ge: >=
            num_foreground = foreground_idxs_per_img.numel()

            # select only the foreground boxes
            matched_gt_boxes_per_img = targets_per_img["boxes"][matched_idxs_per_img[foreground_idxs_per_img]]
            bbox_regression_per_img = bbox_regression_per_img[foreground_idxs_per_img, :]
            anchors_per_img = anchors_per_img[foreground_idxs_per_img, :]

            # compute the regression targets
            targets_regression = self.bbox_coder.encode_single(matched_gt_boxes_per_img, anchors_per_img)

            # compute the box regression loss
            losses.append(torch.nn.functional.l1_loss(
                bbox_regression_per_img,
                targets_regression,
                reduction="sum"
            ) / max(1, num_foreground))

        return _sum(losses) / max(1, len(targets))

    def forward(self, x: List[Tensor]) -> Tensor:
        """
        Run the box subnet on every feature level and concatenate the results.

        Args:
            x: feature maps, one per pyramid level

        Returns:
            Tensor of shape (N, sum of HWA over levels, 4)
        """
        all_bbox_regression = []

        # iterate over every prediction feature level
        for features in x:
            bbox_regression = self.conv(features)
            bbox_regression = self.bbox_reg(bbox_regression)

            # Permute bbox regression output from (N, 4 * A, H, W) to (N, HWA, 4).
            N, _, H, W = bbox_regression.shape
            # [N, 4 * A, H, W] -> [N, A, 4, H, W]
            bbox_regression = bbox_regression.view(N, -1, 4, H, W)
            # [N, A, 4, H, W] -> [N, H, W, A, 4]
            bbox_regression = bbox_regression.permute(0, 3, 4, 1, 2)
            # [N, H, W, A, 4] -> [N, HWA, 4]
            bbox_regression = bbox_regression.reshape(N, -1, 4)

            all_bbox_regression.append(bbox_regression)

        return torch.cat(all_bbox_regression, dim=1)
class RetinaNet(nn.Module):
    """
    Implements RetinaNet.

    The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
    image, and should be in 0-1 range. Different images can have different sizes.

    The behavior of the model changes depending if it is in training or evaluation mode.

    During training, the model expects both the input tensors, as well as a targets (list of dictionary),
    containing:
        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
        - labels (Int64Tensor[N]): the class label for each ground-truth box

    The model returns a Dict[Tensor] during training, containing the classification and regression
    losses.

    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as
    follows:
        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with
          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
        - labels (Int64Tensor[N]): the predicted labels for each image
        - scores (Tensor[N]): the scores for each prediction

    Args:
        backbone (nn.Module): the network used to compute the features for the model.
            It should contain an out_channels attribute, which indicates the number of output
            channels that each feature map has (and it should be the same for all feature maps).
            The backbone should return a single Tensor or an OrderedDict[Tensor].
        num_classes (int): number of output classes of the model (excluding the background).
        min_size (int): minimum size of the image to be rescaled before feeding it to the backbone
        max_size (int): maximum size of the image to be rescaled before feeding it to the backbone
        image_mean (Tuple[float, float, float]): mean values used for input normalization.
            They are generally the mean values of the dataset on which the backbone has been trained
            on
        image_std (Tuple[float, float, float]): std values used for input normalization.
            They are generally the std values of the dataset on which the backbone has been trained on
        anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature
            maps.
        head (nn.Module): Module run on top of the feature pyramid.
            Defaults to a module containing a classification and regression module.
        score_thresh (float): Score threshold used for postprocessing the detections.
        nms_thresh (float): NMS threshold used for postprocessing the detections.
        detections_per_img (int): Number of best detections to keep after NMS.
        fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be
            considered as positive during training.
        bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be
            considered as negative during training.
        topk_candidates (int): Number of best detections to keep before NMS.
    """
    __annotations__ = {
        'box_coder': det_utils.BoxCoder,
        'proposal_matcher': det_utils.Matcher,
    }

    def __init__(self, backbone, num_classes,
                 # transform parameters
                 min_size=800, max_size=1333,
                 image_mean=None, image_std=None,
                 # Anchor parameters
                 anchor_generator=None, head=None,
                 proposal_matcher=None,
                 score_thresh=0.05,
                 nms_thresh=0.5,
                 detections_per_img=100,
                 fg_iou_thresh=0.5, bg_iou_thresh=0.4,
                 topk_candidates=1000):
        super(RetinaNet, self).__init__()

        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)"
            )
        self.backbone = backbone

        assert isinstance(anchor_generator, (AnchorsGenerator, type(None)))

        if anchor_generator is None:
            # Following the paper, every prediction level uses its base scale x
            # plus the two extra scales x*2^(1/3) and x*2^(2/3).
            # The base scales of the five levels are 32, 64, 128, 256, 512.
            # Note the relation between scale and area: area = scale^2.
            anchor_sizes = tuple((x, int(x * 2 ** (1.0 / 3)), int(x * 2 ** (2.0 / 3)))
                                 for x in [32, 64, 128, 256, 512])
            # three aspect ratios are used for the anchors of every level
            aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
            anchor_generator = AnchorsGenerator(anchor_sizes, aspect_ratios)
        self.anchor_generator = anchor_generator

        if head is None:
            head = RetinaNetHead(backbone.out_channels,  # in_channels
                                 anchor_generator.num_anchors_per_location()[0],  # num_anchors
                                 num_classes)  # num_classes
        self.head = head

        if proposal_matcher is None:
            proposal_matcher = det_utils.Matcher(
                fg_iou_thresh,
                bg_iou_thresh,
                allow_low_quality_matches=True
            )
        self.proposal_matcher = proposal_matcher

        self.box_coder = det_utils.BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))

        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]
        self.transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)

        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.detections_per_img = detections_per_img
        self.topk_candidates = topk_candidates

        # used only on torchscript mode
        self._has_warned = False

    @torch.jit.unused
    def eager_outputs(self, losses, detections):
        # type: (Dict[str, Tensor], List[Dict[str, Tensor]]) -> Union[Dict[str, Tensor], List[Dict[str, Tensor]]]
        """Return the losses in training mode and the detections otherwise."""
        if self.training:
            return losses

        return detections

    def compute_loss(self, targets, head_outputs, anchors):
        # type: (List[Dict[str, Tensor]], Dict[str, Tensor], List[Tensor]) -> Dict[str, Tensor]
        """
        Match anchors to ground-truth boxes per image and delegate the loss
        computation to the head.

        Args:
            targets: per-image dicts; the "boxes" entry is read here
            head_outputs: outputs of ``self.head``
            anchors: per-image anchors

        Returns:
            the dict returned by ``self.head.compute_loss``
        """
        matched_idxs = []
        for anchors_per_img, targets_per_img in zip(anchors, targets):
            if targets_per_img["boxes"].numel() == 0:
                # No gt boxes: mark every anchor with -1. The index tensor is
                # created on the anchors' device so that it can be combined
                # with the other per-image tensors when running on GPU.
                matched_idxs.append(torch.full((anchors_per_img.size(0),), -1,
                                               dtype=torch.int64,
                                               device=anchors_per_img.device))
                continue

            match_quality_matrix = box_ops.box_iou(targets_per_img["boxes"], anchors_per_img)
            matched_idxs.append(self.proposal_matcher(match_quality_matrix))

        return self.head.compute_loss(targets, head_outputs, anchors, matched_idxs)

    def postprocess_detections(self, head_output, anchors, image_shapes):
        # type: (Dict[str, List[Tensor]], List[List[Tensor]], List[Tuple[int, int]]) -> List[Dict[str, Tensor]]
        """
        Turn per-level head outputs into final per-image detections.

        Args:
            head_output: "cls_logits" and "bbox_regression", each split per level
            anchors: per-image list of per-level anchors
            image_shapes: per-image size used to clip the decoded boxes

        Returns:
            per-image dicts with "boxes", "scores" and "labels"
        """
        class_logits = head_output["cls_logits"]
        box_regression = head_output["bbox_regression"]

        num_img = len(image_shapes)

        detections: List[Dict[str, Tensor]] = []

        for index in range(num_img):
            box_regression_per_img = [br[index] for br in box_regression]
            logits_per_img = [cl[index] for cl in class_logits]
            anchors_per_img, image_shape = anchors[index], image_shapes[index]

            img_boxes = []
            img_scores = []
            img_labels = []

            for box_regression_per_level, logits_per_level, anchors_per_level in \
                    zip(box_regression_per_img, logits_per_img, anchors_per_img):
                num_classes = logits_per_level.shape[-1]

                # remove low scoring boxes
                scores_per_level = torch.sigmoid(logits_per_level).flatten()
                keep_idxs = torch.gt(scores_per_level, self.score_thresh)  # gt: >
                scores_per_level = scores_per_level[keep_idxs]
                topk_idxs = torch.where(keep_idxs)[0]

                # keep only topk scoring predictions on each level
                num_topk = min(self.topk_candidates, topk_idxs.size(0))
                scores_per_level, idxs = scores_per_level.topk(num_topk)
                topk_idxs = topk_idxs[idxs]

                # the flattened index encodes (anchor, class) as anchor * num_classes + class
                anchor_idxs = topk_idxs // num_classes
                labels_per_level = topk_idxs % num_classes

                boxes_per_level = self.box_coder.decode_single(box_regression_per_level[anchor_idxs],
                                                               anchors_per_level[anchor_idxs])
                boxes_per_level = box_ops.clip_boxes_to_image(boxes_per_level, image_shape)

                img_boxes.append(boxes_per_level)
                img_scores.append(scores_per_level)
                img_labels.append(labels_per_level)

            img_boxes = torch.cat(img_boxes, dim=0)
            img_scores = torch.cat(img_scores, dim=0)
            img_labels = torch.cat(img_labels, dim=0)

            # non-maximum suppression
            keep = box_ops.batched_nms(img_boxes, img_scores, img_labels, self.nms_thresh)
            keep = keep[:self.detections_per_img]

            detections.append({
                "boxes": img_boxes[keep],
                "scores": img_scores[keep],
                "labels": img_labels[keep]
            })

        return detections

    def forward(self, images, targets=None):
        # type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]
        """
        Args:
            images (list[Tensor]): images to be processed
            targets (list[Dict[Tensor]]): ground-truth boxes present in the image (optional)

        Returns:
            In eager mode: the dict of losses when training, otherwise the list
            of per-image detection dicts ("boxes", "scores", "labels").
            When scripting: always the tuple (losses, detections).

        Raises:
            ValueError: if targets are missing in training mode, if target
                boxes are not a tensor of shape [N, 4], or if a target box has
                non-positive width or height.
        """
        if self.training and targets is None:
            raise ValueError("In training mode, targets should be passed")

        if self.training:
            assert targets is not None
            # check targets info
            for target in targets:
                boxes = target["boxes"]
                if isinstance(boxes, torch.Tensor):
                    if len(boxes.shape) != 2 or boxes.shape[-1] != 4:
                        raise ValueError("Expected target boxes to be a tensor "
                                         "of shape [N, 4], got {:}.".format(boxes.shape))
                else:
                    raise ValueError("Expected target boxes to be of type "
                                     "Tensor, got {:}.".format(type(boxes)))

        # get the original images sizes
        original_img_sizes: List[Tuple[int, int]] = []
        for img in images:
            val = img.shape[-2:]
            assert len(val) == 2
            original_img_sizes.append((val[0], val[1]))  # h, w

        # transform the input
        images, targets = self.transform(images, targets)

        # Check for degenerate boxes
        # TODO: Move this to a function
        if targets is not None:
            for target_idx, target in enumerate(targets):
                boxes = target["boxes"]
                degenerate_boxes = boxes[:, 2:] <= boxes[:, :2]
                if degenerate_boxes.any():
                    # print the first degenerate box
                    bb_idx = torch.where(degenerate_boxes.any(dim=1))[0][0]
                    degen_bb: List[float] = boxes[bb_idx].tolist()
                    raise ValueError("All bounding boxes should have positive height and width."
                                     " Found invalid box {} for target at index {}."
                                     .format(degen_bb, target_idx))

        # get the features from the backbone
        features = self.backbone(images.tensors)
        if isinstance(features, torch.Tensor):
            features = OrderedDict([("0", features)])

        features = list(features.values())

        # compute the retinanet heads outputs using the features
        head_outputs = self.head(features)

        # create the set of anchors
        anchors = self.anchor_generator(images, features)

        losses = {}
        detections: List[Dict[str, Tensor]] = []
        if self.training:
            assert targets is not None

            losses = self.compute_loss(targets, head_outputs, anchors)
        else:
            # recover level sizes
            num_anchors_per_level = [x.size(2) * x.size(3) for x in features]
            HW = 0
            for v in num_anchors_per_level:
                HW += v
            HWA = head_outputs["cls_logits"].size(1)
            A = HWA // HW
            num_anchors_per_level = [hw * A for hw in num_anchors_per_level]

            # split outputs per level
            split_head_outputs: Dict[str, List[Tensor]] = {}
            for k in head_outputs:
                split_head_outputs[k] = list(head_outputs[k].split(num_anchors_per_level, dim=1))
            split_anchors = [list(a.split(num_anchors_per_level)) for a in anchors]

            # compute the detections
            detections = self.postprocess_detections(split_head_outputs, split_anchors, images.image_sizes)
            detections = self.transform.postprocess(detections, images.image_sizes, original_img_sizes)

        if torch.jit.is_scripting():
            if not self._has_warned:
                warnings.warn("RetinaNet always returns a (Losses, Detections) tuple in scripting")
                self._has_warned = True
            return losses, detections
        return self.eager_outputs(losses, detections)
def _resize_image(image, self_min_size, self_max_size):
    # type: (Tensor, float, float) -> Tensor
    """
    Rescale an image so its shorter side equals ``self_min_size``, unless that
    would push the longer side beyond ``self_max_size``; in that case the
    longer side is scaled to ``self_max_size`` instead.

    Args:
        image: image tensor whose last two dimensions are height and width
        self_min_size: target length of the shorter side
        self_max_size: upper bound for the longer side

    Returns:
        the bilinearly resized image, without the temporary batch dimension
    """
    hw = torch.tensor(image.shape[-2:])
    short_side = float(torch.min(hw))  # smaller of height/width
    long_side = float(torch.max(hw))   # larger of height/width

    # scale derived from the shorter side first
    scale_factor = self_min_size / short_side
    if long_side * scale_factor > self_max_size:
        # the longer side would overshoot the limit: scale by the longer side instead
        scale_factor = self_max_size / long_side

    # bilinear interpolation only supports 4D input, so add a batch
    # dimension ([C, H, W] -> [1, C, H, W]) and strip it again afterwards
    batched = image[None]
    resized = torch.nn.functional.interpolate(
        batched,
        scale_factor=scale_factor,
        mode="bilinear",
        recompute_scale_factor=True,
        align_corners=False)
    return resized[0]
The transformations it perform are: - input normalization (mean subtraction and std division) - input / target resizing to match min_size / max_size It returns a ImageList for the inputs, and a List[Dict[Tensor]] for the targets """ def __init__(self, min_size, max_size, image_mean, image_std): super(GeneralizedRCNNTransform, self).__init__() if not isinstance(min_size, (list, tuple)): min_size = (min_size,) self.min_size = min_size # 指定图像的最小边长范围 self.max_size = max_size # 指定图像的最大边长范围 self.image_mean = image_mean # 指定图像在标准化处理中的均值 self.image_std = image_std # 指定图像在标准化处理中的方差 def normalize(self, image): """标准化处理""" dtype, device = image.dtype, image.device mean = torch.as_tensor(self.image_mean, dtype=dtype, device=device) std = torch.as_tensor(self.image_std, dtype=dtype, device=device) # [:, None, None]: shape [3] -> [3, 1, 1] return (image - mean[:, None, None]) / std[:, None, None] def torch_choice(self, k): # type: (List[int]) -> int """ Implements `random.choice` via torch ops so it can be compiled with TorchScript. Remove if https://github.com/pytorch/pytorch/issues/25803 is fixed. 
""" index = int(torch.empty(1).uniform_(0., float(len(k))).item()) return k[index] def resize(self, image, target): # type: (Tensor, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]] """ 将图片缩放到指定的大小范围内,并对应缩放bboxes信息 Args: image: 输入的图片 target: 输入图片的相关信息(包括bboxes信息) Returns: image: 缩放后的图片 target: 缩放bboxes后的图片相关信息 """ # image shape is [channel, height, width] h, w = image.shape[-2:] if self.training: size = float(self.torch_choice(self.min_size)) # 指定输入图片的最小边长,注意是self.min_size不是min_size else: # FIXME assume for now that testing uses the largest scale size = float(self.min_size[-1]) # 指定输入图片的最小边长,注意是self.min_size不是min_size if torchvision._is_tracing(): image = _resize_image_onnx(image, size, float(self.max_size)) else: image = _resize_image(image, size, float(self.max_size)) if target is None: return image, target bbox = target["boxes"] # 根据图像的缩放比例来缩放bbox bbox = resize_boxes(bbox, [h, w], image.shape[-2:]) target["boxes"] = bbox return image, target # _onnx_batch_images() is an implementation of # batch_images() that is supported by ONNX tracing. 
def max_by_axis(self, the_list):
    # type: (List[List[int]]) -> List[int]
    """Return the position-wise maximum over a list of int lists.

    The first sub-list is used as the accumulator: it is updated in place
    and is the object that gets returned.
    """
    largest = the_list[0]
    for shape in the_list[1:]:
        for axis, extent in enumerate(shape):
            # Only overwrite when a strictly larger value is found.
            if extent > largest[axis]:
                largest[axis] = extent
    return largest
def postprocess(self,
                result,                # type: List[Dict[str, Tensor]]
                image_shapes,          # type: List[Tuple[int, int]]
                original_image_sizes   # type: List[Tuple[int, int]]
                ):
    # type: (...) -> List[Dict[str, Tensor]]
    """Map predicted boxes back onto the original image scale.

    Args:
        result: list of per-image prediction dicts, len(result) == batch_size.
        image_shapes: per-image sizes after the preprocessing resize,
            len(image_shapes) == batch_size.
        original_image_sizes: per-image sizes before preprocessing,
            len(original_image_sizes) == batch_size.

    Returns:
        The same ``result`` list. In training mode it is returned untouched;
        otherwise each entry's "boxes" has been rescaled.
    """
    if not self.training:
        # Walk the predictions together with both sizes of each image and
        # undo the resize that was applied to the boxes.
        per_image = zip(result, image_shapes, original_image_sizes)
        for idx, (pred, resized_hw, original_hw) in enumerate(per_image):
            result[idx]["boxes"] = resize_boxes(pred["boxes"], resized_hw, original_hw)
    return result
def resize_boxes(boxes, original_size, new_size):
    # type: (Tensor, List[int], List[int]) -> Tensor
    """Scale boxes by the ratio between a new and an original image size.

    Arguments:
        boxes: tensor of shape [N, 4] holding (xmin, ymin, xmax, ymax) rows.
        original_size: image size before resizing, as (height, width).
        new_size: image size after resizing, as (height, width).

    Returns:
        A new [N, 4] tensor with x coordinates multiplied by the width ratio
        and y coordinates multiplied by the height ratio.
    """
    target_device = boxes.device

    # One 0-dim float32 ratio per dimension, created on the boxes' device.
    ratios = []
    for new_dim, old_dim in zip(new_size, original_size):
        numerator = torch.tensor(new_dim, dtype=torch.float32, device=target_device)
        denominator = torch.tensor(old_dim, dtype=torch.float32, device=target_device)
        ratios.append(numerator / denominator)
    scale_y, scale_x = ratios

    # unbind(1) drops dimension 1 and yields the four coordinate columns.
    left, top, right, bottom = boxes.unbind(1)
    return torch.stack((left * scale_x, top * scale_y, right * scale_x, bottom * scale_y), dim=1)
def time_synchronized():
    """Return the current time, synchronizing CUDA first when it is available.

    torch.cuda.synchronize() is called before the clock is read so that a
    timing taken around GPU work is not read while that work is still queued.
    """
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()
Image.open("./test.jpg") # from pil image to tensor, do not normalize image data_transform = transforms.Compose([transforms.ToTensor()]) img = data_transform(original_img) # expand batch dimension img = torch.unsqueeze(img, dim=0) model.eval() # 进入验证模式 with torch.no_grad(): # init img_height, img_width = img.shape[-2:] init_img = torch.zeros((1, 3, img_height, img_width), device=device) model(init_img) t_start = time_synchronized() predictions = model(img.to(device))[0] t_end = time_synchronized() print("inference+NMS time: {}".format(t_end - t_start)) predict_boxes = predictions["boxes"].to("cpu").numpy() predict_classes = predictions["labels"].to("cpu").numpy() predict_scores = predictions["scores"].to("cpu").numpy() if len(predict_boxes) == 0: print("没有检测到任何目标!") plot_img = draw_objs(original_img, predict_boxes, predict_classes, predict_scores, category_index=category_index, box_thresh=0.5, line_thickness=3, font='arial.ttf', font_size=20) plt.imshow(plot_img) plt.show() # 保存预测的图片结果 plot_img.save("test_result.jpg") if __name__ == '__main__': main() ================================================ FILE: pytorch_object_detection/retinaNet/requirements.txt ================================================ lxml matplotlib numpy tqdm torch==1.7.1 torchvision==0.8.2 pycocotools Pillow ================================================ FILE: pytorch_object_detection/retinaNet/results20210421-142632.txt ================================================ epoch:0 0.4012 0.6088 0.4334 0.1691 0.3113 0.4498 0.4265 0.6233 0.6478 0.3362 0.5541 0.6977 1.0681 0.01 epoch:1 0.5028 0.7295 0.5441 0.2219 0.3913 0.5552 0.4624 0.6649 0.6875 0.4039 0.5928 0.7346 0.5422 0.01 epoch:2 0.5311 0.7614 0.5784 0.2439 0.4189 0.5852 0.4733 0.6774 0.698 0.417 0.6105 0.7441 0.4456 0.01 epoch:3 0.5439 0.7762 0.595 0.2412 0.4292 0.5996 0.4773 0.6835 0.7021 0.4137 0.6074 0.7494 0.3872 0.01 epoch:4 0.5404 0.7739 0.5949 0.2457 0.426 0.5968 0.4723 0.6818 0.7007 0.4363 0.6047 0.7479 0.347 0.01 epoch:5 0.5513 
0.7867 0.6021 0.2415 0.4265 0.6087 0.4811 0.685 0.7041 0.4073 0.6088 0.7526 0.3166 0.01 epoch:6 0.5508 0.7909 0.6014 0.2327 0.4211 0.6116 0.478 0.6803 0.699 0.4081 0.5994 0.7485 0.2884 0.01 epoch:7 0.5617 0.7972 0.6142 0.2431 0.427 0.6223 0.4848 0.6862 0.7049 0.4184 0.6018 0.7551 0.2546 0.001 epoch:8 0.561 0.7986 0.6117 0.2342 0.4268 0.6223 0.4842 0.6855 0.705 0.4153 0.6051 0.7551 0.2462 0.001 epoch:9 0.563 0.7983 0.6153 0.2359 0.4336 0.6237 0.4849 0.6884 0.7068 0.4103 0.6063 0.7574 0.2428 0.001 epoch:10 0.563 0.7991 0.6167 0.2363 0.4334 0.6234 0.4854 0.6879 0.7062 0.4152 0.6063 0.7558 0.2391 0.001 epoch:11 0.5637 0.7984 0.6145 0.2341 0.4345 0.6241 0.4842 0.6894 0.7083 0.4136 0.6074 0.7581 0.2355 0.001 epoch:12 0.5624 0.7969 0.6155 0.2373 0.4292 0.623 0.4853 0.6866 0.7055 0.4136 0.6026 0.756 0.2323 0.0001 epoch:13 0.5632 0.7985 0.6155 0.2358 0.4342 0.6243 0.4858 0.6878 0.7065 0.4206 0.6039 0.7576 0.2307 0.0001 epoch:14 0.562 0.7977 0.6155 0.2309 0.4291 0.6234 0.4849 0.6869 0.7051 0.4198 0.6023 0.7558 0.2305 0.0001 epoch:15 0.5631 0.7984 0.6155 0.2324 0.4326 0.6238 0.4849 0.6876 0.706 0.4151 0.6039 0.7565 0.2313 0.0001 epoch:16 0.5632 0.7992 0.6164 0.2349 0.429 0.6245 0.4859 0.6871 0.7063 0.4186 0.604 0.7569 0.2302 0.0001 epoch:17 0.5637 0.7994 0.6164 0.2325 0.4312 0.6245 0.4854 0.6873 0.706 0.4109 0.6023 0.7567 0.2312 0.0001 epoch:18 0.5626 0.7984 0.6132 0.2333 0.431 0.6238 0.4854 0.6873 0.7056 0.4158 0.6025 0.7564 0.2298 0.0001 epoch:19 0.5613 0.7981 0.612 0.2365 0.4278 0.622 0.4855 0.6867 0.7047 0.4112 0.6 0.7554 0.2305 0.0001 ================================================ FILE: pytorch_object_detection/retinaNet/train.py ================================================ import os import datetime import torch import transforms from backbone import resnet50_fpn_backbone, LastLevelP6P7 from network_files import RetinaNet from my_dataset import VOCDataSet from train_utils import GroupedBatchSampler, create_aspect_ratio_groups from train_utils import 
def main(args):
    """Train RetinaNet on PASCAL VOC 2012 on a single device and evaluate after each epoch.

    Args:
        args: parsed command-line namespace. Reads ``device``, ``data_path``,
            ``aspect_ratio_group_factor``, ``batch_size``, ``num_classes``,
            ``amp``, ``resume``, ``start_epoch``, ``epochs`` and, when
            present, ``output_dir``. ``args.start_epoch`` is overwritten
            when resuming from a checkpoint.

    Raises:
        FileNotFoundError: if ``<data_path>/VOCdevkit`` does not exist.

    Side effects:
        Appends one line per epoch to a time-stamped results file, saves a
        checkpoint per epoch into the output directory, and plots the
        loss / learning-rate and mAP curves at the end.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    # Checkpoints go to the directory requested on the command line. The
    # previous hard-coded "./save_weights" ignored --output-dir and failed
    # when only the requested directory existed.
    output_dir = getattr(args, "output_dir", None) or "./save_weights"
    os.makedirs(output_dir, exist_ok=True)

    data_transform = {
        "train": transforms.Compose([transforms.ToTensor(),
                                     transforms.RandomHorizontalFlip(0.5)]),
        "val": transforms.Compose([transforms.ToTensor()])
    }

    VOC_root = args.data_path
    # check voc root
    if os.path.exists(os.path.join(VOC_root, "VOCdevkit")) is False:
        raise FileNotFoundError("VOCdevkit dose not in path:'{}'.".format(VOC_root))

    # load train data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt
    train_dataset = VOCDataSet(VOC_root, "2012", data_transform["train"], "train.txt")
    train_sampler = None
    train_batch_sampler = None

    # Optionally build batches from images with similar aspect ratios.
    # Doing so reduces the GPU memory needed for training; enabled by default.
    if args.aspect_ratio_group_factor >= 0:
        train_sampler = torch.utils.data.RandomSampler(train_dataset)
        # Index of the aspect-ratio bin each image falls into.
        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)
        # Every batch is drawn from a single aspect-ratio bin.
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)

    # The collate_fn is custom because each sample is an (image, target)
    # pair that the default collation cannot merge into a batch.
    batch_size = args.batch_size
    # os.cpu_count() may return None; treat that as a single CPU.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using %g dataloader workers' % nw)
    if train_sampler is not None:
        # Aspect-ratio grouping requires passing a batch_sampler to the loader.
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_sampler=train_batch_sampler,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)
    else:
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_size=batch_size,
                                                        shuffle=True,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)

    # load validation data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt
    val_dataset = VOCDataSet(VOC_root, "2012", data_transform["val"], "val.txt")
    val_data_loader = torch.utils.data.DataLoader(val_dataset,
                                                  batch_size=1,
                                                  shuffle=False,
                                                  pin_memory=True,
                                                  num_workers=nw,
                                                  collate_fn=val_dataset.collate_fn)

    # create model
    # NOTE: num_classes does not include the background class.
    model = create_model(num_classes=args.num_classes)
    # print(model)

    model.to(device)

    # define optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.33)

    # When a checkpoint path is given, continue from the state saved there.
    if args.resume != "":
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp and "scaler" in checkpoint:
            scaler.load_state_dict(checkpoint["scaler"])
        print("the training process from epoch{}...".format(args.start_epoch))

    train_loss = []
    learning_rate = []
    val_map = []

    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch, printing every 50 iterations
        mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader,
                                              device, epoch, print_freq=50,
                                              warmup=True, scaler=scaler)
        train_loss.append(mean_loss.item())
        learning_rate.append(lr)

        # update the learning rate
        lr_scheduler.step()

        # evaluate on the test dataset
        coco_info = utils.evaluate(model, val_data_loader, device=device)

        # write into txt
        with open(results_file, "a") as f:
            # One line per epoch: the COCO metrics, then the mean loss and the learning rate.
            result_info = [f"{i:.4f}" for i in coco_info + [mean_loss.item()]] + [f"{lr:.6f}"]
            txt = "epoch:{} {}".format(epoch, ' '.join(result_info))
            f.write(txt + "\n")

        val_map.append(coco_info[1])  # pascal map

        # save weights
        save_files = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler': lr_scheduler.state_dict(),
            'epoch': epoch}
        if args.amp:
            save_files["scaler"] = scaler.state_dict()
        torch.save(save_files, os.path.join(output_dir, "resNetFpn-model-{}.pth".format(epoch)))

    # plot loss and lr curve
    if len(train_loss) != 0 and len(learning_rate) != 0:
        from plot_curve import plot_loss_and_lr
        plot_loss_and_lr(train_loss, learning_rate)

    # plot mAP curve
    if len(val_map) != 0:
        from plot_curve import plot_map
        plot_map(val_map)
def create_model(num_classes):
    """Build a RetinaNet with a ResNet50-FPN backbone and load pretrained weights.

    Args:
        num_classes: number of classes passed on to RetinaNet.

    Returns:
        The model with the weights from
        ./backbone/retinanet_resnet50_fpn.pth loaded non-strictly; the
        classification logits layer is left out of the load.
    """
    # P2 is skipped because it generates too many anchors (according to the paper).
    # NOTE: per the original comment the backbone defaults to FrozenBatchNorm2d
    # (BN parameters are not updated), which is advisable when the batch size is
    # small or GPU memory is limited. Plain BatchNorm2d is passed explicitly
    # here, which suits larger batch sizes.
    extra_levels = LastLevelP6P7(256, 256)
    backbone = resnet50_fpn_backbone(norm_layer=torch.nn.BatchNorm2d,
                                     returned_layers=[2, 3, 4],
                                     extra_blocks=extra_levels,
                                     trainable_layers=3)
    model = RetinaNet(backbone, num_classes)

    # Pretrained weights:
    # https://download.pytorch.org/models/retinanet_resnet50_fpn_coco-eeacb38b.pth
    pretrained = torch.load("./backbone/retinanet_resnet50_fpn.pth", map_location='cpu')

    # Drop the classifier weights: the number of classes in the custom dataset
    # need not match the pretrained one (91), and loading them would conflict.
    for head_key in ("head.classification_head.cls_logits.weight",
                     "head.classification_head.cls_logits.bias"):
        del pretrained[head_key]

    # Print the report returned by the non-strict load.
    load_report = model.load_state_dict(pretrained, strict=False)
    print(load_report)

    return model
init_distributed_mode(args) print(args) device = torch.device(args.device) # 用来保存coco_info的文件 results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")) # Data loading code print("Loading data") data_transform = { "train": transforms.Compose([transforms.ToTensor(), transforms.RandomHorizontalFlip(0.5)]), "val": transforms.Compose([transforms.ToTensor()]) } VOC_root = args.data_path # check voc root if os.path.exists(os.path.join(VOC_root, "VOCdevkit")) is False: raise FileNotFoundError("VOCdevkit dose not in path:'{}'.".format(VOC_root)) # load train data set # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt train_dataset = VOCDataSet(VOC_root, "2012", data_transform["train"], "train.txt") # load validation data set # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt val_dataset = VOCDataSet(VOC_root, "2012", data_transform["val"], "val.txt") print("Creating data loaders") if args.distributed: train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) test_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset) else: train_sampler = torch.utils.data.RandomSampler(train_dataset) test_sampler = torch.utils.data.SequentialSampler(val_dataset) if args.aspect_ratio_group_factor >= 0: # 统计所有图像比例在bins区间中的位置索引 group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor) train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size) else: train_batch_sampler = torch.utils.data.BatchSampler( train_sampler, args.batch_size, drop_last=True) data_loader = torch.utils.data.DataLoader( train_dataset, batch_sampler=train_batch_sampler, num_workers=args.workers, collate_fn=train_dataset.collate_fn) data_loader_test = torch.utils.data.DataLoader( val_dataset, batch_size=1, sampler=test_sampler, num_workers=args.workers, collate_fn=train_dataset.collate_fn) print("Creating model") # create model # 注意:不包含背景 model = create_model(num_classes=args.num_classes) 
model.to(device) model_without_ddp = model if args.distributed: model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) model_without_ddp = model.module params = [p for p in model.parameters() if p.requires_grad] optimizer = torch.optim.SGD( params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) scaler = torch.cuda.amp.GradScaler() if args.amp else None # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma) lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma) # 如果传入resume参数,即上次训练的权重地址,则接着上次的参数训练 if args.resume: # If map_location is missing, torch.load will first load the module to CPU # and then copy each parameter to where it was saved, # which would result in all processes on the same machine using the same set of devices. checkpoint = torch.load(args.resume, map_location='cpu') # 读取之前保存的权重文件(包括优化器以及学习率策略) model_without_ddp.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) args.start_epoch = checkpoint['epoch'] + 1 if args.amp and "scaler" in checkpoint: scaler.load_state_dict(checkpoint["scaler"]) if args.test_only: utils.evaluate(model, data_loader_test, device=device) return train_loss = [] learning_rate = [] val_map = [] print("Start training") start_time = time.time() for epoch in range(args.start_epoch, args.epochs): if args.distributed: train_sampler.set_epoch(epoch) mean_loss, lr = utils.train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq, warmup=True, scaler=scaler) train_loss.append(mean_loss.item()) learning_rate.append(lr) # update learning rate lr_scheduler.step() # evaluate after every epoch coco_info = utils.evaluate(model, data_loader_test, device=device) val_map.append(coco_info[1]) # pascal mAP # 只在主进程上进行写操作 if args.rank in [-1, 0]: # write into txt with open(results_file, 
if __name__ == "__main__":
    import argparse

    def _str2bool(value):
        """Convert a command-line token such as "true", "0" or "no" to a bool."""
        if isinstance(value, bool):
            return value
        text = str(value).strip().lower()
        if text in ("1", "true", "t", "yes", "y", "on"):
            return True
        if text in ("0", "false", "f", "no", "n", "off"):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser(
        description=__doc__)

    # Root directory of the training data (the one containing VOCdevkit)
    parser.add_argument('--data-path', default='/data', help='dataset')
    # Training device type
    parser.add_argument('--device', default='cuda', help='device')
    # Number of object classes (background not included)
    parser.add_argument('--num-classes', default=20, type=int, help='num_classes')
    # batch_size on each GPU
    parser.add_argument('-b', '--batch-size', default=4, type=int,
                        help='images per gpu, the total batch size is $NGPU x batch_size')
    # Epoch index to start training from
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # Total number of training epochs
    parser.add_argument('--epochs', default=20, type=int, metavar='N',
                        help='number of total epochs to run')
    # Number of workers for data loading and preprocessing
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    # Learning rate; set it according to the GPU count and batch_size: 0.02 / 8 * num_GPU
    parser.add_argument('--lr', default=0.02, type=float,
                        help='initial learning rate, 0.02 is the default value for training '
                             'on 8 gpus and 2 images_per_gpu')
    # momentum parameter of SGD
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # weight_decay parameter of SGD
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # Parameter for torch.optim.lr_scheduler.StepLR
    parser.add_argument('--lr-step-size', default=8, type=int, help='decrease lr every step-size epochs')
    # Parameter for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-steps', default=[7, 12], nargs='+', type=int, help='decrease lr every step-size epochs')
    # Parameter for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')
    # How often progress is printed during training
    parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
    # Where output files are saved
    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')
    # Continue training from a previous result
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
    # Do not train, only test
    parser.add_argument(
        "--test-only",
        dest="test_only",
        help="Only test the model",
        action="store_true",
    )

    # Number of processes to start (processes, not threads)
    parser.add_argument('--world-size', default=4, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
    # Mixed precision training (needs a GPU that supports it).
    # Without a converter every supplied value arrived as a non-empty string,
    # so "--amp False" was truthy and switched AMP on. The value is now parsed
    # as a boolean, and a bare "--amp" means True.
    parser.add_argument("--amp", default=False, type=_str2bool, nargs="?", const=True,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()

    # If an output directory was given, create it when it does not exist yet.
    if args.output_dir:
        mkdir(args.output_dir)

    main(args)
class CocoEvaluator(object):
    """Collect predictions and run one COCOeval per requested IoU type.

    Attributes set in __init__:
        coco_gt: deep copy of the ground-truth COCO object.
        iou_types: the IoU types to evaluate.
        coco_eval: dict mapping each IoU type to its COCOeval instance.
        img_ids: image ids seen so far across all update() calls.
        eval_imgs: dict mapping each IoU type to the per-update evaluation
            arrays returned by evaluate().
    """

    def __init__(self, coco_gt, iou_types):
        assert isinstance(iou_types, (list, tuple))
        # Work on a private copy so the caller's ground truth is never modified.
        coco_gt = copy.deepcopy(coco_gt)
        self.coco_gt = coco_gt

        self.iou_types = iou_types
        self.coco_eval = {iou_type: COCOeval(coco_gt, iouType=iou_type)
                          for iou_type in iou_types}

        self.img_ids = []
        self.eval_imgs = {iou_type: [] for iou_type in iou_types}

    def update(self, predictions):
        """Evaluate a batch of predictions given as {image_id: prediction dict}."""
        img_ids = list(np.unique(list(predictions.keys())))
        self.img_ids.extend(img_ids)

        for iou_type in self.iou_types:
            results = self.prepare(predictions, iou_type)
            # With no results an empty COCO object stands in for the detections.
            if results:
                coco_dt = loadRes(self.coco_gt, results)
            else:
                coco_dt = COCO()

            evaluator = self.coco_eval[iou_type]
            evaluator.cocoDt = coco_dt
            evaluator.params.imgIds = list(img_ids)
            # evaluate() also returns image ids; they replace img_ids for
            # any IoU type processed after this one.
            img_ids, eval_imgs = evaluate(evaluator)

            self.eval_imgs[iou_type].append(eval_imgs)

    def synchronize_between_processes(self):
        """Concatenate the collected arrays along axis 2 and merge them into each COCOeval."""
        for iou_type in self.iou_types:
            stacked = np.concatenate(self.eval_imgs[iou_type], 2)
            self.eval_imgs[iou_type] = stacked
            create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, stacked)

    def accumulate(self):
        """Call accumulate() on every COCOeval."""
        for evaluator in self.coco_eval.values():
            evaluator.accumulate()

    def summarize(self):
        """Print the IoU type followed by that COCOeval's summary."""
        for iou_type, evaluator in self.coco_eval.items():
            print("IoU metric: {}".format(iou_type))
            evaluator.summarize()

    def prepare(self, predictions, iou_type):
        """Convert predictions into COCO result records for the given IoU type.

        Raises:
            ValueError: if iou_type is not "bbox", "segm" or "keypoints".
        """
        if iou_type == "bbox":
            return self.prepare_for_coco_detection(predictions)
        if iou_type == "segm":
            return self.prepare_for_coco_segmentation(predictions)
        if iou_type == "keypoints":
            return self.prepare_for_coco_keypoint(predictions)
        raise ValueError("Unknown iou type {}".format(iou_type))

    @staticmethod
    def _as_records(image_id, labels, scores, payload_key, payloads):
        """Build one result dict per payload, pairing it with its label and score by index."""
        records = []
        for k, payload in enumerate(payloads):
            records.append({
                "image_id": image_id,
                "category_id": labels[k],
                payload_key: payload,
                "score": scores[k],
            })
        return records

    def prepare_for_coco_detection(self, predictions):
        """Return bbox records, with boxes converted by convert_to_xywh."""
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            boxes = convert_to_xywh(prediction["boxes"]).tolist()
            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()

            coco_results.extend(self._as_records(original_id, labels, scores, "bbox", boxes))
        return coco_results

    def prepare_for_coco_segmentation(self, predictions):
        """Return segmentation records, with masks thresholded at 0.5 and RLE-encoded."""
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()
            binary_masks = prediction["masks"] > 0.5

            rles = []
            for mask in binary_masks:
                encoded = mask_util.encode(
                    np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0]
                # "counts" is decoded from bytes to str.
                encoded["counts"] = encoded["counts"].decode("utf-8")
                rles.append(encoded)

            coco_results.extend(self._as_records(original_id, labels, scores, "segmentation", rles))
        return coco_results

    def prepare_for_coco_keypoint(self, predictions):
        """Return keypoint records, with each detection's keypoints flattened to one list."""
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            # NOTE: the converted boxes are not written into the records; the
            # conversion is kept exactly as in the original code path.
            boxes = convert_to_xywh(prediction["boxes"]).tolist()
            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()
            keypoints = prediction["keypoints"].flatten(start_dim=1).tolist()

            coco_results.extend(self._as_records(original_id, labels, scores, 'keypoints', keypoints))
        return coco_results
def merge(img_ids, eval_imgs):
    """Combine image ids and per-image evaluation results from every process.

    Args:
        img_ids: image ids evaluated by this process.
        eval_imgs: evaluation array of this process; the image axis is axis 2.

    Returns:
        tuple: (sorted unique image ids, evaluation array restricted to the
        first occurrence of each id along the last axis).
    """
    ids_per_rank = all_gather(img_ids)
    evals_per_rank = all_gather(eval_imgs)

    flat_ids = np.array([image_id for rank_ids in ids_per_rank for image_id in rank_ids])
    joined_evals = np.concatenate(list(evals_per_rank), 2)

    # keep only unique (and in sorted order) images
    unique_ids, first_seen = np.unique(flat_ids, return_index=True)
    return unique_ids, joined_evals[..., first_seen]
def loadRes(self, resFile):
    """
    Load results and return them wrapped in a new COCO api object.

    Copy of ``pycocotools.coco.COCO.loadRes`` without the prints.

    :param self: ground-truth COCO api object the results refer to
    :param resFile: path (str/bytes) of a JSON result file, a numpy array that
        is passed to ``self.loadNumpyAnnotations``, or a list of result dicts.
        The dicts are modified in place ('id', 'area', 'bbox', ... are added).
    :return: res (obj) : result api object
    :raises AssertionError: if the results are not a list or refer to image
        ids that are not part of ``self``
    """
    res = COCO()
    res.dataset['images'] = [img for img in self.dataset['images']]

    # `torch._six.string_classes` no longer exists in current PyTorch releases;
    # plain str/bytes covers the same inputs.
    if isinstance(resFile, (str, bytes)):
        with open(resFile) as f:  # close the handle instead of leaking it
            anns = json.load(f)
    elif isinstance(resFile, np.ndarray):
        anns = self.loadNumpyAnnotations(resFile)
    else:
        anns = resFile
    assert isinstance(anns, list), 'results in not an array of objects'

    annsImgIds = set(ann['image_id'] for ann in anns)
    assert annsImgIds <= set(self.getImgIds()), \
        'Results do not correspond to current coco set'

    # The result type is detected from the first entry. An empty list has no
    # first entry, so no branch below runs and an empty result object is built
    # instead of failing with IndexError.
    first = anns[0] if anns else {}
    if 'caption' in first:
        imgIds = set(img['id'] for img in res.dataset['images']) & annsImgIds
        res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
        for ann_id, ann in enumerate(anns, start=1):
            ann['id'] = ann_id
    elif 'bbox' in first and not first['bbox'] == []:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for ann_id, ann in enumerate(anns, start=1):
            bb = ann['bbox']
            x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
            if 'segmentation' not in ann:
                # use the box outline as a polygon
                ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
            ann['area'] = bb[2] * bb[3]
            ann['id'] = ann_id
            ann['iscrowd'] = 0
    elif 'segmentation' in first:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for ann_id, ann in enumerate(anns, start=1):
            # now only support compressed RLE format as segmentation results
            ann['area'] = maskUtils.area(ann['segmentation'])
            if 'bbox' not in ann:
                ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
            ann['id'] = ann_id
            ann['iscrowd'] = 0
    elif 'keypoints' in first:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for ann_id, ann in enumerate(anns, start=1):
            s = ann['keypoints']
            # every third value starting at 0 is x, starting at 1 is y
            x = s[0::3]
            y = s[1::3]
            x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y)
            ann['area'] = (x2 - x1) * (y2 - y1)
            ann['id'] = ann_id
            ann['bbox'] = [x1, y1, x2 - x1, y2 - y1]

    res.dataset['annotations'] = anns
    createIndex(res)
    return res
class SmoothedValue(object):
    """Keep a sliding window of recent values together with running totals.

    The window (a bounded deque) backs ``median``, ``avg``, ``max`` and
    ``value``; ``total`` and ``count`` cover every update and back
    ``global_avg``.
    """

    def __init__(self, window_size=20, fmt=None):
        """
        Args:
            window_size (int): number of most recent values kept in the window.
            fmt (str | None): format string used by ``str()``; it may reference
                median, avg, global_avg, max and value.
        """
        # bounded deque: appending beyond maxlen drops the oldest entry
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0
        self.fmt = "{value:.4f} ({global_avg:.4f})" if fmt is None else fmt

    def update(self, value, n=1):
        """Record ``value`` once in the window and ``n`` times in the totals."""
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Sum ``count`` and ``total`` over all processes.

        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        packed = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda")
        dist.barrier()
        dist.all_reduce(packed)
        summed_count, summed_total = packed.tolist()
        self.count = int(summed_count)
        self.total = summed_total

    @property
    def median(self):
        """Median of the values currently in the window (read-only)."""
        return torch.tensor(list(self.deque)).median().item()

    @property
    def avg(self):
        """Mean of the values currently in the window."""
        return torch.tensor(list(self.deque), dtype=torch.float32).mean().item()

    @property
    def global_avg(self):
        """Weighted mean over every update since construction."""
        return self.total / self.count

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    @property
    def value(self):
        """Most recently recorded value."""
        return self.deque[-1]

    def __str__(self):
        fields = {
            "median": self.median,
            "avg": self.avg,
            "global_avg": self.global_avg,
            "max": self.max,
            "value": self.value,
        }
        return self.fmt.format(**fields)
def reduce_dict(input_dict, average=True):
    """
    Reduce the values of a dictionary over all processes.

    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum

    Returns:
        dict: same keys as ``input_dict`` with the reduced values. With fewer
        than two processes ``input_dict`` itself is returned unchanged.
    """
    num_procs = get_world_size()
    if num_procs < 2:  # single process: nothing to reduce
        return input_dict

    with torch.no_grad():
        # sort the keys so that they are consistent across processes
        ordered_keys = sorted(input_dict.keys())
        stacked = torch.stack([input_dict[key] for key in ordered_keys], dim=0)
        dist.all_reduce(stacked)
        if average:
            stacked /= num_procs
        return dict(zip(ordered_keys, stacked))
def mkdir(path):
    """Create directory ``path`` including missing parents.

    Does nothing when the directory already exists.

    Raises:
        OSError: if the directory cannot be created. This includes
            FileExistsError when ``path`` exists but is not a directory; the
            hand-written EEXIST check silently accepted that case.
    """
    os.makedirs(path, exist_ok=True)
def init_distributed_mode(args):
    """Configure ``args`` and the process group from environment variables.

    * RANK and WORLD_SIZE set: rank, world size and gpu (LOCAL_RANK) are read
      from the environment.
    * otherwise SLURM_PROCID set: rank comes from SLURM_PROCID and the gpu is
      ``rank % torch.cuda.device_count()``; ``args.world_size`` is used as
      already present on ``args``.
    * neither: ``args.distributed`` is set to False and the function returns.

    In the distributed cases ``args.dist_url`` must already be set. The NCCL
    process group is initialised, all processes wait at a barrier and
    ``setup_for_distributed`` is called with whether this is rank 0.
    """
    env = os.environ
    torch_launch = 'RANK' in env and 'WORLD_SIZE' in env

    if not torch_launch and 'SLURM_PROCID' not in env:
        print('Not using distributed mode')
        args.distributed = False
        return

    if torch_launch:
        args.rank = int(env["RANK"])
        args.world_size = int(env['WORLD_SIZE'])
        args.gpu = int(env['LOCAL_RANK'])
    else:
        args.rank = int(env['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    banner = '| distributed init (rank {}): {}'.format(args.rank, args.dist_url)
    print(banner, flush=True)
    torch.distributed.init_process_group(
        backend=args.dist_backend,
        init_method=args.dist_url,
        world_size=args.world_size,
        rank=args.rank,
    )
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)
class GroupedBatchSampler(BatchSampler):
    """
    Batch sampler whose mini-batches only contain indices of a single group.

    Indices are consumed in the order of the wrapped sampler, so the batches
    follow that ordering as closely as the grouping allows.

    Arguments:
        sampler (Sampler): Base sampler.
        group_ids (list[int]): If the sampler produces indices in range [0, N),
            `group_ids` must be a list of `N` ints which contains the group id of each sample.
            The group ids must be a continuous set of integers starting from
            0, i.e. they must be in the range [0, num_groups).
        batch_size (int): Size of mini-batch.
    """

    def __init__(self, sampler, group_ids, batch_size):
        if not isinstance(sampler, Sampler):
            message = (
                "sampler should be an instance of "
                "torch.utils.data.Sampler, but got sampler={}"
            )
            raise ValueError(message.format(sampler))
        self.sampler = sampler
        self.group_ids = group_ids
        self.batch_size = batch_size

    def __iter__(self):
        pending = defaultdict(list)   # indices waiting for their group's batch to fill up
        seen = defaultdict(list)      # every index met so far, per group

        num_batches = 0
        for idx in self.sampler:
            group_id = self.group_ids[idx]
            bucket = pending[group_id]
            bucket.append(idx)
            seen[group_id].append(idx)
            if len(bucket) == self.batch_size:
                yield bucket
                num_batches += 1
                del pending[group_id]
            # NOTE: this lookup re-creates an empty bucket after the `del` above
            assert len(pending[group_id]) < self.batch_size

        # The base sampler is exhausted. Emit padded batches until len(self)
        # batches were produced so that the size of the sampler is deterministic.
        num_remaining = len(self) - num_batches
        if num_remaining <= 0:
            return

        # fullest buckets first
        by_fill = sorted(pending.items(), key=lambda item: len(item[1]), reverse=True)
        for group_id, _ in by_fill:
            bucket = pending[group_id]
            shortfall = self.batch_size - len(bucket)
            # pad with indices of the same group that were already seen
            filler = _repeat_to_at_least(seen[group_id], shortfall)
            bucket.extend(filler[:shortfall])
            assert len(bucket) == self.batch_size
            yield bucket
            num_remaining -= 1
            if num_remaining == 0:
                break
        assert num_remaining == 0

    def __len__(self):
        return len(self.sampler) // self.batch_size
_compute_aspect_ratios_voc_dataset(dataset, indices=None): if indices is None: indices = range(len(dataset)) aspect_ratios = [] for i in indices: # this doesn't load the data into memory, because PIL loads it lazily width, height = Image.open(dataset.images[i]).size aspect_ratio = float(width) / float(height) aspect_ratios.append(aspect_ratio) return aspect_ratios def _compute_aspect_ratios_subset_dataset(dataset, indices=None): if indices is None: indices = range(len(dataset)) ds_indices = [dataset.indices[i] for i in indices] return compute_aspect_ratios(dataset.dataset, ds_indices) def compute_aspect_ratios(dataset, indices=None): if hasattr(dataset, "get_height_and_width"): return _compute_aspect_ratios_custom_dataset(dataset, indices) if isinstance(dataset, torchvision.datasets.CocoDetection): return _compute_aspect_ratios_coco_dataset(dataset, indices) if isinstance(dataset, torchvision.datasets.VOCDetection): return _compute_aspect_ratios_voc_dataset(dataset, indices) if isinstance(dataset, torch.utils.data.Subset): return _compute_aspect_ratios_subset_dataset(dataset, indices) # slow path return _compute_aspect_ratios_slow(dataset, indices) def _quantize(x, bins): bins = copy.deepcopy(bins) bins = sorted(bins) # bisect_right:寻找y元素按顺序应该排在bins中哪个元素的右边,返回的是索引 quantized = list(map(lambda y: bisect.bisect_right(bins, y), x)) return quantized def create_aspect_ratio_groups(dataset, k=0): # 计算所有数据集中的图片width/height比例 aspect_ratios = compute_aspect_ratios(dataset) # 将[0.5, 2]区间划分成2*k+1等份 bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist() if k > 0 else [1.0] # 统计所有图像比例在bins区间中的位置索引 groups = _quantize(aspect_ratios, bins) # count number of elements per group # 统计每个区间的频次 counts = np.unique(groups, return_counts=True)[1] fbins = [0] + bins + [np.inf] print("Using {} as bins for aspect ratio quantization".format(fbins)) print("Count of instances per bin: {}".format(counts)) return groups ================================================ FILE: 
def train_one_epoch(model, optimizer, data_loader, device, epoch,
                    print_freq=50, warmup=False, scaler=None):
    """Train ``model`` for one pass over ``data_loader``.

    Args:
        model: detection model; called as ``model(images, targets)`` and
            expected to return a dict of losses.
        optimizer: optimizer updating the model parameters.
        data_loader: yields ``(images, targets)`` batches.
        device: device the images and target tensors are moved to.
        epoch (int): epoch index; used for the log header and to decide
            whether warmup applies.
        print_freq (int): log every ``print_freq`` iterations.
        warmup (bool): if True and ``epoch == 0``, a warmup learning-rate
            schedule is stepped after every iteration.
        scaler: optional gradient scaler; when given, the forward pass runs
            under autocast and the scaler drives backward/step.

    Returns:
        tuple: ``(mloss, now_lr)`` - running mean of the reduced total loss
        (1-element tensor on ``device``) and the learning rate of the first
        parameter group after the last step.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if epoch == 0 and warmup is True:
        # warm up the learning rate during the first epoch only
        warmup_factor = 1.0 / 1000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    mloss = torch.zeros(1).to(device)  # mean losses
    # Defined up front so the return below is valid even if the loader yields
    # no batch (previously an UnboundLocalError).
    now_lr = optimizer.param_groups[0]["lr"]
    for i, [images, targets] in enumerate(metric_logger.log_every(data_loader, print_freq, header)):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # mixed-precision context manager; disabled when no scaler is passed
        with torch.cuda.amp.autocast(enabled=scaler is not None):
            loss_dict = model(images, targets)

            losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purpose
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()
        # running mean of the training loss
        mloss = (mloss * i + loss_value) / (i + 1)  # update mean losses

        if not math.isfinite(loss_value):
            # stop training when the loss is inf or nan
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        if scaler is not None:
            scaler.scale(losses).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            losses.backward()
            optimizer.step()

        if lr_scheduler is not None:
            # only set during the warmup epoch
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        now_lr = optimizer.param_groups[0]["lr"]
        metric_logger.update(lr=now_lr)

    return mloss, now_lr
def summarize(self, catId=None):
    """
    Compute summary metrics for accumulated evaluation results.
    Note this function can *only* be applied on the default parameter setting.

    Args:
        self: evaluation object providing ``params`` (iouThrs, areaRngLbl,
            maxDets) and ``eval`` (the 'precision' and 'recall' arrays).
        catId (int | None): if an int, metrics are computed for that index of
            the category axis only; otherwise over all categories.

    Returns:
        tuple: ``(stats, print_info)`` - list with the 12 metric values
        (6 x AP, 6 x AR; -1 where no valid entry exists) and the formatted
        report, one metric per line.

    Raises:
        Exception: if ``self.eval`` is empty.
    """
    # Check before the first access to self.eval; after the metric
    # computation the check could never trigger.
    if not self.eval:
        raise Exception('Please run accumulate() first')

    def _summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):
        p = self.params
        iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
        titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
        typeStr = '(AP)' if ap == 1 else '(AR)'
        iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
            if iouThr is None else '{:0.2f}'.format(iouThr)

        aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
        mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]

        if ap == 1:
            # dimension of precision: [TxRxKxAxM]
            s = self.eval['precision']
            # select the requested IoU threshold
            if iouThr is not None:
                # tolerant comparison: an exact float `==` silently selects
                # nothing if the stored threshold differs by rounding error
                t = np.where(np.isclose(iouThr, p.iouThrs))[0]
                s = s[t]

            if isinstance(catId, int):
                s = s[:, :, catId, aind, mind]
            else:
                s = s[:, :, :, aind, mind]
        else:
            # dimension of recall: [TxKxAxM]
            s = self.eval['recall']
            if iouThr is not None:
                t = np.where(np.isclose(iouThr, p.iouThrs))[0]
                s = s[t]

            if isinstance(catId, int):
                s = s[:, catId, aind, mind]
            else:
                s = s[:, :, aind, mind]

        # entries <= -1 are excluded from the mean
        valid = s[s > -1]
        if len(valid) == 0:
            mean_s = -1
        else:
            mean_s = np.mean(valid)

        print_string = iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)
        return mean_s, print_string

    max_dets = self.params.maxDets
    # same order as the 12 entries of the standard COCO summary
    requests = [
        dict(ap=1),
        dict(ap=1, iouThr=.5, maxDets=max_dets[2]),
        dict(ap=1, iouThr=.75, maxDets=max_dets[2]),
        dict(ap=1, areaRng='small', maxDets=max_dets[2]),
        dict(ap=1, areaRng='medium', maxDets=max_dets[2]),
        dict(ap=1, areaRng='large', maxDets=max_dets[2]),
        dict(ap=0, maxDets=max_dets[0]),
        dict(ap=0, maxDets=max_dets[1]),
        dict(ap=0, maxDets=max_dets[2]),
        dict(ap=0, areaRng='small', maxDets=max_dets[2]),
        dict(ap=0, areaRng='medium', maxDets=max_dets[2]),
        dict(ap=0, areaRng='large', maxDets=max_dets[2]),
    ]

    stats, print_list = [], []
    for request in requests:
        value, line = _summarize(**request)
        stats.append(value)
        print_list.append(line)

    print_info = "\n".join(print_list)
    return stats, print_info
if __name__ == "__main__":
    import argparse

    # Command-line entry point: collect the validation settings and pass them to main().
    parser = argparse.ArgumentParser(
        description=__doc__)

    # Device used for inference
    parser.add_argument('--device', default='cuda:0', help='device')

    # Number of object classes to detect.
    # The default is a real int rather than the string '20', so it matches type=int.
    parser.add_argument('--num-classes', type=int, default=20, help='number of classes')

    # Root directory of the dataset (VOCdevkit)
    parser.add_argument('--data-path', default='/data', help='dataset root')

    # Trained weights file
    parser.add_argument('--weights-path', default='./save_weights/model.pth', type=str, help='training weights')

    # Batch size used during validation.
    # '--batch-size' matches the hyphenated style of the other options; the
    # original '--batch_size' spelling is kept as an alias and dest is pinned,
    # so existing command lines and args.batch_size keep working.
    parser.add_argument('--batch-size', '--batch_size', dest='batch_size', default=1, type=int, metavar='N',
                        help='batch size when validation.')

    args = parser.parse_args()

    main(args)
def _text_size(font, text: str):
    """Return the (width, height) extent of *text* rendered with *font*."""
    # ImageFont.getsize() was removed in Pillow 10. getbbox() returns
    # (left, top, right, bottom) relative to the drawing origin; its right and
    # bottom edges are the extent the legacy getsize() used to report.
    if hasattr(font, "getbbox"):
        _, _, right, bottom = font.getbbox(text)
        return right, bottom
    # Older Pillow releases that predate getbbox().
    return font.getsize(text)


def draw_text(draw,
              box: list,
              cls: int,
              score: float,
              category_index: dict,
              color: str,
              font: str = 'arial.ttf',
              font_size: int = 24):
    """
    Draw the "<class name>: <score>%" label of one detection onto the image.

    Args:
        draw: drawing context; its ``rectangle`` and ``text`` methods are used
        box: [left, top, right, bottom] of the detection
        cls: class id; looked up in ``category_index`` as ``str(cls)``
        score: confidence, rendered as an integer percentage
        category_index: mapping from class id (as a string) to class name
        color: fill colour of the label background
        font: TrueType font file; the default PIL font is used if it cannot be loaded
        font_size: font size used when loading ``font``
    """
    try:
        font_obj = ImageFont.truetype(font, font_size)
    except IOError:
        font_obj = ImageFont.load_default()

    left, top, _, bottom = box
    display_str = f"{category_index[str(cls)]}: {int(100 * score)}%"

    # The label is laid out one character at a time, so measure each character once
    # and reuse the measurement for both the label height and the drawing loop.
    char_sizes = [_text_size(font_obj, ch) for ch in display_str]

    # Each character gets a top and bottom margin of 0.05x.
    display_str_height = (1 + 2 * 0.05) * max(height for _, height in char_sizes)

    # If the label would stick out above the top of the image, place it
    # below the bounding box instead of above it.
    if top > display_str_height:
        text_top = top - display_str_height
        text_bottom = top
    else:
        text_top = bottom
        text_bottom = bottom + display_str_height

    for ch, (text_width, _) in zip(display_str, char_sizes):
        margin = np.ceil(0.05 * text_width)
        draw.rectangle([(left, text_top),
                        (left + text_width + 2 * margin, text_bottom)], fill=color)
        draw.text((left + margin, text_top),
                  ch,
                  fill='black',
                  font=font_obj)
        # Advance the pen to the start of the next character.
        left += text_width
class VOCDataSet(Dataset):
    """Read and parse the PASCAL VOC2007/2012 detection dataset.

    Every sample is described by one XML annotation file listed in
    ``ImageSets/Main/<train_set>``. Ground-truth boxes are returned as
    [xmin, ymin, xmax, ymax] divided by the image width/height recorded in
    the XML, i.e. as relative coordinates.
    """

    def __init__(self, voc_root, year="2012", transforms=None, train_set='train.txt'):
        """
        Args:
            voc_root: the VOCdevkit directory itself, or the directory containing it
            year: "2007" or "2012"
            transforms: optional callable applied as ``transforms(image, target)``
            train_set: split file name inside ``ImageSets/Main``
        """
        assert year in ["2007", "2012"], "year must be in ['2007', '2012']"
        # Be tolerant about which directory the caller passes in.
        if "VOCdevkit" in voc_root:
            self.root = os.path.join(voc_root, f"VOC{year}")
        else:
            self.root = os.path.join(voc_root, "VOCdevkit", f"VOC{year}")
        self.img_root = os.path.join(self.root, "JPEGImages")
        self.annotations_root = os.path.join(self.root, "Annotations")

        txt_list = os.path.join(self.root, "ImageSets", "Main", train_set)
        with open(txt_list) as read:
            names = [line.strip() for line in read]
        # One annotation file per non-empty line of the split file.
        self.xml_list = [os.path.join(self.annotations_root, name + ".xml")
                         for name in names if name]

        # Class-name -> integer-label mapping, read relative to the working directory.
        json_file = "./pascal_voc_classes.json"
        assert os.path.exists(json_file), "{} file not exist.".format(json_file)
        with open(json_file, 'r') as f:
            self.class_dict = json.load(f)

        self.transforms = transforms

    def __len__(self):
        return len(self.xml_list)

    def _load_annotation(self, idx: int):
        """Return ``(xml_path, annotation_dict)`` for sample *idx*."""
        xml_path = self.xml_list[idx]
        # Read bytes rather than text: lxml refuses str input that carries an
        # XML encoding declaration, and bytes also avoid depending on the
        # platform's default text encoding.
        with open(xml_path, "rb") as fid:
            xml = etree.fromstring(fid.read())
        return xml_path, self.parse_xml_to_dict(xml)["annotation"]

    def _build_target(self, data: dict, idx: int, xml_path: str, skip_degenerate: bool) -> dict:
        """Convert a parsed annotation into the tensor target dictionary.

        Args:
            data: the ``annotation`` dictionary produced by ``parse_xml_to_dict``
            idx: sample index, stored as ``image_id``
            xml_path: only used in the warning message
            skip_degenerate: drop (and warn about) boxes whose width or height is <= 0
        """
        data_height = int(data["size"]["height"])
        data_width = int(data["size"]["width"])

        boxes = []
        labels = []
        iscrowd = []
        for obj in data["object"]:
            # Convert the ground-truth box to relative coordinates in [0, 1].
            xmin = float(obj["bndbox"]["xmin"]) / data_width
            xmax = float(obj["bndbox"]["xmax"]) / data_width
            ymin = float(obj["bndbox"]["ymin"]) / data_height
            ymax = float(obj["bndbox"]["ymax"]) / data_height

            # Some annotations contain boxes with zero width or height; such
            # boxes make the regression loss NaN, so they are dropped for training.
            if skip_degenerate and (xmax <= xmin or ymax <= ymin):
                print("Warning: in '{}' xml, there are some bbox w/h <=0".format(xml_path))
                continue

            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(self.class_dict[obj["name"]])
            # A missing <difficult> tag is treated as "not difficult".
            iscrowd.append(int(obj.get("difficult", 0)))

        num_boxes = len(boxes)
        # Force an (N, 4) shape so the column indexing below also works when N == 0.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(num_boxes, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        iscrowd = torch.as_tensor(iscrowd, dtype=torch.int64)
        height_width = torch.as_tensor([data_height, data_width], dtype=torch.int64)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        return {
            "boxes": boxes,
            "labels": labels,
            "image_id": image_id,
            "area": area,
            "iscrowd": iscrowd,
            "height_width": height_width,
        }

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample *idx*, after the optional transforms.

        Raises:
            ValueError: if the image file is not a JPEG.
            AssertionError: if the annotation contains no ``object`` entry.
        """
        xml_path, data = self._load_annotation(idx)

        img_path = os.path.join(self.img_root, data["filename"])
        image = Image.open(img_path)
        if image.format != "JPEG":
            raise ValueError("Image '{}' format not JPEG".format(img_path))

        assert "object" in data, "{} lack of object information.".format(xml_path)
        target = self._build_target(data, idx, xml_path, skip_degenerate=True)

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        return image, target

    def get_height_and_width(self, idx):
        """Return ``(height, width)`` of sample *idx* as recorded in its XML file."""
        _, data = self._load_annotation(idx)
        data_height = int(data["size"]["height"])
        data_width = int(data["size"]["width"])
        return data_height, data_width

    def parse_xml_to_dict(self, xml):
        """
        Recursively convert an XML element tree into nested dictionaries
        (modelled on tensorflow's recursive_parse_xml_to_dict).

        Args:
            xml: xml tree obtained by parsing XML file contents using lxml.etree

        Returns:
            Python dictionary holding XML contents.
        """
        if len(xml) == 0:
            # Leaf element: return its text under its tag.
            return {xml.tag: xml.text}

        result = {}
        for child in xml:
            child_result = self.parse_xml_to_dict(child)
            if child.tag != 'object':
                result[child.tag] = child_result[child.tag]
            else:
                # An annotation may contain several objects, so they are collected in a list.
                result.setdefault(child.tag, []).append(child_result[child.tag])
        return {xml.tag: result}

    def coco_index(self, idx):
        """
        Build the target of sample *idx* for pycocotools statistics.

        Only the XML file is read; the image is never opened and no transform
        is applied. Unlike ``__getitem__``, boxes with zero width or height
        are kept.

        Args:
            idx: index of the sample
        """
        xml_path, data = self._load_annotation(idx)
        return self._build_target(data, idx, xml_path, skip_degenerate=False)

    @staticmethod
    def collate_fn(batch):
        """Turn a list of ``(image, target)`` pairs into ``(images, targets)`` tuples."""
        images, targets = tuple(zip(*batch))
        return images, targets
def plot_loss_and_lr(train_loss, learning_rate):
    """
    Plot the training loss and the learning rate per epoch on twin y-axes and
    save the figure as ``./loss_and_lr<timestamp>.png``.

    Plotting is best-effort: any error is printed instead of being raised.

    Args:
        train_loss: sequence with one loss value per epoch
        learning_rate: sequence with one learning-rate value per epoch
    """
    fig = None
    try:
        x = list(range(len(train_loss)))
        fig, ax1 = plt.subplots(1, 1)
        ax1.plot(x, train_loss, 'r', label='loss')
        ax1.set_xlabel("epoch")
        ax1.set_ylabel("loss")
        ax1.set_title("Train Loss and lr")

        # Second y-axis sharing the same x-axis, used for the learning rate.
        ax2 = ax1.twinx()
        ax2.plot(x, learning_rate, label='lr')
        ax2.set_ylabel("learning rate")
        ax2.set_xlim(0, len(train_loss))

        # Draw one legend that lists the curves of both axes, instead of a
        # separate legend per axis overlapping each other.
        handles1, labels1 = ax1.get_legend_handles_labels()
        handles2, labels2 = ax2.get_legend_handles_labels()
        ax2.legend(handles1 + handles2, labels1 + labels2, loc='upper right')

        # Leave room on the right so the saved image is not cut off.
        fig.subplots_adjust(right=0.8)
        fig.savefig('./loss_and_lr{}.png'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")))
        print("successful save loss curve! ")
    except Exception as e:
        print(e)
    finally:
        # Release the figure even when plotting or saving failed.
        if fig is not None:
            plt.close(fig)
torch.no_grad(): # initial model init_img = torch.zeros((1, 3, 300, 300), device=device) model(init_img) time_start = time_synchronized() predictions = model(img.to(device))[0] # bboxes_out, labels_out, scores_out time_end = time_synchronized() print("inference+NMS time: {}".format(time_end - time_start)) predict_boxes = predictions[0].to("cpu").numpy() predict_boxes[:, [0, 2]] = predict_boxes[:, [0, 2]] * original_img.size[0] predict_boxes[:, [1, 3]] = predict_boxes[:, [1, 3]] * original_img.size[1] predict_classes = predictions[1].to("cpu").numpy() predict_scores = predictions[2].to("cpu").numpy() if len(predict_boxes) == 0: print("没有检测到任何目标!") plot_img = draw_objs(original_img, predict_boxes, predict_classes, predict_scores, category_index=category_index, box_thresh=0.5, line_thickness=3, font='arial.ttf', font_size=20) plt.imshow(plot_img) plt.show() # 保存预测的图片结果 plot_img.save("test_result.jpg") if __name__ == "__main__": main() ================================================ FILE: pytorch_object_detection/ssd/record_mAP.txt ================================================ COCO results: Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.448 Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.721 Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.482 Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.099 Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.280 Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.521 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.418 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.565 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.573 Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.166 Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.419 Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 
class Bottleneck(nn.Module):
    """
    Residual bottleneck block: 1x1 conv -> 3x3 conv -> 1x1 conv, each followed
    by batch normalisation, with a shortcut connection added before the final ReLU.

    The last 1x1 convolution outputs ``out_channel * expansion`` channels.
    """
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        """
        Args:
            in_channel: channels of the block input
            out_channel: channels of the two inner convolutions
            stride: stride of the 3x3 convolution
            downsample: optional module applied to the input to form the shortcut
        """
        super().__init__()
        expanded_channel = out_channel * self.expansion

        # 1x1 convolution: squeeze channels
        self.conv1 = nn.Conv2d(in_channel, out_channel,
                               kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)

        # 3x3 convolution: the only layer that uses the requested stride
        self.conv2 = nn.Conv2d(out_channel, out_channel,
                               kernel_size=3, stride=stride, bias=False, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channel)

        # 1x1 convolution: expand channels
        self.conv3 = nn.Conv2d(out_channel, expanded_channel,
                               kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(expanded_channel)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        # Shortcut branch: the raw input unless a downsample module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out += shortcut
        return self.relu(out)
class Backbone(nn.Module):
    """
    ResNet-50 feature extractor used as the SSD backbone.

    Only the first seven children of the ResNet are kept, and the first block
    of the last kept stage is changed from stride 2 to stride 1.
    """

    def __init__(self, pretrain_path=None):
        """
        Args:
            pretrain_path: optional path to a ResNet-50 state dict that is
                loaded before the network is truncated
        """
        super(Backbone, self).__init__()
        net = resnet50()
        # Channels of the feature maps that the detection heads consume.
        self.out_channels = [1024, 512, 512, 256, 256, 256]

        if pretrain_path is not None:
            # Load onto the CPU first so that a checkpoint saved from a GPU can
            # also be restored on a machine without CUDA; the caller moves the
            # model to its target device afterwards.
            net.load_state_dict(torch.load(pretrain_path, map_location="cpu"))

        # Keep the first seven child modules of the ResNet.
        self.feature_extractor = nn.Sequential(*list(net.children())[:7])

        conv4_block1 = self.feature_extractor[-1][0]

        # Change the stride of conv4_block1 from 2 to 1.
        conv4_block1.conv1.stride = (1, 1)
        conv4_block1.conv2.stride = (1, 1)
        conv4_block1.downsample[0].stride = (1, 1)

    def forward(self, x):
        return self.feature_extractor(x)
location_extractors.append(nn.Conv2d(oc, nd * 4, kernel_size=3, padding=1)) confidence_extractors.append(nn.Conv2d(oc, nd * self.num_classes, kernel_size=3, padding=1)) self.loc = nn.ModuleList(location_extractors) self.conf = nn.ModuleList(confidence_extractors) self._init_weights() default_box = dboxes300_coco() self.compute_loss = Loss(default_box) self.encoder = Encoder(default_box) self.postprocess = PostProcess(default_box) def _build_additional_features(self, input_size): """ 为backbone(resnet50)添加额外的一系列卷积层,得到相应的一系列特征提取器 :param input_size: :return: """ additional_blocks = [] # input_size = [1024, 512, 512, 256, 256, 256] for resnet50 middle_channels = [256, 256, 128, 128, 128] for i, (input_ch, output_ch, middle_ch) in enumerate(zip(input_size[:-1], input_size[1:], middle_channels)): padding, stride = (1, 2) if i < 3 else (0, 1) layer = nn.Sequential( nn.Conv2d(input_ch, middle_ch, kernel_size=1, bias=False), nn.BatchNorm2d(middle_ch), nn.ReLU(inplace=True), nn.Conv2d(middle_ch, output_ch, kernel_size=3, padding=padding, stride=stride, bias=False), nn.BatchNorm2d(output_ch), nn.ReLU(inplace=True), ) additional_blocks.append(layer) self.additional_blocks = nn.ModuleList(additional_blocks) def _init_weights(self): layers = [*self.additional_blocks, *self.loc, *self.conf] for layer in layers: for param in layer.parameters(): if param.dim() > 1: nn.init.xavier_uniform_(param) # Shape the classifier to the view of bboxes def bbox_view(self, features, loc_extractor, conf_extractor): locs = [] confs = [] for f, l, c in zip(features, loc_extractor, conf_extractor): # [batch, n*4, feat_size, feat_size] -> [batch, 4, -1] locs.append(l(f).view(f.size(0), 4, -1)) # [batch, n*classes, feat_size, feat_size] -> [batch, classes, -1] confs.append(c(f).view(f.size(0), self.num_classes, -1)) locs, confs = torch.cat(locs, 2).contiguous(), torch.cat(confs, 2).contiguous() return locs, confs def forward(self, image, targets=None): x = self.feature_extractor(image) # Feature Map 
class Loss(nn.Module):
    """
    SSD training loss, the sum of:
        1. Confidence loss: every positive anchor plus hard-mined negatives
        2. Localization loss: positive anchors only

    The default boxes passed in are expected to have shape 8732x4.
    """
    def __init__(self, dboxes):
        super().__init__()
        # The loss uses the reciprocals of the default-box scale factors
        # (the original author notes these evaluate to 10 and 5). See
        # http://jany.st/post/2017-11-05-single-shot-detector-ssd-from-scratch-in-tensorflow.html
        self.scale_xy = 1.0 / dboxes.scale_xy
        self.scale_wh = 1.0 / dboxes.scale_wh

        self.location_loss = nn.SmoothL1Loss(reduction='none')

        # [num_anchors, 4] -> [4, num_anchors] -> [1, 4, num_anchors]
        anchors_xywh = dboxes(order="xywh")
        self.dboxes = nn.Parameter(anchors_xywh.transpose(0, 1).unsqueeze(dim=0),
                                   requires_grad=False)

        self.confidence_loss = nn.CrossEntropyLoss(reduction='none')

    def _location_vec(self, loc):
        # type: (Tensor) -> Tensor
        """
        Compute the regression targets of the ground truth relative to the anchors.

        :param loc: ground-truth box matched to every anchor, Nx4x8732
        :return: regression targets, Nx4x8732
        """
        anchor_xy = self.dboxes[:, :2, :]
        anchor_wh = self.dboxes[:, 2:, :]

        offset_xy = self.scale_xy * (loc[:, :2, :] - anchor_xy) / anchor_wh  # Nx2x8732
        offset_wh = self.scale_wh * (loc[:, 2:, :] / anchor_wh).log()  # Nx2x8732
        return torch.cat((offset_xy, offset_wh), dim=1).contiguous()

    def forward(self, ploc, plabel, gloc, glabel):
        # type: (Tensor, Tensor, Tensor, Tensor) -> Tensor
        """
            ploc, plabel: Nx4x8732, Nxlabel_numx8732
                predicted location and labels

            gloc, glabel: Nx4x8732, Nx8732
                ground truth location and labels
        """
        # Positive anchors are those with a non-background label. Tensor: [N, 8732]
        positive = glabel > 0
        # Number of positive anchors per image. Tensor: [N]
        num_pos = positive.sum(dim=1)

        # Localization loss, summed over the four coordinates and kept for
        # positive anchors only.
        target_offsets = self._location_vec(gloc)  # Tensor: [N, 4, 8732]
        per_anchor_loc = self.location_loss(ploc, target_offsets).sum(dim=1)  # Tensor: [N, 8732]
        loc_loss = (positive.float() * per_anchor_loc).sum(dim=1)  # Tensor: [N]

        # Per-anchor confidence loss. Tensor: [N, 8732]
        per_anchor_conf = self.confidence_loss(plabel, glabel)

        # Hard negative mining: zero the positives so they are never picked,
        # then rank every anchor by its confidence loss. Sorting the sort
        # indices turns them into each anchor's rank in the descending order.
        negatives_only = per_anchor_conf.masked_fill(positive, 0.0)
        _, order = negatives_only.sort(dim=1, descending=True)
        _, rank = order.sort(dim=1)

        # Use three negatives per positive (Hard negative mining section of
        # the paper), capped at the total number of anchors.
        num_neg = torch.clamp(3 * num_pos, max=positive.size(1)).unsqueeze(-1)
        hard_negative = rank < num_neg  # Tensor: [N, 8732]

        # Confidence loss = loss of the positives + loss of the selected negatives.
        selected = positive.float() + hard_negative.float()
        con_loss = (per_anchor_conf * selected).sum(dim=1)  # Tensor: [N]

        total_loss = loc_loss + con_loss

        # Images without any positive anchor contribute nothing,
        # e.g. [15, 3, 5, 0] -> [1.0, 1.0, 1.0, 0.0]
        has_positive = (num_pos > 0).float()
        # Guard against division by zero for those images.
        safe_num_pos = num_pos.float().clamp(min=1e-6)
        return (total_loss * has_positive / safe_num_pos).mean(dim=0)
def calc_iou_tensor(boxes1, boxes2):
    """
    Pairwise intersection-over-union (Jaccard index) of two sets of boxes.

    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.

    Arguments:
        boxes1 (Tensor[N, 4])
        boxes2 (Tensor[M, 4])

    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in boxes1 and boxes2
    """
    # Area of every box: (x2 - x1) * (y2 - y1).
    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])  # [N]
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])  # [M]

    # Inserting an axis into boxes1 lets broadcasting pair every box of
    # boxes1 with every box of boxes2.
    top_left = torch.max(boxes1[:, None, :2], boxes2[:, :2])  # [N,M,2]
    bottom_right = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # [N,M,2]

    # Boxes that do not overlap yield a negative extent, clamped to zero.
    overlap_w, overlap_h = (bottom_right - top_left).clamp(min=0).unbind(dim=2)
    inter = overlap_w * overlap_h  # [N,M]

    union = area1[:, None] + area2 - inter
    return inter / union
class Encoder(object):
    """
    Convert between ground-truth (bboxes, labels) and the SSD output layout.

    Inspired by https://github.com/kuangliu/pytorch-src

    dboxes: default boxes (8732 x 4 for SSD300); kept in ltrb and xywh form.
        encoder: input ltrb format, output xywh format
        decoder: input xywh format, output ltrb format

    encode:
        input  : bboxes_in (Tensor nboxes x 4), labels_in (Tensor nboxes)
        output : bboxes_out (Tensor 8732 x 4), labels_out (Tensor 8732)
        criteria : IoU threshold used to match default boxes to GT boxes
    decode:
        input  : bboxes_in (Tensor 8732 x 4), scores_in (Tensor 8732 x nitems)
        output : bboxes_out (Tensor nboxes x 4), labels_out (Tensor nboxes)
        criteria : IoU threshold used by non-maximum suppression
        max_output : maximum number of output bboxes

    Relies on the module-level helpers ``calc_iou_tensor`` and ``batched_nms``.
    """

    def __init__(self, dboxes):
        # dboxes is called with order='ltrb' / order='xywh' and must expose
        # scale_xy and scale_wh attributes.
        self.dboxes = dboxes(order='ltrb')
        self.dboxes_xywh = dboxes(order='xywh').unsqueeze(dim=0)
        self.nboxes = self.dboxes.size(0)  # number of default boxes
        self.scale_xy = dboxes.scale_xy
        self.scale_wh = dboxes.scale_wh

    def encode(self, bboxes_in, labels_in, criteria=0.5):
        """
        Assign ground-truth boxes and labels to the default boxes.

        input  : bboxes_in (Tensor nboxes x 4, ltrb), labels_in (Tensor nboxes)
        output : bboxes_out (Tensor 8732 x 4, xywh), labels_out (Tensor 8732)
        criteria : IoU threshold above which a default box becomes positive
        """
        # [nboxes, 8732]: IoU of every GT box with every default box
        ious = calc_iou_tensor(bboxes_in, self.dboxes)
        # [8732,]: best-matching GT (and its IoU) for each default box
        best_dbox_ious, best_dbox_idx = ious.max(dim=0)
        # [nboxes,]: best-matching default box for each GT
        _, best_bbox_idx = ious.max(dim=1)

        # Matching strategy, rule 1: the best default box of every GT is always
        # positive. Setting its IoU to 2.0 guarantees it passes the threshold.
        best_dbox_ious.index_fill_(0, best_bbox_idx, 2.0)  # dim, index, value

        # Make those default boxes point back at the GT that selected them.
        idx = torch.arange(0, best_bbox_idx.size(0), dtype=torch.int64)
        best_dbox_idx[best_bbox_idx[idx]] = idx

        # Matching strategy, rule 2: every default box whose IoU exceeds the
        # threshold is positive (this includes the rule-1 boxes).
        masks = best_dbox_ious > criteria
        # [8732,]: label 0 marks boxes left unmatched
        labels_out = torch.zeros(self.nboxes, dtype=torch.int64)
        labels_out[masks] = labels_in[best_dbox_idx[masks]]

        # Positive default boxes take the coordinates of their matched GT;
        # the others keep their own coordinates.
        bboxes_out = self.dboxes.clone()
        bboxes_out[masks, :] = bboxes_in[best_dbox_idx[masks], :]

        # Transform ltrb -> xywh. All four values are computed before any
        # column is overwritten.
        x = 0.5 * (bboxes_out[:, 0] + bboxes_out[:, 2])  # x
        y = 0.5 * (bboxes_out[:, 1] + bboxes_out[:, 3])  # y
        w = bboxes_out[:, 2] - bboxes_out[:, 0]  # w
        h = bboxes_out[:, 3] - bboxes_out[:, 1]  # h
        bboxes_out[:, 0] = x
        bboxes_out[:, 1] = y
        bboxes_out[:, 2] = w
        bboxes_out[:, 3] = h
        return bboxes_out, labels_out

    def scale_back_batch(self, bboxes_in, scores_in):
        """
        Decode regression outputs to ltrb boxes and softmax the class scores.

        Suppose input N x 4 x num_bbox | N x label_num x num_bbox

        bboxes_in: xywh regression parameters predicted by the network
        scores_in: per-class scores predicted for each default box

        Returns (bboxes, probs) shaped N x num_bbox x 4 and
        N x num_bbox x label_num. The inputs are left unmodified.
        """
        # Follow the exact device of the input (e.g. cuda:1), not merely the
        # current default CUDA device.
        device = bboxes_in.device
        self.dboxes = self.dboxes.to(device)
        self.dboxes_xywh = self.dboxes_xywh.to(device)

        # permute returns a view; everything below is computed out-of-place so
        # the caller's tensors are never written through that view.
        deltas = bboxes_in.permute(0, 2, 1)
        scores = scores_in.permute(0, 2, 1)

        # Apply the regression parameters to the default boxes.
        xy = self.scale_xy * deltas[:, :, :2] * self.dboxes_xywh[:, :, 2:] + self.dboxes_xywh[:, :, :2]
        wh = (self.scale_wh * deltas[:, :, 2:]).exp() * self.dboxes_xywh[:, :, 2:]

        # Transform xywh -> ltrb (xmin, ymin, xmax, ymax).
        half_wh = 0.5 * wh
        bboxes = torch.cat([xy - half_wh, xy + half_wh], dim=-1)

        return bboxes, F.softmax(scores, dim=-1)

    def decode_batch(self, bboxes_in, scores_in, criteria=0.45, max_output=200):
        """
        Decode a whole batch of predictions.

        Returns a list with one (bboxes, labels, scores) tuple per image.
        """
        # Convert xywh regression output to ltrb boxes (needed to compute IoU
        # during NMS) and turn the scores into probabilities.
        bboxes, probs = self.scale_back_batch(bboxes_in, scores_in)

        outputs = []
        # Handle each image of the batch separately.
        for bbox, prob in zip(bboxes.split(1, 0), probs.split(1, 0)):
            bbox = bbox.squeeze(0)
            prob = prob.squeeze(0)
            outputs.append(self.decode_single_new(bbox, prob, criteria, max_output))
        return outputs

    def decode_single_new(self, bboxes_in, scores_in, criteria, num_output=200):
        """
        Decode one image using class-aware batched NMS.

        input  : bboxes_in (Tensor 8732 x 4), scores_in (Tensor 8732 x nitems)
        output : bboxes_out (Tensor nboxes x 4), labels_out (Tensor nboxes),
                 scores_out (Tensor nboxes)
        criteria : IoU threshold for NMS
        num_output : maximum number of output bboxes
        """
        device = bboxes_in.device
        num_classes = scores_in.shape[-1]

        # Clip boxes that leave the (relative) image area.
        bboxes_in = bboxes_in.clamp(min=0, max=1)

        # [8732, 4] -> [8732, 21, 4]: one copy of each box per class
        bboxes_in = bboxes_in.repeat(1, num_classes).reshape(scores_in.shape[0], -1, 4)

        # Create labels for each prediction.
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores_in)

        # Remove predictions for the background class (index 0).
        bboxes_in = bboxes_in[:, 1:, :]
        scores_in = scores_in[:, 1:]
        labels = labels[:, 1:]

        # Batch everything: every class prediction becomes its own instance.
        bboxes_in = bboxes_in.reshape(-1, 4)
        scores_in = scores_in.reshape(-1)
        labels = labels.reshape(-1)

        # Remove low scoring boxes (score threshold 0.05).
        inds = torch.nonzero(scores_in > 0.05, as_tuple=False).squeeze(1)
        bboxes_in, scores_in, labels = bboxes_in[inds], scores_in[inds], labels[inds]

        # Remove (nearly) empty boxes.
        ws, hs = bboxes_in[:, 2] - bboxes_in[:, 0], bboxes_in[:, 3] - bboxes_in[:, 1]
        keep = (ws >= 0.1 / 300) & (hs >= 0.1 / 300)
        keep = keep.nonzero(as_tuple=False).squeeze(1)
        bboxes_in, scores_in, labels = bboxes_in[keep], scores_in[keep], labels[keep]

        # Non-maximum suppression, performed independently per class.
        keep = batched_nms(bboxes_in, scores_in, labels, iou_threshold=criteria)

        # Keep only the top-k scoring predictions.
        keep = keep[:num_output]
        bboxes_out = bboxes_in[keep, :]
        scores_out = scores_in[keep]
        labels_out = labels[keep]

        return bboxes_out, labels_out, scores_out

    def decode_single(self, bboxes_in, scores_in, criteria, max_output, max_num=200):
        """
        Decode one image with a pure-Python per-class NMS loop.

        input  : bboxes_in (Tensor 8732 x 4), scores_in (Tensor 8732 x nitems)
        output : bboxes_out (Tensor nboxes x 4), labels_out (Tensor nboxes),
                 scores_out (Tensor nboxes)
        criteria : IoU threshold for NMS
        max_output : maximum number of output bboxes over all classes
        max_num : maximum number of candidates considered per class
        """
        # Reference to https://github.com/amdegroot/ssd.pytorch
        bboxes_out = []
        scores_out = []
        labels_out = []

        # scores_in is (8732 x nitems); each split is the score column of one
        # class over all boxes.
        for i, score in enumerate(scores_in.split(1, 1)):
            # skip background
            if i == 0:
                continue

            # [8732, 1] -> [8732]
            score = score.squeeze(1)

            # Drop predictions whose probability is below 0.05.
            mask = score > 0.05
            bboxes, score = bboxes_in[mask, :], score[mask]
            if score.size(0) == 0:
                continue

            # Sort ascending so the best candidate is always the last entry.
            _, score_idx_sorted = score.sort(dim=0)

            # Keep the max_num best candidates.
            score_idx_sorted = score_idx_sorted[-max_num:]
            candidates = []

            while score_idx_sorted.numel() > 0:
                idx = score_idx_sorted[-1].item()
                # Boxes of all remaining candidates: [K, 4]
                bboxes_sorted = bboxes[score_idx_sorted, :]
                # Box of the current best candidate: [1, 4]
                bboxes_idx = bboxes[idx, :].unsqueeze(dim=0)
                # IoU of every remaining candidate with the best one: [K].
                # squeeze(1) keeps the mask 1-D even when K == 1.
                iou_sorted = calc_iou_tensor(bboxes_sorted, bboxes_idx).squeeze(1)

                # Keep only candidates with IoU < criteria; this also removes
                # the best candidate itself.
                score_idx_sorted = score_idx_sorted[iou_sorted < criteria]
                candidates.append(idx)

            # Store what survived NMS for this class.
            bboxes_out.append(bboxes[candidates, :])
            scores_out.append(score[candidates])
            labels_out.extend([i] * len(candidates))

        if not bboxes_out:
            # Nothing detected: return empty tensors. The boxes tensor must
            # stay (0, 4) so that evaluation code does not fail.
            return [torch.empty(size=(0, 4)), torch.empty(size=(0,), dtype=torch.int64), torch.empty(size=(0,))]

        bboxes_out = torch.cat(bboxes_out, dim=0).contiguous()
        scores_out = torch.cat(scores_out, dim=0).contiguous()
        labels_out = torch.as_tensor(labels_out, dtype=torch.long)

        # Rank all detections regardless of class and keep the best max_output.
        _, max_ids = scores_out.sort(dim=0)
        max_ids = max_ids[-max_output:]
        return bboxes_out[max_ids, :], labels_out[max_ids], scores_out[max_ids]
class DefaultBoxes(object):
    """
    Generate the SSD default (prior) boxes for every prediction feature map.

    The boxes are stored in relative coordinates, both as
    (cx, cy, w, h) in ``self.dboxes`` and as (xmin, ymin, xmax, ymax) in
    ``self.dboxes_ltrb``. Calling the instance returns one of the two.

    Arguments:
        fig_size: size of the network input image, e.g. 300
        feat_size: size of each prediction feature map, e.g. [38, 19, 10, 5, 3, 1]
        steps: stride of one feature-map cell on the input image,
            e.g. [8, 16, 32, 64, 100, 300]
        scales: default box scale per feature map, in pixels; needs one more
            entry than feat_size, e.g. [21, 45, 99, 153, 207, 261, 315]
        aspect_ratios: extra aspect ratios per feature map,
            e.g. [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
        scale_xy: scaling applied to the predicted centre offsets
        scale_wh: scaling applied to the predicted size offsets
    """

    def __init__(self, fig_size, feat_size, steps, scales, aspect_ratios, scale_xy=0.1, scale_wh=0.2):
        self.fig_size = fig_size
        self.feat_size = feat_size

        self.scale_xy_ = scale_xy
        self.scale_wh_ = scale_wh

        # According to https://github.com/weiliu89/caffe
        # Calculation method slightly different from paper
        self.steps = steps
        self.scales = scales

        # Effective number of cells per feature map (fig_size / step).
        fk = fig_size / np.array(steps)
        self.aspect_ratios = aspect_ratios

        self.default_boxes = []
        # Walk over every prediction feature map and build its default boxes.
        for idx, sfeat in enumerate(self.feat_size):
            sk1 = scales[idx] / fig_size      # this layer's scale, relative [0-1]
            sk2 = scales[idx + 1] / fig_size  # next layer's scale, relative [0-1]
            sk3 = sqrt(sk1 * sk2)
            # Two square (1:1) boxes first ...
            all_sizes = [(sk1, sk1), (sk3, sk3)]

            # ... then each extra ratio in both orientations.
            for alpha in aspect_ratios[idx]:
                w, h = sk1 * sqrt(alpha), sk1 / sqrt(alpha)
                all_sizes.append((w, h))
                all_sizes.append((h, w))

            # Place every box size on every cell of the feature map.
            for w, h in all_sizes:
                # i -> row (y), j -> column (x)
                for i, j in itertools.product(range(sfeat), repeat=2):
                    # Relative centre of the cell.
                    cx, cy = (j + 0.5) / fk[idx], (i + 0.5) / fk[idx]
                    self.default_boxes.append((cx, cy, w, h))

        # An explicit dtype is required for this conversion.
        self.dboxes = torch.as_tensor(self.default_boxes, dtype=torch.float32)
        self.dboxes.clamp_(min=0, max=1)  # restrict (x, y, w, h) to [0, 1]

        # For IoU calculation when matching positive and negative samples:
        # (x, y, w, h) -> (xmin, ymin, xmax, ymax). These are not clamped.
        centers = self.dboxes[:, :2]
        half_sizes = 0.5 * self.dboxes[:, 2:]
        self.dboxes_ltrb = torch.cat([centers - half_sizes, centers + half_sizes], dim=1)

    @property
    def scale_xy(self):
        return self.scale_xy_

    @property
    def scale_wh(self):
        return self.scale_wh_

    def __call__(self, order='ltrb'):
        """
        Return the default boxes in the requested layout.

        order: 'ltrb' for (xmin, ymin, xmax, ymax) or 'xywh' for (cx, cy, w, h)

        Raises:
            ValueError: if order is neither 'ltrb' nor 'xywh'
        """
        if order == 'ltrb':
            return self.dboxes_ltrb

        if order == 'xywh':
            return self.dboxes

        # Fail loudly instead of returning None, which would only surface
        # later as an unrelated error.
        raise ValueError("order must be 'ltrb' or 'xywh', got {!r}".format(order))


def dboxes300_coco():
    """Build the default boxes for a 300x300 SSD input (8732 boxes)."""
    figsize = 300  # size of the network input image
    feat_size = [38, 19, 10, 5, 3, 1]  # size of each prediction feature map
    steps = [8, 16, 32, 64, 100, 300]  # stride of one cell on the input image
    # use the scales here: https://github.com/amdegroot/ssd.pytorch/blob/master/data/config.py
    scales = [21, 45, 99, 153, 207, 261, 315]  # default box scale per feature map
    aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]  # extra ratios per feature map
    dboxes = DefaultBoxes(figsize, feat_size, steps, scales, aspect_ratios)
    return dboxes
def nms(boxes, scores, iou_threshold):
    # type: (Tensor, Tensor, float) -> Tensor
    """
    Run non-maximum suppression (NMS) on a set of boxes.

    Thin wrapper that forwards its arguments to the ``torchvision`` NMS
    operator registered under ``torch.ops``. NMS iteratively removes lower
    scoring boxes whose IoU with a higher scoring box is greater than
    iou_threshold.

    Parameters
    ----------
    boxes : Tensor[N, 4]
        boxes to perform NMS on, in (x1, y1, x2, y2) format
    scores : Tensor[N]
        score of each box
    iou_threshold : float
        overlapping boxes with IoU > iou_threshold are discarded

    Returns
    -------
    keep : Tensor
        int64 tensor with the indices of the boxes kept by NMS,
        sorted in decreasing order of scores
    """
    return torch.ops.torchvision.nms(boxes, scores, iou_threshold)


def batched_nms(boxes, scores, idxs, iou_threshold):
    # type: (Tensor, Tensor, Tensor, float) -> Tensor
    """
    Run NMS independently for every category.

    Boxes that carry different values in ``idxs`` never suppress each other.

    Parameters
    ----------
    boxes : Tensor[N, 4]
        boxes to perform NMS on, in (x1, y1, x2, y2) format
    scores : Tensor[N]
        score of each box
    idxs : Tensor[N]
        category index of each box
    iou_threshold : float
        overlapping boxes with IoU > iou_threshold are discarded

    Returns
    -------
    keep : Tensor
        int64 tensor with the indices of the boxes kept by NMS,
        sorted in decreasing order of scores
    """
    if boxes.numel() > 0:
        # Strategy: shift the boxes of each category by a category-dependent
        # offset that exceeds the largest coordinate, so boxes of different
        # categories can no longer overlap and a single NMS call suffices.
        # idxs.to(boxes) only aligns dtype and device with boxes.
        per_box_shift = idxs.to(boxes) * (boxes.max() + 1)
        shifted_boxes = boxes + per_box_shift[:, None]
        return nms(shifted_boxes, scores, iou_threshold)

    # Nothing to suppress: return an empty index tensor.
    return torch.empty((0,), dtype=torch.int64, device=boxes.device)
class PostProcess(nn.Module):
    """
    Post-processing module turning raw SSD outputs into final detections.

    For each image it decodes the box regression parameters against the
    default boxes, applies softmax to the class scores, filters low scoring
    and tiny boxes, and runs class-aware NMS via the module-level
    ``batched_nms``. The ``# type:`` comments and ``torch.jit.annotate`` call
    are TorchScript annotations and must be kept as they are.
    """

    def __init__(self, dboxes):
        super(PostProcess, self).__init__()
        # Stored as a non-trainable parameter.
        # [num_anchors, 4] -> [1, num_anchors, 4]
        self.dboxes_xywh = nn.Parameter(dboxes(order='xywh').unsqueeze(dim=0),
                                        requires_grad=False)
        self.scale_xy = dboxes.scale_xy  # 0.1
        self.scale_wh = dboxes.scale_wh  # 0.2

        self.criteria = 0.5  # IoU threshold handed to NMS in forward()
        self.max_output = 100  # maximum number of detections kept per image

    def scale_back_batch(self, bboxes_in, scores_in):
        # type: (Tensor, Tensor) -> Tuple[Tensor, Tensor]
        """
            1) Compute the final predicted coordinates from the box regression parameters
            2) Convert the box format from xywh back to ltrb
            3) Apply softmax to the predicted class scores
            Do scale and transform from xywh to ltrb
            suppose input N x 4 x num_bbox | N x label_num x num_bbox

            bboxes_in: [N, 4, 8732] xywh regression parameters predicted by the network
            scores_in: [N, label_num, 8732] per-class scores predicted for each default box

            NOTE: the assignments below write through a permuted view, so the
            tensor passed as bboxes_in is modified in place.
        """

        # Returns a view of the original tensor with its dimensions permuted.
        # [batch, 4, 8732] -> [batch, 8732, 4]
        bboxes_in = bboxes_in.permute(0, 2, 1)
        # [batch, label_num, 8732] -> [batch, 8732, label_num]
        scores_in = scores_in.permute(0, 2, 1)
        # print(bboxes_in.is_contiguous())

        bboxes_in[:, :, :2] = self.scale_xy * bboxes_in[:, :, :2]   # predicted x, y regression parameters
        bboxes_in[:, :, 2:] = self.scale_wh * bboxes_in[:, :, 2:]   # predicted w, h regression parameters

        # Apply the regression parameters to the default boxes to get the final predicted boxes
        bboxes_in[:, :, :2] = bboxes_in[:, :, :2] * self.dboxes_xywh[:, :, 2:] + self.dboxes_xywh[:, :, :2]
        bboxes_in[:, :, 2:] = bboxes_in[:, :, 2:].exp() * self.dboxes_xywh[:, :, 2:]

        # transform format to ltrb
        # All four edges are computed before any column is overwritten.
        l = bboxes_in[:, :, 0] - 0.5 * bboxes_in[:, :, 2]
        t = bboxes_in[:, :, 1] - 0.5 * bboxes_in[:, :, 3]
        r = bboxes_in[:, :, 0] + 0.5 * bboxes_in[:, :, 2]
        b = bboxes_in[:, :, 1] + 0.5 * bboxes_in[:, :, 3]

        bboxes_in[:, :, 0] = l  # xmin
        bboxes_in[:, :, 1] = t  # ymin
        bboxes_in[:, :, 2] = r  # xmax
        bboxes_in[:, :, 3] = b  # ymax

        # scores_in: [batch, 8732, label_num]
        return bboxes_in, F.softmax(scores_in, dim=-1)

    def decode_single_new(self, bboxes_in, scores_in, criteria, num_output):
        # type: (Tensor, Tensor, float, int) -> Tuple[Tensor, Tensor, Tensor]
        """
        decode:
            input  : bboxes_in (Tensor 8732 x 4), scores_in (Tensor 8732 x nitems)
            output : bboxes_out (Tensor nboxes x 4), labels_out (Tensor nboxes),
                     scores_out (Tensor nboxes)
            criteria : IoU threshold used by NMS
            num_output : maximum number of output bboxes
        """
        device = bboxes_in.device

        num_classes = scores_in.shape[-1]

        # Clip boxes that leave the [0, 1] range
        bboxes_in = bboxes_in.clamp(min=0, max=1)

        # [8732, 4] -> [8732, 21, 4]
        bboxes_in = bboxes_in.repeat(1, num_classes).reshape(scores_in.shape[0], -1, 4)

        # create labels for each prediction
        labels = torch.arange(num_classes, device=device)
        # [num_classes] -> [8732, num_classes]
        labels = labels.view(1, -1).expand_as(scores_in)

        # remove prediction with the background label
        bboxes_in = bboxes_in[:, 1:, :]  # [8732, 21, 4] -> [8732, 20, 4]
        scores_in = scores_in[:, 1:]  # [8732, 21] -> [8732, 20]
        labels = labels[:, 1:]  # [8732, 21] -> [8732, 20]

        # batch everything, by making every class prediction be a separate instance
        bboxes_in = bboxes_in.reshape(-1, 4)  # [8732, 20, 4] -> [8732x20, 4]
        scores_in = scores_in.reshape(-1)  # [8732, 20] -> [8732x20]
        labels = labels.reshape(-1)  # [8732, 20] -> [8732x20]

        # remove low scoring boxes (score threshold 0.05)
        # inds = torch.nonzero(scores_in > 0.05).squeeze(1)
        inds = torch.where(torch.gt(scores_in, 0.05))[0]
        bboxes_in, scores_in, labels = bboxes_in[inds, :], scores_in[inds], labels[inds]

        # remove empty boxes (narrower or shorter than 1 / 300 in relative units)
        ws, hs = bboxes_in[:, 2] - bboxes_in[:, 0], bboxes_in[:, 3] - bboxes_in[:, 1]
        keep = (ws >= 1 / 300) & (hs >= 1 / 300)
        # keep = keep.nonzero().squeeze(1)
        keep = torch.where(keep)[0]
        bboxes_in, scores_in, labels = bboxes_in[keep], scores_in[keep], labels[keep]

        # non-maximum suppression
        keep = batched_nms(bboxes_in, scores_in, labels, iou_threshold=criteria)

        # keep only topk scoring predictions
        keep = keep[:num_output]
        bboxes_out = bboxes_in[keep, :]
        scores_out = scores_in[keep]
        labels_out = labels[keep]

        return bboxes_out, labels_out, scores_out

    def forward(self, bboxes_in, scores_in):
        """
        Decode a batch of raw predictions.

        Returns a list with one (bboxes, labels, scores) tuple per image.
        """
        # Compute the final predicted coordinates from the regression
        # parameters and apply softmax to the class scores
        bboxes, probs = self.scale_back_batch(bboxes_in, scores_in)

        # Typed empty list, annotated for TorchScript.
        outputs = torch.jit.annotate(List[Tuple[Tensor, Tensor, Tensor]], [])
        # Iterate over every image of the batch
        # bboxes: [batch, 8732, 4]
        for bbox, prob in zip(bboxes.split(1, 0), probs.split(1, 0)):  # split_size, split_dim
            # bbox: [1, 8732, 4]
            bbox = bbox.squeeze(0)
            prob = prob.squeeze(0)
            outputs.append(self.decode_single_new(bbox, prob, self.criteria, self.max_output))
        return outputs
def create_model(num_classes):
    """
    Build an SSD300 model and initialise it from the NVIDIA checkpoint.

    num_classes: number of classes passed to SSD300. The caller in this
        script passes the object class count plus one for background.

    Every pretrained tensor is reused except the class predictor ("conf")
    weights. Any missing or unexpected keys reported while loading are printed.
    """
    # https://download.pytorch.org/models/resnet50-19c8e357.pth
    # pre_train_path = "./src/resnet50.pth"
    model = SSD300(backbone=Backbone(pretrain_path=None), num_classes=num_classes)

    checkpoint = torch.load("./src/nvidia_ssdpyt_fp32.pt", map_location='cpu')

    # Drop the class predictor weights. The box regression weights can be
    # reused because their shape does not depend on num_classes.
    reusable_weights = {
        name: tensor
        for name, tensor in checkpoint["model"].items()
        if "conf" not in name.split(".")
    }

    missing_keys, unexpected_keys = model.load_state_dict(reusable_weights, strict=False)
    if missing_keys or unexpected_keys:
        print("missing_keys: ", missing_keys)
        print("unexpected_keys: ", unexpected_keys)

    return model
def main(args):
    """
    Train (or only evaluate) SSD300 on VOC2012, optionally distributed.

    args: the parsed command-line namespace built in the ``__main__`` block.
        ``init_distributed_mode`` is called on it first; this function later
        reads ``args.distributed``, ``args.gpu`` and ``args.rank``, which the
        parser does not define (presumably set by that call — TODO confirm).
    """
    init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)

    # Per-run results file, named after the start time.
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    # Data loading code
    print("Loading data")

    data_transform = {
        "train": transforms.Compose([transforms.SSDCropping(),
                                     transforms.Resize(),
                                     transforms.ColorJitter(),
                                     transforms.ToTensor(),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.Normalization(),
                                     transforms.AssignGTtoDefaultBox()]),
        "val": transforms.Compose([transforms.Resize(),
                                   transforms.ToTensor(),
                                   transforms.Normalization()])
    }

    VOC_root = args.data_path
    # check voc root
    if os.path.exists(os.path.join(VOC_root, "VOCdevkit")) is False:
        raise FileNotFoundError("VOCdevkit dose not in path:'{}'.".format(VOC_root))

    # load train data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt
    train_data_set = VOCDataSet(VOC_root, "2012", data_transform["train"], train_set='train.txt')

    # load validation data set
    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt
    val_data_set = VOCDataSet(VOC_root, "2012", data_transform["val"], train_set='val.txt')

    print("Creating data loaders")
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_data_set)
        test_sampler = torch.utils.data.distributed.DistributedSampler(val_data_set)
    else:
        train_sampler = torch.utils.data.RandomSampler(train_data_set)
        test_sampler = torch.utils.data.SequentialSampler(val_data_set)

    if args.aspect_ratio_group_factor >= 0:
        # Compute, for every image, the index of the aspect-ratio bin it falls into
        group_ids = create_aspect_ratio_groups(train_data_set, k=args.aspect_ratio_group_factor)
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)
    else:
        train_batch_sampler = torch.utils.data.BatchSampler(
            train_sampler, args.batch_size, drop_last=True)

    data_loader = torch.utils.data.DataLoader(
        train_data_set, batch_sampler=train_batch_sampler, num_workers=args.workers,
        collate_fn=train_data_set.collate_fn)

    # The validation loader reuses the training set's collate_fn.
    data_loader_test = torch.utils.data.DataLoader(
        val_data_set, batch_size=1,
        sampler=test_sampler, num_workers=args.workers,
        collate_fn=train_data_set.collate_fn)

    print("Creating model")
    # +1 because --num_classes does not include the background class.
    model = create_model(num_classes=args.num_classes+1)
    model.to(device)

    # Keep a handle on the unwrapped model for loading/saving state dicts.
    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(
        params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma)
    # lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)

    # If --resume is given (path to previously saved weights), continue training from it
    if args.resume:
        # If map_location is missing, torch.load will first load the module to CPU
        # and then copy each parameter to where it was saved,
        # which would result in all processes on the same machine using the same set of devices.
        checkpoint = torch.load(args.resume, map_location='cpu')  # load the saved checkpoint (including optimizer and lr schedule)
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1

    if args.test_only:
        utils.evaluate(model, data_loader_test, device=device)
        return

    train_loss = []
    learning_rate = []
    val_map = []
    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        mean_loss, lr = utils.train_one_epoch(model, optimizer, data_loader,
                                              device, epoch, args.print_freq,
                                              warmup=True)
        # only first process to save training info
        if args.rank in [-1, 0]:
            train_loss.append(mean_loss.item())
            learning_rate.append(lr)

        # update learning rate
        lr_scheduler.step()

        # evaluate after every epoch
        coco_info = utils.evaluate(model, data_loader_test, device=device)

        if args.rank in [-1, 0]:
            # write into txt
            with open(results_file, "a") as f:
                # The written values are the COCO metrics plus the loss and learning rate
                result_info = [str(round(i, 4)) for i in coco_info + [mean_loss.item()]] + [str(round(lr, 6))]
                txt = "epoch:{} {}".format(epoch, ' '.join(result_info))
                f.write(txt + "\n")

            val_map.append(coco_info[1])  # pascal mAP

        if args.output_dir:
            # Weights are saved on the master process only
            save_on_master({
                'model': model_without_ddp.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'args': args,
                'epoch': epoch},
                os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))

    if args.rank in [-1, 0]:
        # plot loss and lr curve
        if len(train_loss) != 0 and len(learning_rate) != 0:
            from plot_curve import plot_loss_and_lr
            plot_loss_and_lr(train_loss, learning_rate)

        # plot mAP curve
        if len(val_map) != 0:
            from plot_curve import plot_map
            plot_map(val_map)
# Script entry point: parse the command-line options, make sure the output
# directory exists, then start (distributed) training.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description=__doc__)

    # Root directory of the training data (the folder containing VOCdevkit)
    parser.add_argument('--data-path', default='./', help='dataset')
    # Number of object classes to detect, not counting background
    parser.add_argument('--num_classes', default=20, type=int, help='num_classes')
    # Training device type
    parser.add_argument('--device', default='cuda', help='device')
    # batch_size on each GPU
    parser.add_argument('-b', '--batch-size', default=8, type=int,
                        help='images per gpu, the total batch size is $NGPU x batch_size')
    # Epoch index to start (or continue) training from
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # Total number of training epochs
    parser.add_argument('--epochs', default=20, type=int, metavar='N',
                        help='number of total epochs to run')
    # Number of workers used for data loading and preprocessing
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    # Learning rate; set it according to the number of GPUs and the batch_size: 0.005 / 8 * num_GPU
    parser.add_argument('--lr', default=0.005, type=float,
                        help='initial learning rate, 0.005 is the default value for training '
                             'on 8 gpus and 2 images_per_gpu')
    # momentum parameter of SGD
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # weight_decay parameter of SGD
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # Parameter for torch.optim.lr_scheduler.StepLR
    parser.add_argument('--lr-step-size', default=5, type=int, help='decrease lr every step-size epochs')
    # Parameter for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-steps', default=[7, 12], nargs='+', type=int, help='decrease lr every step-size epochs')
    # Decay factor (gamma) for the learning rate scheduler
    parser.add_argument('--lr-gamma', default=0.3, type=float, help='decrease lr by a factor of lr-gamma')
    # How often training information is printed
    parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
    # Directory where checkpoints are saved
    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')
    # Continue training from a previous checkpoint
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    # Aspect-ratio grouping factor; a negative value disables grouped batching
    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
    # Do not train, only evaluate
    parser.add_argument(
        "--test-only",
        dest="test_only",
        help="Only test the model",
        action="store_true",
    )

    # Number of processes to launch (note: processes, not threads)
    parser.add_argument('--world-size', default=4, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')

    args = parser.parse_args()

    # If an output directory was given, create it when it does not exist yet
    if args.output_dir:
        mkdir(args.output_dir)

    main(args)
def create_model(num_classes=21):
    """
    Build an SSD300 model and initialise it from the NVIDIA checkpoint.

    num_classes: number of classes passed to SSD300 (default 21).

    Raises FileNotFoundError when the pretrained checkpoint is missing.
    Every pretrained tensor is reused except the class predictor ("conf")
    weights. Any missing or unexpected keys reported while loading are printed.
    """
    # https://download.pytorch.org/models/resnet50-19c8e357.pth
    # pre_train_path = "./src/resnet50.pth"
    model = SSD300(backbone=Backbone(), num_classes=num_classes)

    # https://ngc.nvidia.com/catalog/models -> search ssd -> download FP32
    pre_ssd_path = "./src/nvidia_ssdpyt_fp32.pt"
    if not os.path.exists(pre_ssd_path):
        raise FileNotFoundError("nvidia_ssdpyt_fp32.pt not find in {}".format(pre_ssd_path))

    checkpoint = torch.load(pre_ssd_path, map_location='cpu')

    # Drop the class predictor weights. The box regression weights can be
    # reused because their shape does not depend on num_classes.
    reusable_weights = {
        name: tensor
        for name, tensor in checkpoint["model"].items()
        if "conf" not in name.split(".")
    }

    missing_keys, unexpected_keys = model.load_state_dict(reusable_weights, strict=False)
    if missing_keys or unexpected_keys:
        print("missing_keys: ", missing_keys)
        print("unexpected_keys: ", unexpected_keys)

    return model
def main(parser_data):
    """
    Train SSD300 on VOC2012 on a single device.

    parser_data: parsed command-line namespace. Reads ``device``,
        ``data_path``, ``batch_size``, ``num_classes``, ``resume``,
        ``start_epoch`` and ``epochs``.

    Writes one line of metrics per epoch to a timestamped results file and
    one checkpoint per epoch to ./save_weights.

    Raises FileNotFoundError when VOCdevkit is not found under data_path.
    """
    device = torch.device(parser_data.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    if not os.path.exists("save_weights"):
        os.mkdir("save_weights")

    # Per-run results file, named after the start time.
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    data_transform = {
        "train": transforms.Compose([transforms.SSDCropping(),
                                     transforms.Resize(),
                                     transforms.ColorJitter(),
                                     transforms.ToTensor(),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.Normalization(),
                                     transforms.AssignGTtoDefaultBox()]),
        "val": transforms.Compose([transforms.Resize(),
                                   transforms.ToTensor(),
                                   transforms.Normalization()])
    }

    VOC_root = parser_data.data_path
    # check voc root
    if os.path.exists(os.path.join(VOC_root, "VOCdevkit")) is False:
        raise FileNotFoundError("VOCdevkit dose not in path:'{}'.".format(VOC_root))

    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt
    train_dataset = VOCDataSet(VOC_root, "2012", data_transform['train'], train_set='train.txt')
    # The batch size must be greater than 1 during training.
    batch_size = parser_data.batch_size
    assert batch_size > 1, "batch size must be greater than 1"
    # Drop the last batch when it would contain exactly one sample.
    drop_last = len(train_dataset) % batch_size == 1
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using %g dataloader workers' % nw)
    train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    num_workers=nw,
                                                    collate_fn=train_dataset.collate_fn,
                                                    drop_last=drop_last)

    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt
    val_dataset = VOCDataSet(VOC_root, "2012", data_transform['val'], train_set='val.txt')
    val_data_loader = torch.utils.data.DataLoader(val_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=nw,
                                                  collate_fn=train_dataset.collate_fn)

    # Use this function's own argument, not the module-level ``args`` global,
    # which only exists when the file is run as a script.
    # +1 because num_classes does not include the background class.
    model = create_model(num_classes=parser_data.num_classes + 1)
    model.to(device)

    # define optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.0005,
                                momentum=0.9, weight_decay=0.0005)
    # learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=5,
                                                   gamma=0.3)

    # If a checkpoint from a previous run was given, continue from it.
    if parser_data.resume != "":
        checkpoint = torch.load(parser_data.resume, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        parser_data.start_epoch = checkpoint['epoch'] + 1
        print("the training process from epoch{}...".format(parser_data.start_epoch))

    train_loss = []
    learning_rate = []
    val_map = []

    # Build the COCO-style validation annotations once, up front, so they are
    # not rebuilt at every evaluation.
    val_data = get_coco_api_from_dataset(val_data_loader.dataset)
    for epoch in range(parser_data.start_epoch, parser_data.epochs):
        mean_loss, lr = utils.train_one_epoch(model=model, optimizer=optimizer,
                                              data_loader=train_data_loader,
                                              device=device, epoch=epoch,
                                              print_freq=50)
        train_loss.append(mean_loss.item())
        learning_rate.append(lr)

        # update learning rate
        lr_scheduler.step()

        coco_info = utils.evaluate(model=model, data_loader=val_data_loader,
                                   device=device, data_set=val_data)

        # write into txt
        with open(results_file, "a") as f:
            # The written values are the COCO metrics plus loss and learning rate.
            result_info = [str(round(i, 4)) for i in coco_info + [mean_loss.item()]] + [str(round(lr, 6))]
            txt = "epoch:{} {}".format(epoch, ' '.join(result_info))
            f.write(txt + "\n")

        val_map.append(coco_info[1])  # pascal mAP

        # save weights
        save_files = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler': lr_scheduler.state_dict(),
            'epoch': epoch}
        torch.save(save_files, "./save_weights/ssd300-{}.pth".format(epoch))

    # plot loss and lr curve
    if len(train_loss) != 0 and len(learning_rate) != 0:
        from plot_curve import plot_loss_and_lr
        plot_loss_and_lr(train_loss, learning_rate)

    # plot mAP curve
    if len(val_map) != 0:
        from plot_curve import plot_map
        plot_map(val_map)
'__main__': import argparse parser = argparse.ArgumentParser( description=__doc__) # 训练设备类型 parser.add_argument('--device', default='cuda:0', help='device') # 检测的目标类别个数,不包括背景 parser.add_argument('--num_classes', default=20, type=int, help='num_classes') # 训练数据集的根目录(VOCdevkit) parser.add_argument('--data-path', default='./', help='dataset') # 文件保存地址 parser.add_argument('--output-dir', default='./save_weights', help='path where to save') # 若需要接着上次训练,则指定上次训练保存权重文件地址 parser.add_argument('--resume', default='', type=str, help='resume from checkpoint') # 指定接着从哪个epoch数开始训练 parser.add_argument('--start_epoch', default=0, type=int, help='start epoch') # 训练的总epoch数 parser.add_argument('--epochs', default=15, type=int, metavar='N', help='number of total epochs to run') # 训练的batch size parser.add_argument('--batch_size', default=4, type=int, metavar='N', help='batch size when training.') args = parser.parse_args() print(args) # 检查保存权重文件夹是否存在,不存在则创建 if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) main(args) ================================================ FILE: pytorch_object_detection/ssd/train_utils/__init__.py ================================================ from .coco_utils import get_coco_api_from_dataset from .coco_eval import CocoEvaluator from .distributed_utils import init_distributed_mode, save_on_master, mkdir from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups ================================================ FILE: pytorch_object_detection/ssd/train_utils/coco_eval.py ================================================ import json import copy from collections import defaultdict import numpy as np import torch import torch._six from pycocotools.cocoeval import COCOeval from pycocotools.coco import COCO import pycocotools.mask as mask_util from train_utils.distributed_utils import all_gather class CocoEvaluator(object): def __init__(self, coco_gt, iou_types): assert isinstance(iou_types, (list, tuple)) coco_gt = 
def prepare_for_coco_detection(self, predictions):
    """Flatten per-image predictions into a list of COCO detection records.

    Args:
        predictions: mapping of image id -> prediction. Each non-empty
            prediction is indexed with the keys ``"boxes"``, ``"scores"``
            and ``"labels"``; every value must support ``.tolist()``.
            Boxes are passed through ``convert_to_xywh`` first.

    Returns:
        A list with one dict per predicted box, holding ``"image_id"``,
        ``"category_id"``, ``"bbox"`` and ``"score"``. Images whose
        prediction has length zero contribute nothing.
    """
    detections = []
    for image_id, output in predictions.items():
        if len(output) == 0:
            continue

        # incoming boxes are (xmin, ymin, xmax, ymax)
        xywh_boxes = convert_to_xywh(output["boxes"]).tolist()
        score_list = output["scores"].tolist()
        label_list = output["labels"].tolist()

        for idx, xywh in enumerate(xywh_boxes):
            detections.append(
                {
                    "image_id": image_id,
                    "category_id": label_list[idx],
                    "bbox": xywh,
                    "score": score_list[idx],
                }
            )
    return detections
def convert_to_xywh(boxes):
    """Convert (xmin, ymin, xmax, ymax) boxes to (xmin, ymin, width, height).

    Args:
        boxes: 2-D tensor whose second dimension has exactly four entries
            (it is unbound into four columns).

    Returns:
        A new tensor of the same shape with the last two columns replaced
        by ``xmax - xmin`` and ``ymax - ymin``.
    """
    left, top, right, bottom = boxes.unbind(1)
    widths = right - left
    heights = bottom - top
    return torch.stack((left, top, widths, heights), dim=1)
def loadRes(self, resFile):
    """
    Load detection results and return them wrapped in a result api object.

    Print-free variant of the pycocotools ``loadRes`` routine, used as a
    plain function with the ground-truth object passed in as ``self``.

    :param self: ground-truth api object; ``self.dataset``,
                 ``self.getImgIds()`` and, for ndarray input,
                 ``self.loadNumpyAnnotations()`` are used
    :param resFile: path (str/bytes) of a JSON result file, a numpy array
                    understood by ``self.loadNumpyAnnotations``, or an
                    already decoded list of result dicts
    :return: res (obj) : new ``COCO`` object holding the results
    :raises AssertionError: if the results are not a list, or reference an
                            image id that ``self`` does not know

    Result dicts handed in as a list are completed in place (``id``,
    ``area``, ``iscrowd``, ... are added to them). An empty result list
    yields a result object without annotations.
    """
    res = COCO()
    res.dataset['images'] = [img for img in self.dataset['images']]

    # (str, bytes) is what torch._six.string_classes stood for; spelling it
    # out keeps this function usable on PyTorch builds without torch._six.
    if isinstance(resFile, (str, bytes)):
        # Context manager so the file handle is closed deterministically.
        with open(resFile) as result_file:
            anns = json.load(result_file)
    elif isinstance(resFile, np.ndarray):
        anns = self.loadNumpyAnnotations(resFile)
    else:
        anns = resFile
    assert isinstance(anns, list), 'results in not an array of objects'
    annsImgIds = [ann['image_id'] for ann in anns]
    assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
        'Results do not correspond to current coco set'

    # The first entry decides how all entries are interpreted. With no
    # entries at all, no branch applies and the annotation list stays empty.
    first = anns[0] if anns else {}
    if 'caption' in first:
        imgIds = set([img['id'] for img in res.dataset['images']]) & set(annsImgIds)
        res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
        for ann_id, ann in enumerate(anns, start=1):
            ann['id'] = ann_id
    elif 'bbox' in first and not first['bbox'] == []:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for ann_id, ann in enumerate(anns, start=1):
            bb = ann['bbox']
            x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
            if 'segmentation' not in ann:
                # Fall back to the box outline as a polygon.
                ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
            ann['area'] = bb[2] * bb[3]
            ann['id'] = ann_id
            ann['iscrowd'] = 0
    elif 'segmentation' in first:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for ann_id, ann in enumerate(anns, start=1):
            # now only support compressed RLE format as segmentation results
            ann['area'] = maskUtils.area(ann['segmentation'])
            if 'bbox' not in ann:
                ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
            ann['id'] = ann_id
            ann['iscrowd'] = 0
    elif 'keypoints' in first:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for ann_id, ann in enumerate(anns, start=1):
            s = ann['keypoints']
            # keypoints are stored flat; every third value starting at
            # offset 0 is an x coordinate and at offset 1 a y coordinate
            x = s[0::3]
            y = s[1::3]
            x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y)
            ann['area'] = (x2 - x1) * (y2 - y1)
            ann['id'] = ann_id
            ann['bbox'] = [x1, y1, x2 - x1, y2 - y1]

    res.dataset['annotations'] = anns
    createIndex(res)
    return res
def convert_to_coco_api(ds):
    """Build an in-memory ``COCO`` ground-truth object from dataset ``ds``.

    For every index, ``ds.coco_index(i)`` is read; the returned mapping is
    accessed with the keys ``"image_id"``, ``"height_width"``, ``"boxes"``,
    ``"labels"``, ``"area"`` and ``"iscrowd"``.

    Boxes arrive as relative (xmin, ymin, xmax, ymax) values and are turned
    into absolute (xmin, ymin, w, h). The conversion is done in place on the
    tensor returned by ``ds.coco_index``.

    Returns:
        A ``COCO`` object whose ``dataset`` holds the ``images``,
        ``categories`` and ``annotations`` lists, with its index created.
    """
    image_records = []
    annotation_records = []
    seen_labels = set()
    next_ann_id = 1  # annotation IDs need to start at 1, not 0

    for sample_idx in range(len(ds)):
        # find better way to get target
        sample = ds.coco_index(sample_idx)
        image_id = sample["image_id"].item()
        height = sample["height_width"][0]
        width = sample["height_width"][1]
        image_records.append({'id': image_id, 'height': height, 'width': width})

        boxes = sample["boxes"]  # xmin, ymin, xmax, ymax
        # (xmin, ymin, xmax, ymax) -> (xmin, ymin, w, h)
        boxes[:, 2:] -= boxes[:, :2]
        # scale relative (0-1) coordinates to absolute pixel values
        boxes[:, [0, 2]] = boxes[:, [0, 2]] * width
        boxes[:, [1, 3]] = boxes[:, [1, 3]] * height
        box_list = boxes.tolist()
        label_list = sample['labels'].tolist()
        # The area must be rescaled as well, otherwise the small / medium /
        # large size buckets are computed incorrectly.
        area_list = (sample['area'] * width * height).tolist()
        crowd_list = sample['iscrowd'].tolist()

        for obj_idx, box in enumerate(box_list):
            label = label_list[obj_idx]
            seen_labels.add(label)
            annotation_records.append({
                'image_id': image_id,
                'bbox': box,
                'category_id': label,
                'area': area_list[obj_idx],
                'iscrowd': crowd_list[obj_idx],
                'id': next_ann_id,
            })
            next_ann_id += 1

    coco_ds = COCO()
    coco_ds.dataset = {
        'images': image_records,
        'categories': [{'id': label} for label in sorted(seen_labels)],
        'annotations': annotation_records,
    }
    coco_ds.createIndex()
    return coco_ds
class SmoothedValue(object):
    """Track a stream of values over a sliding window and over its whole life.

    The most recent ``window_size`` values are kept in ``self.deque``;
    ``self.total`` and ``self.count`` accumulate over every update.
    """

    def __init__(self, window_size=20, fmt=None):
        self.fmt = "{value:.4f} ({global_avg:.4f})" if fmt is None else fmt
        # bounded buffer: the oldest entry is dropped once the window is full
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0

    def update(self, value, n=1):
        """Record ``value``, weighting it ``n`` times in the global totals."""
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Sum ``count`` and ``total`` over all processes.

        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        packed = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda")
        dist.barrier()
        dist.all_reduce(packed)
        summed_count, summed_total = packed.tolist()
        self.count = int(summed_count)
        self.total = summed_total

    @property
    def median(self):
        """Median of the values currently in the window (read-only)."""
        window = torch.tensor(list(self.deque))
        return window.median().item()

    @property
    def avg(self):
        """Mean of the values currently in the window, computed in float32."""
        window = torch.tensor(list(self.deque), dtype=torch.float32)
        return window.mean().item()

    @property
    def global_avg(self):
        """``total / count`` over every update seen so far."""
        return self.total / self.count

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    @property
    def value(self):
        """Most recently recorded value."""
        return self.deque[-1]

    def __str__(self):
        stats = dict(
            median=self.median,
            avg=self.avg,
            global_avg=self.global_avg,
            max=self.max,
            value=self.value,
        )
        return self.fmt.format(**stats)
def reduce_dict(input_dict, average=True):
    """
    Reduce the values of a dictionary across all processes.

    Args:
        input_dict (dict): all the values will be reduced; they are stacked
            into one tensor before the reduction
        average (bool): whether to do average or sum

    Returns:
        ``input_dict`` itself when fewer than two processes take part
        (the single-GPU case). Otherwise a new dict with the same keys
        whose values are the reduced results.
    """
    n_procs = get_world_size()
    if n_procs < 2:
        # single process: nothing to reduce
        return input_dict

    with torch.no_grad():
        # sort the keys so that they are consistent across processes
        ordered_keys = sorted(input_dict.keys())
        stacked = torch.stack([input_dict[key] for key in ordered_keys], dim=0)
        dist.all_reduce(stacked)
        if average:
            stacked /= n_procs
        return dict(zip(ordered_keys, stacked))
def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
    """Create a ``LambdaLR`` scheduler that ramps the learning rate up.

    The multiplier grows linearly from ``warmup_factor`` at step 0 to 1 at
    step ``warmup_iters`` and stays at 1 afterwards.
    """

    def _lr_multiplier(step):
        """Return the learning-rate multiplier for the given step count."""
        if step < warmup_iters:
            progress = float(step) / warmup_iters
            # blend from warmup_factor (progress 0) towards 1 (progress 1)
            return warmup_factor * (1 - progress) + progress
        # warm-up finished: use the optimizer's base learning rate
        return 1

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=_lr_multiplier)
class GroupedBatchSampler(BatchSampler):
    """
    Wrap another sampler and yield mini-batches of indices in which every
    index belongs to the same group. Batches follow the ordering of the
    wrapped sampler as closely as possible.

    Arguments:
        sampler (Sampler): Base sampler.
        group_ids (list[int]): If the sampler produces indices in range [0, N),
            `group_ids` must be a list of `N` ints which contains the group id
            of each sample. The group ids must be a continuous set of integers
            starting from 0, i.e. they must be in the range [0, num_groups).
        batch_size (int): Size of mini-batch.
    """

    def __init__(self, sampler, group_ids, batch_size):
        if not isinstance(sampler, Sampler):
            raise ValueError(
                "sampler should be an instance of "
                "torch.utils.data.Sampler, but got sampler={}".format(sampler)
            )
        self.sampler = sampler
        self.group_ids = group_ids
        self.batch_size = batch_size

    def __iter__(self):
        pending = defaultdict(list)   # group id -> indices waiting for a full batch
        history = defaultdict(list)   # group id -> every index seen for that group
        emitted = 0

        for sample_idx in self.sampler:
            gid = self.group_ids[sample_idx]
            pending[gid].append(sample_idx)
            history[gid].append(sample_idx)
            if len(pending[gid]) == self.batch_size:
                yield pending[gid]
                emitted += 1
                del pending[gid]
            # Reading the defaultdict here re-creates an empty buffer for a
            # group that was just flushed.
            assert len(pending[gid]) < self.batch_size

        # The base sampler is exhausted. Pad leftover buffers with indices
        # already seen for the same group so that exactly len(self) batches
        # are produced.
        shortfall = len(self) - emitted
        if shortfall > 0:
            # fullest buffers first: they need the fewest repeated samples
            fullest_first = sorted(pending.items(),
                                   key=lambda item: len(item[1]), reverse=True)
            for gid, _ in fullest_first:
                missing = self.batch_size - len(pending[gid])
                filler = _repeat_to_at_least(history[gid], missing)
                pending[gid].extend(filler[:missing])
                assert len(pending[gid]) == self.batch_size
                yield pending[gid]
                shortfall -= 1
                if shortfall == 0:
                    break
        assert shortfall == 0

    def __len__(self):
        return len(self.sampler) // self.batch_size
" "This might take some time...") if indices is None: indices = range(len(dataset)) class SubsetSampler(Sampler): def __init__(self, indices): self.indices = indices def __iter__(self): return iter(self.indices) def __len__(self): return len(self.indices) sampler = SubsetSampler(indices) data_loader = torch.utils.data.DataLoader( dataset, batch_size=1, sampler=sampler, num_workers=14, # you might want to increase it for faster processing collate_fn=lambda x: x[0]) aspect_ratios = [] with tqdm(total=len(dataset)) as pbar: for _i, (img, _) in enumerate(data_loader): pbar.update(1) height, width = img.shape[-2:] aspect_ratio = float(width) / float(height) aspect_ratios.append(aspect_ratio) return aspect_ratios def _compute_aspect_ratios_custom_dataset(dataset, indices=None): if indices is None: indices = range(len(dataset)) aspect_ratios = [] for i in indices: height, width = dataset.get_height_and_width(i) aspect_ratio = float(width) / float(height) aspect_ratios.append(aspect_ratio) return aspect_ratios def _compute_aspect_ratios_coco_dataset(dataset, indices=None): if indices is None: indices = range(len(dataset)) aspect_ratios = [] for i in indices: img_info = dataset.coco.imgs[dataset.ids[i]] aspect_ratio = float(img_info["width"]) / float(img_info["height"]) aspect_ratios.append(aspect_ratio) return aspect_ratios def _compute_aspect_ratios_voc_dataset(dataset, indices=None): if indices is None: indices = range(len(dataset)) aspect_ratios = [] for i in indices: # this doesn't load the data into memory, because PIL loads it lazily width, height = Image.open(dataset.images[i]).size aspect_ratio = float(width) / float(height) aspect_ratios.append(aspect_ratio) return aspect_ratios def _compute_aspect_ratios_subset_dataset(dataset, indices=None): if indices is None: indices = range(len(dataset)) ds_indices = [dataset.indices[i] for i in indices] return compute_aspect_ratios(dataset.dataset, ds_indices) def compute_aspect_ratios(dataset, indices=None): if 
hasattr(dataset, "get_height_and_width"): return _compute_aspect_ratios_custom_dataset(dataset, indices) if isinstance(dataset, torchvision.datasets.CocoDetection): return _compute_aspect_ratios_coco_dataset(dataset, indices) if isinstance(dataset, torchvision.datasets.VOCDetection): return _compute_aspect_ratios_voc_dataset(dataset, indices) if isinstance(dataset, torch.utils.data.Subset): return _compute_aspect_ratios_subset_dataset(dataset, indices) # slow path return _compute_aspect_ratios_slow(dataset, indices) def _quantize(x, bins): bins = copy.deepcopy(bins) bins = sorted(bins) # bisect_right:寻找y元素按顺序应该排在bins中哪个元素的右边,返回的是索引 quantized = list(map(lambda y: bisect.bisect_right(bins, y), x)) return quantized def create_aspect_ratio_groups(dataset, k=0): # 计算所有数据集中的图片width/height比例 aspect_ratios = compute_aspect_ratios(dataset) # 将[0.5, 2]区间划分成2*k+1等份 bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist() if k > 0 else [1.0] # 统计所有图像比例在bins区间中的位置索引 groups = _quantize(aspect_ratios, bins) # count number of elements per group # 统计每个区间的频次 counts = np.unique(groups, return_counts=True)[1] fbins = [0] + bins + [np.inf] print("Using {} as bins for aspect ratio quantization".format(fbins)) print("Count of instances per bin: {}".format(counts)) return groups ================================================ FILE: pytorch_object_detection/ssd/train_utils/train_eval_utils.py ================================================ import math import sys import time import torch from train_utils import get_coco_api_from_dataset, CocoEvaluator import train_utils.distributed_utils as utils def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=50, warmup=False): model.train() metric_logger = utils.MetricLogger(delimiter=" ") metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) header = 'Epoch: [{}]'.format(epoch) lr_scheduler = None if epoch == 0 and warmup is True: # 当训练第一轮(epoch=0)时,启用warmup训练方式,可理解为热身训练 warmup_factor = 5.0 / 
@torch.no_grad()
def evaluate(model, data_loader, device, data_set=None):
    """Run ``model`` over ``data_loader`` and return the COCO statistics.

    Args:
        model: called as ``model(images, targets=None)``; each result is
            unpacked as ``(bboxes_out, labels_out, scores_out)``.
        data_loader: yields ``(images, targets)`` batches; every target is
            indexed with ``"height_width"`` and ``"image_id"``.
        device: device the stacked image batch is moved to.
        data_set: ground-truth COCO api object. When None it is built from
            ``data_loader.dataset``.

    Returns:
        The ``stats`` of the first IoU type's evaluator, as a Python list.
    """
    cpu = torch.device("cpu")
    model.eval()
    logger = utils.MetricLogger(delimiter=" ")

    if data_set is None:
        data_set = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(data_set, iou_types)

    for images, targets in logger.log_every(data_loader, 100, "Test: "):
        batch = torch.stack(images, dim=0).to(device)

        if device != torch.device("cpu"):
            torch.cuda.synchronize(device)

        started = time.time()
        # list((bboxes_out, labels_out, scores_out), ...)
        results = model(batch, targets=None)
        model_time = time.time() - started

        res = {}
        for index, (bboxes_out, labels_out, scores_out) in enumerate(results):
            # Predicted boxes are relative (0-1) xmin, ymin, xmax, ymax;
            # scale them in place back to the original image size.
            height_width = targets[index]["height_width"]
            bboxes_out[:, [0, 2]] = bboxes_out[:, [0, 2]] * height_width[1]
            bboxes_out[:, [1, 3]] = bboxes_out[:, [1, 3]] * height_width[0]

            res[targets[index]["image_id"].item()] = {
                "boxes": bboxes_out.to(cpu),
                "labels": labels_out.to(cpu),
                "scores": scores_out.to(cpu),
            }

        started = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - started
        logger.update(model_time=model_time, evaluator_time=evaluator_time)

    # gather the stats from all processes
    logger.synchronize_between_processes()
    print("Averaged stats:", logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()

    # numpy array -> plain list
    return coco_evaluator.coco_eval[iou_types[0]].stats.tolist()
class RandomHorizontalFlip(object):
    """Mirror an image tensor and its boxes left-to-right with probability ``prob``.

    Intended to run after ``ToTensor``: the image is flipped along its last
    dimension, and box x-coordinates are reflected as ``1.0 - x``, i.e. boxes
    are assumed to be normalised to 0-1 (TODO confirm against the dataset).
    """

    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, image, target):
        # Leave the sample untouched when the random draw does not trigger.
        if not random.random() < self.prob:
            return image, target

        mirrored = image.flip(-1)

        # Boxes are (xmin, ymin, xmax, ymax). The new xmin is 1 - old xmax and
        # vice versa; the update is written into the existing tensor.
        boxes = target["boxes"]
        boxes[:, [0, 2]] = 1.0 - boxes[:, [2, 0]]
        target["boxes"] = boxes
        return mirrored, target
class Resize(object):
    """Resize the image to ``size``; the target is passed through unchanged.

    Intended to run before ``ToTensor``.
    """

    def __init__(self, size=(300, 300)):
        self.resize = t.Resize(size)

    def __call__(self, image, target):
        return self.resize(image), target
def summarize(self, catId=None):
    """Compute the 12 COCO summary metrics, optionally for a single category.

    Note this function can *only* be applied on the default parameter setting.

    Args:
        self: an evaluator object exposing ``params`` (with ``iouThrs``,
            ``areaRngLbl``, ``maxDets``) and ``eval`` (with ``'precision'`` of
            shape [TxRxKxAxM] and ``'recall'`` of shape [TxKxAxM]).
        catId: when an ``int``, restrict the statistics to that index along
            the category axis; otherwise average over all categories.

    Returns:
        tuple: ``(stats, print_info)`` where ``stats`` is a list of 12 values
        (6 AP followed by 6 AR) and ``print_info`` is the matching
        newline-joined report.

    Raises:
        Exception: if ``accumulate()`` has not populated ``self.eval`` yet.
    """
    # Validate before touching self.eval; checking afterwards would surface a
    # bare KeyError instead of this message.
    if not self.eval:
        raise Exception('Please run accumulate() first')

    def _summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):
        p = self.params
        iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
        titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
        typeStr = '(AP)' if ap == 1 else '(AR)'
        iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
            if iouThr is None else '{:0.2f}'.format(iouThr)

        aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
        mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]

        if ap == 1:
            # dimension of precision: [TxRxKxAxM]
            s = self.eval['precision']
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]
            if isinstance(catId, int):
                s = s[:, :, catId, aind, mind]
            else:
                s = s[:, :, :, aind, mind]
        else:
            # dimension of recall: [TxKxAxM]
            s = self.eval['recall']
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]
            if isinstance(catId, int):
                s = s[:, catId, aind, mind]
            else:
                s = s[:, :, aind, mind]

        # Entries equal to -1 mark "no data" and are excluded from the mean.
        valid = s[s > -1]
        mean_s = -1 if len(valid) == 0 else np.mean(valid)

        print_string = iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)
        return mean_s, print_string

    max_dets = self.params.maxDets
    # One entry per reported row, in the standard COCO order.
    specs = [
        dict(ap=1),
        dict(ap=1, iouThr=.5, maxDets=max_dets[2]),
        dict(ap=1, iouThr=.75, maxDets=max_dets[2]),
        dict(ap=1, areaRng='small', maxDets=max_dets[2]),
        dict(ap=1, areaRng='medium', maxDets=max_dets[2]),
        dict(ap=1, areaRng='large', maxDets=max_dets[2]),
        dict(ap=0, maxDets=max_dets[0]),
        dict(ap=0, maxDets=max_dets[1]),
        dict(ap=0, maxDets=max_dets[2]),
        dict(ap=0, areaRng='small', maxDets=max_dets[2]),
        dict(ap=0, areaRng='medium', maxDets=max_dets[2]),
        dict(ap=0, areaRng='large', maxDets=max_dets[2]),
    ]

    stats, print_list = [], []
    for spec in specs:
        value, line = _summarize(**spec)
        stats.append(value)
        print_list.append(line)

    return stats, "\n".join(print_list)
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description=__doc__)

    # Device to run on.
    parser.add_argument('--device', default='cuda', help='device')

    # Number of object classes; main() adds 1 to this when building the model.
    # The default is an int literal so it matches the declared ``type``.
    parser.add_argument('--num-classes', type=int, default=20, help='number of classes')

    # Dataset root (the directory that contains VOCdevkit).
    parser.add_argument('--data-path', default='/data/', help='dataset root')

    # Trained weights file.
    parser.add_argument('--weights', default='./save_weights/model.pth', type=str, help='training weights')

    # Batch size used during validation.
    parser.add_argument('--batch_size', default=1, type=int, metavar='N',
                        help='batch size when validation.')

    args = parser.parse_args()

    main(args)
都解压到`coco2017`文件夹下,可得到如下文件结构: ``` ├── coco2017: 数据集根目录 ├── train2017: 所有训练图像文件夹(118287张) ├── val2017: 所有验证图像文件夹(5000张) └── annotations: 对应标注文件夹 ├── instances_train2017.json: 对应目标检测、分割任务的训练集标注文件 ├── instances_val2017.json: 对应目标检测、分割任务的验证集标注文件 ├── captions_train2017.json: 对应图像描述的训练集标注文件 ├── captions_val2017.json: 对应图像描述的验证集标注文件 ├── person_keypoints_train2017.json: 对应人体关键点检测的训练集标注文件 └── person_keypoints_val2017.json: 对应人体关键点检测的验证集标注文件 ``` ## 训练方法 * 确保提前准备好数据集 * 确保提前下载好对应预训练模型权重 * 若要使用单GPU训练直接使用train.py训练脚本 * 若要使用多GPU训练,使用`torchrun --nproc_per_node=8 train_multi_GPU.py`指令,`nproc_per_node`参数为使用GPU数量 * 如果想指定使用哪些GPU设备可在指令前加上`CUDA_VISIBLE_DEVICES=0,3`(例如我只要使用设备中的第1块和第4块GPU设备) * `CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py` ## 注意事项 * 在使用训练脚本时,注意要将`--data-path`设置为自己存放`coco2017`文件夹所在的**根目录** * 训练过程中保存的`results.txt`是每个epoch在验证集上的COCO指标,前12个值是COCO指标,后面两个值是训练平均损失以及学习率 * 在使用预测脚本时,要将`weights_path`设置为你自己生成的权重路径。 * 使用validation文件时,注意确保你的验证集或者测试集中必须包含每个类别的目标,并且使用时只需要修改`--num-classes`、`--data-path`和`--weights-path`即可,其他代码尽量不要改动 ## 本项目训练得到的权重(Faster R-CNN + Resnet50) * 链接: https://pan.baidu.com/s/1iF-Yl_9TkFFeAy-JysfGSw 密码: d2d8 * COCO2017验证集mAP: ``` Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.277 Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.453 Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.290 Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.126 Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.308 Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.378 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.243 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.358 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.366 Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.169 Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.402
class IntermediateLayerGetter(nn.ModuleDict):
    """Wrap a model so that a forward pass returns selected intermediate outputs.

    The wrapper assumes the model's direct children were registered in the
    order they are executed, so a module must not be reused twice in the
    original forward. Only direct children can be queried: ``model.feature1``
    works, ``model.feature1.layer2`` does not.

    Arguments:
        model (nn.Module): model whose features are extracted.
        return_layers (Dict[name, new_name]): maps the name of each child
            whose activation should be returned to the key under which it is
            stored in the output dict.

    Raises:
        ValueError: if a key of ``return_layers`` is not a direct child of
            ``model``.
    """
    __annotations__ = {
        "return_layers": Dict[str, str],
    }

    def __init__(self, model, return_layers):
        child_names = [child_name for child_name, _ in model.named_children()]
        if not set(return_layers).issubset(child_names):
            raise ValueError("return_layers are not present in model")

        # Names still to be encountered; the caller's dict is left untouched.
        pending = {str(key) for key, _ in return_layers.items()}

        # Keep children in registration order up to and including the last
        # requested one; anything after it is never executed and is dropped.
        kept = OrderedDict()
        for child_name, child in model.named_children():
            kept[child_name] = child
            pending.discard(child_name)
            if len(pending) == 0:
                break

        super().__init__(kept)
        self.return_layers = return_layers

    def forward(self, x):
        collected = OrderedDict()
        # Run the kept children sequentially, recording the requested outputs
        # under their user-chosen names.
        for child_name, child in self.items():
            x = child(x)
            if child_name in self.return_layers:
                collected[self.return_layers[child_name]] = x
        return collected
class FeaturePyramidNetwork(nn.Module):
    """Add a Feature Pyramid Network on top of a set of feature maps.

    Based on "Feature Pyramid Network for Object Detection". The feature maps
    are expected in increasing depth order, passed as an ``OrderedDict`` of
    tensors.

    Arguments:
        in_channels_list (list[int]): number of channels of each input feature
            map; entries equal to 0 are skipped.
        out_channels (int): number of channels of every FPN output.
        extra_blocks (ExtraFPNBlock or None): if provided, it is called with
            the FPN features, the original features and their names, and must
            return a new list of feature maps and the corresponding names.
    """

    def __init__(self, in_channels_list, out_channels, extra_blocks=None):
        super().__init__()
        # 1x1 convs that bring each backbone feature map to ``out_channels``.
        self.inner_blocks = nn.ModuleList()
        # 3x3 convs applied to the merged maps to produce the prediction features.
        self.layer_blocks = nn.ModuleList()
        for in_channels in in_channels_list:
            if in_channels == 0:
                continue
            inner_block_module = nn.Conv2d(in_channels, out_channels, 1)
            layer_block_module = nn.Conv2d(out_channels, out_channels, 3, padding=1)
            self.inner_blocks.append(inner_block_module)
            self.layer_blocks.append(layer_block_module)

        # Initialize parameters now, before ``extra_blocks`` is attached, so
        # its own initialization is left alone. ``modules()`` is required:
        # ``children()`` yields only the two ModuleList containers, so the
        # Conv2d check would never match and the init would be skipped.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_uniform_(m.weight, a=1)
                nn.init.constant_(m.bias, 0)

        self.extra_blocks = extra_blocks

    def get_result_from_inner_blocks(self, x: Tensor, idx: int) -> Tensor:
        """
        This is equivalent to self.inner_blocks[idx](x),
        but torchscript doesn't support this yet
        """
        num_blocks = len(self.inner_blocks)
        if idx < 0:
            idx += num_blocks
        i = 0
        out = x
        for module in self.inner_blocks:
            if i == idx:
                out = module(x)
            i += 1
        return out

    def get_result_from_layer_blocks(self, x: Tensor, idx: int) -> Tensor:
        """
        This is equivalent to self.layer_blocks[idx](x),
        but torchscript doesn't support this yet
        """
        num_blocks = len(self.layer_blocks)
        if idx < 0:
            idx += num_blocks
        i = 0
        out = x
        for module in self.layer_blocks:
            if i == idx:
                out = module(x)
            i += 1
        return out

    def forward(self, x: Dict[str, Tensor]) -> Dict[str, Tensor]:
        """
        Computes the FPN for a set of feature maps.

        Arguments:
            x (OrderedDict[Tensor]): feature maps for each feature level.

        Returns:
            results (OrderedDict[Tensor]): feature maps after FPN layers.
                They are ordered from highest resolution first.
        """
        # Unpack the OrderedDict into two lists for easier handling.
        names = list(x.keys())
        x = list(x.values())

        # Start from the deepest map: adjust its channels, then apply the 3x3
        # conv to obtain the first prediction feature.
        last_inner = self.get_result_from_inner_blocks(x[-1], -1)
        results = []
        results.append(self.get_result_from_layer_blocks(last_inner, -1))

        # Walk towards the shallower maps, adding the upsampled top-down
        # signal to each lateral connection.
        for idx in range(len(x) - 2, -1, -1):
            inner_lateral = self.get_result_from_inner_blocks(x[idx], idx)
            feat_shape = inner_lateral.shape[-2:]
            inner_top_down = F.interpolate(last_inner, size=feat_shape, mode="nearest")
            last_inner = inner_lateral + inner_top_down
            results.insert(0, self.get_result_from_layer_blocks(last_inner, idx))

        # Optional extra levels derived from the existing outputs.
        if self.extra_blocks is not None:
            results, names = self.extra_blocks(results, x, names)

        # Make it back an OrderedDict.
        out = OrderedDict([(k, v) for k, v in zip(names, results)])

        return out
class ConvBNReLU(nn.Sequential):
    """Bias-free ``Conv2d`` followed by a normalisation layer and ``ReLU6``.

    Padding is ``(kernel_size - 1) // 2``. ``norm_layer`` is called with the
    output channel count and defaults to ``nn.BatchNorm2d``.
    """

    def __init__(self, in_channel, out_channel, kernel_size=3, stride=1, groups=1, norm_layer=None):
        norm_factory = nn.BatchNorm2d if norm_layer is None else norm_layer
        pad = (kernel_size - 1) // 2

        conv = nn.Conv2d(in_channel, out_channel, kernel_size, stride, pad,
                         groups=groups, bias=False)
        norm = norm_factory(out_channel)
        activation = nn.ReLU6(inplace=True)
        super().__init__(conv, norm, activation)
class BasicBlock(nn.Module):
    """Two-layer 3x3 residual block.

    ``downsample``, when given, is applied to the input to produce the
    shortcut branch; otherwise the input itself is the shortcut. Extra
    keyword arguments are accepted and ignored.
    """
    expansion = 1

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, **kwargs):
        super().__init__()
        # Only the first conv carries the stride; the second keeps resolution.
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample

    def forward(self, x):
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Residual addition, then the final activation.
        out += shortcut
        return self.relu(out)
nn.ReLU(inplace=True) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) self.layer1 = self._make_layer(block, 64, blocks_num[0]) self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2) self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2) self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2) if self.include_top: self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) # output size = (1, 1) self.fc = nn.Linear(512 * block.expansion, num_classes) for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') def _make_layer(self, block, channel, block_num, stride=1): downsample = None if stride != 1 or self.in_channel != channel * block.expansion: downsample = nn.Sequential( nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False), nn.BatchNorm2d(channel * block.expansion)) layers = [] layers.append(block(self.in_channel, channel, downsample=downsample, stride=stride, groups=self.groups, width_per_group=self.width_per_group)) self.in_channel = channel * block.expansion for _ in range(1, block_num): layers.append(block(self.in_channel, channel, groups=self.groups, width_per_group=self.width_per_group)) return nn.Sequential(*layers) def forward(self, x): x = self.conv1(x) x = self.bn1(x) x = self.relu(x) x = self.maxpool(x) x = self.layer1(x) x = self.layer2(x) x = self.layer3(x) x = self.layer4(x) if self.include_top: x = self.avgpool(x) x = torch.flatten(x, 1) x = self.fc(x) return x def resnet34(num_classes=1000, include_top=True): # https://download.pytorch.org/models/resnet34-333f7ec4.pth return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top) def resnet50(num_classes=1000, include_top=True): # https://download.pytorch.org/models/resnet50-19c8e357.pth return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top) def resnet101(num_classes=1000, include_top=True): 
class Bottleneck(nn.Module):
    """1x1 -> 3x3 -> 1x1 residual block with a configurable normalisation layer.

    The block outputs ``out_channel * expansion`` channels. The stride is
    applied in the 3x3 convolution. ``norm_layer`` is called with a channel
    count and defaults to ``nn.BatchNorm2d``.
    """
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, norm_layer=None):
        super().__init__()
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        expanded = out_channel * self.expansion

        # Squeeze channels.
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=1, bias=False)
        self.bn1 = make_norm(out_channel)
        # Spatial convolution (carries the stride).
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = make_norm(out_channel)
        # Unsqueeze channels.
        self.conv3 = nn.Conv2d(out_channel, expanded, kernel_size=1, stride=1, bias=False)
        self.bn3 = make_norm(expanded)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out += shortcut
        return self.relu(out)
def resnet50_fpn_backbone(pretrain_path="",
                          norm_layer=FrozenBatchNorm2d,
                          trainable_layers=3,
                          returned_layers=None,
                          extra_blocks=None):
    """
    Build a ResNet-50 backbone wrapped with a feature pyramid network.

    Args:
        pretrain_path: path to ResNet-50 pretrained weights; leave empty to
            skip loading.
        norm_layer: normalization layer class. The default, FrozenBatchNorm2d,
            behaves like BatchNorm2d but its parameters are not updated (with
            a very small batch size a trainable BN layer tends to hurt).
            Pass a regular BatchNorm2d class when the batch size is large.
            (https://github.com/facebookresearch/maskrcnn-benchmark/issues/267)
        trainable_layers: how many stages, counted from the end of the network
            (layer4, layer3, layer2, layer1, conv1), stay trainable; must be in
            the range [0, 5].
        returned_layers: indices (1-4) of the ResNet stages whose outputs are
            fed to the FPN; defaults to all four.
        extra_blocks: extra block applied on top of the FPN outputs; defaults
            to ``LastLevelMaxPool()``.

    Returns:
        A ``BackboneWithFPN`` built around the ResNet-50 body; every FPN
        output is created with 256 channels.
    """
    resnet_backbone = ResNet(Bottleneck, [3, 4, 6, 3],
                             include_top=False,
                             norm_layer=norm_layer)

    # norm_layer is a class used as a factory, not an instance, so it has to be
    # compared as a type. The previous isinstance() check could never be true,
    # which meant the eps overwrite below was silently skipped.
    if isinstance(norm_layer, type) and issubclass(norm_layer, FrozenBatchNorm2d):
        overwrite_eps(resnet_backbone, 0.0)

    if pretrain_path != "":
        assert os.path.exists(pretrain_path), "{} is not exist.".format(pretrain_path)
        # Load on CPU first so weights saved from a GPU can be read on any
        # machine; load_state_dict copies them into the existing parameters.
        state_dict = torch.load(pretrain_path, map_location="cpu")
        print(resnet_backbone.load_state_dict(state_dict, strict=False))

    # select layers that won't be frozen
    assert 0 <= trainable_layers <= 5
    layers_to_train = ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'][:trainable_layers]

    # when every layer is trained, remember the bn1 that follows conv1
    if trainable_layers == 5:
        layers_to_train.append("bn1")

    # freeze every parameter that does not belong to one of layers_to_train
    # (str.startswith with an empty tuple is False, so 0 trainable layers
    # freezes everything, exactly as before)
    trainable_prefixes = tuple(layers_to_train)
    for name, parameter in resnet_backbone.named_parameters():
        if not name.startswith(trainable_prefixes):
            parameter.requires_grad_(False)

    if extra_blocks is None:
        extra_blocks = LastLevelMaxPool()

    if returned_layers is None:
        returned_layers = [1, 2, 3, 4]
    # returned stage indices must lie in 1..4
    assert min(returned_layers) > 0 and max(returned_layers) < 5

    # e.g. {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'}
    return_layers = {f'layer{k}': str(v) for v, k in enumerate(returned_layers)}

    # in_channel holds the channel count of the layer4 output (2048)
    in_channels_stage2 = resnet_backbone.in_channel // 8  # 256
    # channel count of each ResNet feature map handed to the FPN
    in_channels_list = [in_channels_stage2 * 2 ** (i - 1) for i in returned_layers]
    # channel count of every feature map produced by the FPN
    out_channels = 256
    return BackboneWithFPN(resnet_backbone, return_layers, in_channels_list, out_channels, extra_blocks=extra_blocks)
def make_features(cfg: list):
    """
    Build the convolutional feature extractor described by ``cfg``.

    Every integer entry adds a 3x3 convolution (padding 1) with that many
    output channels followed by an in-place ReLU; every ``"M"`` entry adds a
    2x2 max-pooling layer with stride 2. The first convolution expects a
    3-channel input.

    Args:
        cfg: sequence of output-channel counts and ``"M"`` markers.

    Returns:
        An ``nn.Sequential`` holding the layers in configuration order.
    """
    modules = []
    prev_channels = 3
    for item in cfg:
        if item == "M":
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        modules.append(nn.Conv2d(prev_channels, item, kernel_size=3, padding=1))
        modules.append(nn.ReLU(True))
        prev_channels = item
    return nn.Sequential(*modules)
def main(args):
    """
    Train the detection model on COCO 2017 and evaluate it after every epoch.

    Per epoch the function trains once over the training set, steps the
    learning-rate scheduler, evaluates on the validation set, appends the
    metrics to a timestamped results file and saves a checkpoint. After the
    last epoch it plots the loss / learning-rate and mAP curves.

    Args:
        args: parsed command-line namespace. The attributes read here are
            device, data_path, aspect_ratio_group_factor, batch_size,
            num_classes, lr, momentum, weight_decay, amp, lr_steps, lr_gamma,
            resume, start_epoch, epochs and (optionally) output_dir.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    # file that collects the coco metrics of every epoch
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    # Honour --output-dir instead of a hard-coded folder. The fallback keeps the
    # historical location for callers whose namespace has no output_dir.
    output_dir = getattr(args, "output_dir", "./save_weights")
    os.makedirs(output_dir, exist_ok=True)

    data_transform = {
        "train": transforms.Compose([transforms.ToTensor(),
                                     transforms.RandomHorizontalFlip(0.5)]),
        "val": transforms.Compose([transforms.ToTensor()])
    }

    COCO_root = args.data_path

    # load train data set
    # coco2017 -> annotations -> instances_train2017.json
    train_dataset = CocoDetection(COCO_root, "train", data_transform["train"])
    train_sampler = None
    train_batch_sampler = None

    # Optionally build batches from images with a similar aspect ratio;
    # this lowers the GPU memory needed for training and is on by default.
    if args.aspect_ratio_group_factor >= 0:
        train_sampler = torch.utils.data.RandomSampler(train_dataset)
        # index of the aspect-ratio bin every image falls into
        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)
        # every batch is drawn from a single aspect-ratio bin
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)

    # The collate_fn is custom because each sample holds an image and its
    # targets, which the default collate function cannot stack into a batch.
    batch_size = args.batch_size
    # os.cpu_count() may return None when the count cannot be determined
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using %g dataloader workers' % nw)

    if train_sampler is not None:
        # aspect-ratio grouping requires a batch_sampler in the dataloader
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_sampler=train_batch_sampler,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)
    else:
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_size=batch_size,
                                                        shuffle=True,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)

    # load validation data set
    # coco2017 -> annotations -> instances_val2017.json
    val_dataset = CocoDetection(COCO_root, "val", data_transform["val"])
    val_data_set_loader = torch.utils.data.DataLoader(val_dataset,
                                                      batch_size=1,
                                                      shuffle=False,
                                                      pin_memory=True,
                                                      num_workers=nw,
                                                      collate_fn=val_dataset.collate_fn)

    # create model; num_classes equals background + classes
    model = create_model(num_classes=args.num_classes + 1)
    # print(model)

    model.to(device)

    # define optimizer over the parameters that are still trainable
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        milestones=args.lr_steps,
                                                        gamma=args.lr_gamma)

    # resume from a previously saved checkpoint when one is given
    if args.resume != "":
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp and "scaler" in checkpoint:
            scaler.load_state_dict(checkpoint["scaler"])
        print("the training process from epoch{}...".format(args.start_epoch))

    train_loss = []
    learning_rate = []
    val_map = []

    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch, printing every 50 iterations
        mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader,
                                              device=device, epoch=epoch,
                                              print_freq=50, warmup=True,
                                              scaler=scaler)
        train_loss.append(mean_loss.item())
        learning_rate.append(lr)

        # update the learning rate
        lr_scheduler.step()

        # evaluate on the test dataset
        coco_info = utils.evaluate(model, val_data_set_loader, device=device)

        # write into txt
        with open(results_file, "a") as f:
            # each line holds the coco metrics followed by loss and learning rate
            result_info = [f"{i:.4f}" for i in coco_info + [mean_loss.item()]] + [f"{lr:.6f}"]
            txt = "epoch:{} {}".format(epoch, '  '.join(result_info))
            f.write(txt + "\n")

        val_map.append(coco_info[1])  # pascal mAP

        # save weights
        save_files = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler': lr_scheduler.state_dict(),
            'epoch': epoch}
        if args.amp:
            save_files["scaler"] = scaler.state_dict()
        torch.save(save_files, os.path.join(output_dir, "model-{}.pth".format(epoch)))

    # plot loss and lr curve
    if len(train_loss) != 0 and len(learning_rate) != 0:
        from plot_curve import plot_loss_and_lr
        plot_loss_and_lr(train_loss, learning_rate)

    # plot mAP curve
    if len(val_map) != 0:
        from plot_curve import plot_map
        plot_map(val_map)
parser.add_argument('--output-dir', default='./save_weights', help='path where to save') # 若需要接着上次训练,则指定上次训练保存权重文件地址 parser.add_argument('--resume', default='', type=str, help='resume from checkpoint') # 指定接着从哪个epoch数开始训练 parser.add_argument('--start_epoch', default=0, type=int, help='start epoch') # 训练的总epoch数 parser.add_argument('--epochs', default=26, type=int, metavar='N', help='number of total epochs to run') # 学习率 parser.add_argument('--lr', default=0.005, type=float, help='initial learning rate, 0.02 is the default value for training ' 'on 8 gpus and 2 images_per_gpu') # SGD的momentum参数 parser.add_argument('--momentum', default=0.9, type=float, metavar='M', help='momentum') # SGD的weight_decay参数 parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, metavar='W', help='weight decay (default: 1e-4)', dest='weight_decay') # 针对torch.optim.lr_scheduler.MultiStepLR的参数 parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int, help='decrease lr every step-size epochs') # 针对torch.optim.lr_scheduler.MultiStepLR的参数 parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma') # 训练的batch size parser.add_argument('--batch_size', default=4, type=int, metavar='N', help='batch size when training.') parser.add_argument('--aspect-ratio-group-factor', default=3, type=int) # 是否使用混合精度训练(需要GPU支持混合精度) parser.add_argument("--amp", default=False, help="Use torch.cuda.amp for mixed precision training") args = parser.parse_args() print(args) # 检查保存权重文件夹是否存在,不存在则创建 if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) main(args) ================================================ FILE: pytorch_object_detection/train_coco_dataset/coco91_indices.json ================================================ { "1": "person", "2": "bicycle", "3": "car", "4": "motorcycle", "5": "airplane", "6": "bus", "7": "train", "8": "truck", "9": "boat", "10": "traffic light", "11": "fire hydrant", "12": "N/A", "13": "stop sign", 
"14": "parking meter", "15": "bench", "16": "bird", "17": "cat", "18": "dog", "19": "horse", "20": "sheep", "21": "cow", "22": "elephant", "23": "bear", "24": "zebra", "25": "giraffe", "26": "N/A", "27": "backpack", "28": "umbrella", "29": "N/A", "30": "N/A", "31": "handbag", "32": "tie", "33": "suitcase", "34": "frisbee", "35": "skis", "36": "snowboard", "37": "sports ball", "38": "kite", "39": "baseball bat", "40": "baseball glove", "41": "skateboard", "42": "surfboard", "43": "tennis racket", "44": "bottle", "45": "N/A", "46": "wine glass", "47": "cup", "48": "fork", "49": "knife", "50": "spoon", "51": "bowl", "52": "banana", "53": "apple", "54": "sandwich", "55": "orange", "56": "broccoli", "57": "carrot", "58": "hot dog", "59": "pizza", "60": "donut", "61": "cake", "62": "chair", "63": "couch", "64": "potted plant", "65": "bed", "66": "N/A", "67": "dining table", "68": "N/A", "69": "N/A", "70": "toilet", "71": "N/A", "72": "tv", "73": "laptop", "74": "mouse", "75": "remote", "76": "keyboard", "77": "cell phone", "78": "microwave", "79": "oven", "80": "toaster", "81": "sink", "82": "refrigerator", "83": "N/A", "84": "book", "85": "clock", "86": "vase", "87": "scissors", "88": "teddy bear", "89": "hair drier", "90": "toothbrush" } ================================================ FILE: pytorch_object_detection/train_coco_dataset/compute_receptive_field.py ================================================ # vgg16(D) model = [[3, 1], [3, 1], [2, 2], # maxpool [3, 1], [3, 1], [2, 2], # maxpool [3, 1], [3, 1], [3, 1], [2, 2], # maxpool [3, 1], [3, 1], [3, 1], [2, 2], # maxpool [3, 1], [3, 1], [3, 1]] field = model[-1][0] for kernel, stride in model[::-1]: field = (field - 1) * stride + kernel print(field) # 228 ================================================ FILE: pytorch_object_detection/train_coco_dataset/draw_box_utils.py ================================================ from PIL.Image import Image, fromarray import PIL.ImageDraw as ImageDraw import PIL.ImageFont 
def draw_text(draw,
              box: list,
              cls: int,
              score: float,
              category_index: dict,
              color: str,
              font: str = 'arial.ttf',
              font_size: int = 24):
    """
    Draw the class name and confidence of one detection next to its box.

    The label is rendered character by character on a filled background
    strip. It is placed above the box, or below it when there is not enough
    room between the top of the box and the top of the image.

    Args:
        draw: drawing object providing ``rectangle`` and ``text``.
        box: ``[left, top, right, bottom]`` of the detection.
        cls: class index; ``category_index[str(cls)]`` gives its name.
        score: confidence, shown as an integer percentage.
        category_index: mapping from class index (as a string) to class name.
        color: fill colour of the label background.
        font: TrueType font file; the PIL default font is used when it
            cannot be loaded.
        font_size: size used when loading ``font``.
    """
    try:
        font = ImageFont.truetype(font, font_size)
    except IOError:
        font = ImageFont.load_default()

    def _text_size(text):
        # Pillow 10 removed ``getsize``; the right/bottom edges reported by
        # ``getbbox`` are the values ``getsize`` used to return.
        if hasattr(font, "getsize"):
            return font.getsize(text)
        bbox = font.getbbox(text)
        return bbox[2], bbox[3]

    left, top, _, bottom = box
    # If the total height of the display strings added to the top of the bounding
    # box exceeds the top of the image, stack the strings below the bounding box
    # instead of above.
    display_str = f"{category_index[str(cls)]}: {int(100 * score)}%"
    # measure every character once and reuse the sizes for layout and drawing
    char_sizes = [_text_size(ch) for ch in display_str]
    # Each display_str has a top and bottom margin of 0.05x.
    display_str_height = (1 + 2 * 0.05) * max(height for _, height in char_sizes)

    if top > display_str_height:
        text_top = top - display_str_height
        text_bottom = top
    else:
        text_top = bottom
        text_bottom = bottom + display_str_height

    for ch, (text_width, _) in zip(display_str, char_sizes):
        margin = np.ceil(0.05 * text_width)
        draw.rectangle([(left, text_top),
                        (left + text_width + 2 * margin, text_bottom)], fill=color)
        draw.text((left + margin, text_top),
                  ch,
                  fill='black',
                  font=font)
        # advance the pen by the width of the character just drawn
        left += text_width
class CocoDetection(data.Dataset):
    """
    COCO 2017 detection dataset read through pycocotools.

    Args:
        root (string): dataset root; it must contain the image folder
            ``{dataset}2017`` and ``annotations/instances_{dataset}2017.json``.
        dataset (string): which split to load, ``"train"`` or ``"val"``.
        transforms (callable, optional): called as ``transforms(img, target)``
            and expected to return the transformed ``(img, target)`` pair.
    """

    def __init__(self, root, dataset="train", transforms=None):
        super(CocoDetection, self).__init__()
        assert dataset in ["train", "val"], 'dataset must be in ["train", "val"]'
        anno_file = "instances_{}2017.json".format(dataset)
        assert os.path.exists(root), "file '{}' does not exist.".format(root)
        self.img_root = os.path.join(root, "{}2017".format(dataset))
        assert os.path.exists(self.img_root), "path '{}' does not exist.".format(self.img_root)
        self.anno_path = os.path.join(root, "annotations", anno_file)
        assert os.path.exists(self.anno_path), "file '{}' does not exist.".format(self.anno_path)

        self.mode = dataset
        self.transforms = transforms
        self.coco = COCO(self.anno_path)

        # Map category id -> name. The ids are not contiguous: there are only
        # 80 object classes but they keep their stuff91 indices.
        id_to_name = {cat["id"]: cat["name"] for cat in self.coco.cats.values()}
        highest_id = max(id_to_name.keys())  # 90

        # fill the gaps in the id range with "N/A"
        coco_classes = {idx: id_to_name.get(idx, "N/A")
                        for idx in range(1, highest_id + 1)}

        is_train = dataset == "train"
        if is_train:
            # dump the class table next to the training script
            json_str = json.dumps(coco_classes, indent=4)
            with open("coco91_indices.json", "w") as f:
                f.write(json_str)

        self.coco_classes = coco_classes

        ids = list(sorted(self.coco.imgs.keys()))
        if is_train:
            # drop images without objects or with only very small objects
            ids = _coco_remove_images_without_annotations(self.coco, ids)
        self.ids = ids

    def parse_targets(self,
                      img_id: int,
                      coco_targets: list,
                      w: int = None,
                      h: int = None):
        """
        Convert the raw COCO annotations of one image into a target dict.

        Crowd annotations are discarded, boxes are converted from
        ``[xmin, ymin, w, h]`` to ``[xmin, ymin, xmax, ymax]``, clamped to the
        image and removed when they end up with no width or no height.

        Args:
            img_id: id of the image the annotations belong to.
            coco_targets: annotation dicts with the keys ``bbox``,
                ``category_id``, ``area`` and ``iscrowd``.
            w: image width, must be positive.
            h: image height, must be positive.

        Returns:
            dict with the tensors ``boxes``, ``labels``, ``image_id``,
            ``area`` and ``iscrowd``.
        """
        assert w > 0
        assert h > 0

        # keep single-object annotations only
        singles = [obj for obj in coco_targets if obj['iscrowd'] == 0]

        # the reshape guards against images that have no boxes at all
        boxes = torch.as_tensor([obj["bbox"] for obj in singles],
                                dtype=torch.float32).reshape(-1, 4)

        # [xmin, ymin, w, h] -> [xmin, ymin, xmax, ymax]
        boxes[:, 2:] += boxes[:, :2]
        boxes[:, 0::2].clamp_(min=0, max=w)
        boxes[:, 1::2].clamp_(min=0, max=h)

        labels = torch.tensor([obj["category_id"] for obj in singles], dtype=torch.int64)
        area = torch.tensor([obj["area"] for obj in singles])
        iscrowd = torch.tensor([obj["iscrowd"] for obj in singles])

        # valid boxes satisfy x_max > x_min and y_max > y_min
        valid = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])

        return {
            "boxes": boxes[valid],
            "labels": labels[valid],
            "image_id": torch.tensor([img_id]),
            # for conversion to coco api
            "area": area[valid],
            "iscrowd": iscrowd[valid],
        }

    def __getitem__(self, index):
        """
        Args:
            index (int): position inside ``self.ids``.

        Returns:
            tuple: ``(image, target)`` where ``target`` is the dict built by
            ``parse_targets``; both are passed through ``self.transforms``
            when it is set.
        """
        img_id = self.ids[index]
        coco_target = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))

        file_name = self.coco.loadImgs(img_id)[0]['file_name']
        img = Image.open(os.path.join(self.img_root, file_name)).convert('RGB')

        width, height = img.size
        target = self.parse_targets(img_id, coco_target, width, height)
        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Number of images kept in this split."""
        return len(self.ids)

    def get_height_and_width(self, index):
        """Return ``(height, width)`` of the image at ``index`` from the annotation file."""
        info = self.coco.loadImgs(self.ids[index])[0]
        return info["height"], info["width"]

    @staticmethod
    def collate_fn(batch):
        """Turn a list of ``(image, target)`` samples into ``(images, targets)`` tuples."""
        return tuple(zip(*batch))
def batched_nms(boxes, scores, idxs, iou_threshold):
    # type: (Tensor, Tensor, Tensor, float) -> Tensor
    """
    Run non-maximum suppression separately for every group of boxes.

    ``idxs`` assigns each box to a group (a category or a feature level);
    boxes from different groups never suppress each other.

    Parameters
    ----------
    boxes : Tensor[N, 4]
        boxes where NMS will be performed. They
        are expected to be in (x1, y1, x2, y2) format
    scores : Tensor[N]
        scores for each one of the boxes
    idxs : Tensor[N]
        group index for each one of the boxes.
    iou_threshold : float
        discards all overlapping boxes
        with IoU > iou_threshold

    Returns
    -------
    keep : Tensor
        int64 tensor with the indices of the elements that have been kept
        by NMS (an empty tensor when there are no boxes)
    """
    if boxes.numel() == 0:
        return torch.empty((0,), dtype=torch.int64, device=boxes.device)

    # Strategy: shift the boxes of every group by an offset that depends only
    # on the group index and exceeds the largest coordinate, so boxes of
    # different groups can never overlap and a single NMS call is enough.
    # ``idxs.to(boxes)`` only aligns dtype and device with ``boxes``.
    group_shift = idxs.to(boxes) * (boxes.max() + 1)
    shifted_boxes = boxes + group_shift[:, None]
    return nms(shifted_boxes, scores, iou_threshold)
def box_area(boxes):
    """
    Compute the area of every box in ``boxes``.

    Arguments:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format

    Returns:
        area (Tensor[N]): width times height of each box
    """
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    return widths * heights
class BalancedPositiveNegativeSampler(object):
    """
    Draws a per-image sample of positive and negative elements so that the
    positives make up at most a fixed fraction of the sample.
    """

    def __init__(self, batch_size_per_image, positive_fraction):
        # type: (int, float) -> None
        """
        Arguments:
            batch_size_per_image (int): number of elements to select per image
            positive_fraction (float): target share of positives in each sample
        """
        self.batch_size_per_image = batch_size_per_image
        self.positive_fraction = positive_fraction

    def __call__(self, matched_idxs):
        # type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
        """
        Arguments:
            matched_idxs: one tensor per image. Values >= 1 are positives,
                values == 0 are negatives, anything else is never sampled.

        Returns:
            pos_idx (list[tensor]), neg_idx (list[tensor]): for every image a
            uint8 mask (same shape as the input tensor) marking the sampled
            positives and the sampled negatives respectively.
        """
        pos_idx = []
        neg_idx = []

        for per_image in matched_idxs:
            # Candidate pools: indices of positives (>= 1) and negatives (== 0).
            fg_candidates = torch.where(torch.ge(per_image, 1))[0]
            bg_candidates = torch.where(torch.eq(per_image, 0))[0]

            # Positive quota, capped by how many positives actually exist.
            fg_quota = int(self.batch_size_per_image * self.positive_fraction)
            fg_count = min(fg_candidates.numel(), fg_quota)

            # Negatives fill the remainder, capped by how many exist.
            bg_quota = self.batch_size_per_image - fg_count
            bg_count = min(bg_candidates.numel(), bg_quota)

            # Random subset of each pool: shuffle, then keep the first k.
            fg_order = torch.randperm(fg_candidates.numel(), device=fg_candidates.device)
            bg_order = torch.randperm(bg_candidates.numel(), device=bg_candidates.device)
            fg_chosen = fg_candidates[fg_order[:fg_count]]
            bg_chosen = bg_candidates[bg_order[:bg_count]]

            # Turn the chosen indices into binary masks.
            fg_mask = torch.zeros_like(per_image, dtype=torch.uint8)
            bg_mask = torch.zeros_like(per_image, dtype=torch.uint8)
            fg_mask[fg_chosen] = 1
            bg_mask[bg_chosen] = 1

            pos_idx.append(fg_mask)
            neg_idx.append(bg_mask)

        return pos_idx, neg_idx
class BoxCoder(object):
    """
    Converts between (x1, y1, x2, y2) boxes and the (dx, dy, dw, dh)
    regression parameters, expressed relative to anchors/proposals, that the
    box regressors are trained on.
    """

    def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)):
        # type: (Tuple[float, float, float, float], float) -> None
        """
        Arguments:
            weights (4-element tuple): scaling applied to (dx, dy, dw, dh)
            bbox_xform_clip (float): upper bound applied to dw/dh before exp()
        """
        self.weights = weights
        self.bbox_xform_clip = bbox_xform_clip

    def encode(self, reference_boxes, proposals):
        # type: (List[Tensor], List[Tensor]) -> List[Tensor]
        """
        Compute regression parameters from anchors/proposals and the
        ground-truth boxes matched to them.

        Args:
            reference_boxes: List[Tensor], gt box matched to each proposal/anchor
            proposals: List[Tensor], anchors/proposals

        Returns:
            regression parameters, split back into one tensor per image
        """
        # Remember the per-image counts so the batch can be processed as one
        # tensor and split apart again afterwards.
        counts = [len(per_image) for per_image in reference_boxes]
        all_references = torch.cat(reference_boxes, dim=0)
        all_proposals = torch.cat(proposals, dim=0)

        # targets_dx, targets_dy, targets_dw, targets_dh
        encoded = self.encode_single(all_references, all_proposals)
        return encoded.split(counts, 0)

    def encode_single(self, reference_boxes, proposals):
        """
        Encode a set of proposals with respect to some reference boxes.

        Arguments:
            reference_boxes (Tensor): reference boxes
            proposals (Tensor): boxes to be encoded
        """
        # The weights follow the dtype/device of the reference boxes.
        weights = torch.as_tensor(self.weights,
                                  dtype=reference_boxes.dtype,
                                  device=reference_boxes.device)
        return encode_boxes(reference_boxes, proposals, weights)

    def decode(self, rel_codes, boxes):
        # type: (Tensor, List[Tensor]) -> Tensor
        """
        Apply predicted regression parameters to anchors/proposals.

        Args:
            rel_codes: bbox regression parameters
            boxes: anchors/proposals, one tensor per image

        Returns:
            decoded boxes; reshaped to (total_boxes, -1, 4) when there is at
            least one box, otherwise the 2-D result of decode_single
        """
        assert isinstance(boxes, (list, tuple))
        assert isinstance(rel_codes, torch.Tensor)

        total_boxes = 0
        for per_image in boxes:
            total_boxes += per_image.size(0)

        decoded = self.decode_single(rel_codes, torch.cat(boxes, dim=0))

        # Skip the reshape for an empty result; reshape would raise there.
        if total_boxes > 0:
            decoded = decoded.reshape(total_boxes, -1, 4)
        return decoded

    def decode_single(self, rel_codes, boxes):
        """
        From a set of original boxes and encoded relative box offsets,
        get the decoded boxes.

        Arguments:
            rel_codes (Tensor): encoded boxes (bbox regression parameters)
            boxes (Tensor): reference boxes (anchors/proposals)
        """
        boxes = boxes.to(rel_codes.dtype)

        # Anchor/proposal geometry from (xmin, ymin, xmax, ymax).
        x_min = boxes[:, 0]
        y_min = boxes[:, 1]
        widths = boxes[:, 2] - x_min
        heights = boxes[:, 3] - y_min
        ctr_x = x_min + 0.5 * widths
        ctr_y = y_min + 0.5 * heights

        # Undo the per-coordinate weighting; every 4th column belongs to the
        # same parameter, so several classes per row are handled at once.
        wx, wy, ww, wh = self.weights
        dx = rel_codes[:, 0::4] / wx
        dy = rel_codes[:, 1::4] / wy
        dw = rel_codes[:, 2::4] / ww
        dh = rel_codes[:, 3::4] / wh

        # Cap dw/dh so torch.exp() never receives overly large values.
        dw = torch.clamp(dw, max=self.bbox_xform_clip)
        dh = torch.clamp(dh, max=self.bbox_xform_clip)

        pred_ctr_x = dx * widths[:, None] + ctr_x[:, None]
        pred_ctr_y = dy * heights[:, None] + ctr_y[:, None]
        pred_w = torch.exp(dw) * widths[:, None]
        pred_h = torch.exp(dh) * heights[:, None]

        # NOTE(review): 0.5 is built as a tensor rather than a Python float,
        # presumably for tracing/export -- confirm before simplifying.
        half_w = torch.tensor(0.5, dtype=pred_ctr_x.dtype, device=pred_w.device) * pred_w
        half_h = torch.tensor(0.5, dtype=pred_ctr_y.dtype, device=pred_h.device) * pred_h

        corners = (
            pred_ctr_x - half_w,  # xmin
            pred_ctr_y - half_h,  # ymin
            pred_ctr_x + half_w,  # xmax
            pred_ctr_y + half_h,  # ymax
        )
        return torch.stack(corners, dim=2).flatten(1)
or equal to this value are candidate matches. low_threshold (float): a lower quality threshold used to stratify matches into three levels: 1) matches >= high_threshold 2) BETWEEN_THRESHOLDS matches in [low_threshold, high_threshold) 3) BELOW_LOW_THRESHOLD matches in [0, low_threshold) allow_low_quality_matches (bool): if True, produce additional matches for predictions that have only low-quality match candidates. See set_low_quality_matches_ for more details. """ self.BELOW_LOW_THRESHOLD = -1 self.BETWEEN_THRESHOLDS = -2 assert low_threshold <= high_threshold self.high_threshold = high_threshold # 0.7 self.low_threshold = low_threshold # 0.3 self.allow_low_quality_matches = allow_low_quality_matches def __call__(self, match_quality_matrix): """ 计算anchors与每个gtboxes匹配的iou最大值,并记录索引, iou= self.low_threshold) & ( matched_vals < self.high_threshold ) # iou小于low_threshold的matches索引置为-1 matches[below_low_threshold] = self.BELOW_LOW_THRESHOLD # -1 # iou在[low_threshold, high_threshold]之间的matches索引置为-2 matches[between_thresholds] = self.BETWEEN_THRESHOLDS # -2 if self.allow_low_quality_matches: assert all_matches is not None self.set_low_quality_matches_(matches, all_matches, match_quality_matrix) return matches def set_low_quality_matches_(self, matches, all_matches, match_quality_matrix): """ Produce additional matches for predictions that have only low-quality matches. Specifically, for each ground-truth find the set of predictions that have maximum overlap with it (including ties); for each prediction in that set, if it is unmatched, then match it to the ground-truth with which it has the highest quality value. """ # For each gt, find the prediction with which it has highest quality # 对于每个gt boxes寻找与其iou最大的anchor, # highest_quality_foreach_gt为匹配到的最大iou值 highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1) # the dimension to reduce. 
def smooth_l1_loss(input, target, beta: float = 1. / 9, size_average: bool = True):
    """
    Smooth L1 loss with a configurable transition point ``beta``.

    Very similar to the smooth_l1_loss from pytorch, but with the extra
    beta parameter. For an absolute error ``n``:

        loss = 0.5 * n ** 2 / beta    if n < beta
        loss = n - 0.5 * beta         otherwise

    Arguments:
        input (Tensor): predictions
        target (Tensor): regression targets, broadcastable against ``input``
        beta (float): error below which the loss is quadratic. Values
            below 1e-5 select a plain L1 loss.
        size_average (bool): return the mean of the element-wise loss when
            True, the sum otherwise

    Returns:
        Tensor: scalar loss
    """
    n = torch.abs(input - target)

    if beta < 1e-5:
        # torch.where evaluates both branches, so a (near-)zero beta would
        # put inf/NaN in the quadratic branch and give NaN gradients even
        # though that branch is never selected. The loss is pure L1 here.
        loss = n
    else:
        cond = torch.lt(n, beta)
        loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta)

    if size_average:
        return loss.mean()
    return loss.sum()
Arguments: backbone (nn.Module): rpn (nn.Module): roi_heads (nn.Module): takes the features + the proposals from the RPN and computes detections / masks from it. transform (nn.Module): performs the data transformation from the inputs to feed into the model """ def __init__(self, backbone, rpn, roi_heads, transform): super(FasterRCNNBase, self).__init__() self.transform = transform self.backbone = backbone self.rpn = rpn self.roi_heads = roi_heads # used only on torchscript mode self._has_warned = False @torch.jit.unused def eager_outputs(self, losses, detections): # type: (Dict[str, Tensor], List[Dict[str, Tensor]]) -> Union[Dict[str, Tensor], List[Dict[str, Tensor]]] if self.training: return losses return detections def forward(self, images, targets=None): # type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]] """ Arguments: images (list[Tensor]): images to be processed targets (list[Dict[Tensor]]): ground-truth boxes present in the image (optional) Returns: result (list[BoxList] or dict[Tensor]): the output from the model. During training, it returns a dict[Tensor] which contains the losses. During testing, it returns list[BoxList] contains additional fields like `scores`, `labels` and `mask` (for Mask R-CNN models). 
""" if self.training and targets is None: raise ValueError("In training mode, targets should be passed") if self.training: assert targets is not None for target in targets: # 进一步判断传入的target的boxes参数是否符合规定 boxes = target["boxes"] if isinstance(boxes, torch.Tensor): if len(boxes.shape) != 2 or boxes.shape[-1] != 4: raise ValueError("Expected target boxes to be a tensor" "of shape [N, 4], got {:}.".format( boxes.shape)) else: raise ValueError("Expected target boxes to be of type " "Tensor, got {:}.".format(type(boxes))) original_image_sizes = torch.jit.annotate(List[Tuple[int, int]], []) for img in images: val = img.shape[-2:] assert len(val) == 2 # 防止输入的是个一维向量 original_image_sizes.append((val[0], val[1])) # original_image_sizes = [img.shape[-2:] for img in images] images, targets = self.transform(images, targets) # 对图像进行预处理 # print(images.tensors.shape) features = self.backbone(images.tensors) # 将图像输入backbone得到特征图 if isinstance(features, torch.Tensor): # 若只在一层特征层上预测,将feature放入有序字典中,并编号为‘0’ features = OrderedDict([('0', features)]) # 若在多层特征层上预测,传入的就是一个有序字典 # 将特征层以及标注target信息传入rpn中 # proposals: List[Tensor], Tensor_shape: [num_proposals, 4], # 每个proposals是绝对坐标,且为(x1, y1, x2, y2)格式 proposals, proposal_losses = self.rpn(images, features, targets) # 将rpn生成的数据以及标注target信息传入fast rcnn后半部分 detections, detector_losses = self.roi_heads(features, proposals, images.image_sizes, targets) # 对网络的预测结果进行后处理(主要将bboxes还原到原图像尺度上) detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes) losses = {} losses.update(detector_losses) losses.update(proposal_losses) if torch.jit.is_scripting(): if not self._has_warned: warnings.warn("RCNN always returns a (Losses, Detections) tuple in scripting") self._has_warned = True return losses, detections else: return self.eager_outputs(losses, detections) # if self.training: # return losses # # return detections class TwoMLPHead(nn.Module): """ Standard heads for FPN-based models Arguments: in_channels (int): number of 
class FastRCNNPredictor(nn.Module):
    """
    Final Fast R-CNN layers: one linear layer for class scores and one for
    per-class bounding box regression parameters.

    Arguments:
        in_channels (int): number of input channels
        num_classes (int): number of output classes (including background)
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        # One score per class; four box regression values per class.
        self.cls_score = nn.Linear(in_channels, num_classes)
        self.bbox_pred = nn.Linear(in_channels, num_classes * 4)

    def forward(self, x):
        # A 4-D input is accepted only when its spatial extent is 1x1.
        if x.dim() == 4:
            assert list(x.shape[2:]) == [1, 1]
        flat = x.flatten(start_dim=1)
        return self.cls_score(flat), self.bbox_pred(flat)
The fields of the Dict are as follows: - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values between 0 and H and 0 and W - labels (Int64Tensor[N]): the predicted labels for each image - scores (Tensor[N]): the scores or each prediction Arguments: backbone (nn.Module): the network used to compute the features for the model. It should contain a out_channels attribute, which indicates the number of output channels that each feature map has (and it should be the same for all feature maps). The backbone should return a single Tensor or and OrderedDict[Tensor]. num_classes (int): number of output classes of the model (including the background). If box_predictor is specified, num_classes should be None. min_size (int): minimum size of the image to be rescaled before feeding it to the backbone max_size (int): maximum size of the image to be rescaled before feeding it to the backbone image_mean (Tuple[float, float, float]): mean values used for input normalization. They are generally the mean values of the dataset on which the backbone has been trained on image_std (Tuple[float, float, float]): std values used for input normalization. They are generally the std values of the dataset on which the backbone has been trained on rpn_anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature maps. 
rpn_head (nn.Module): module that computes the objectness and regression deltas from the RPN rpn_pre_nms_top_n_train (int): number of proposals to keep before applying NMS during training rpn_pre_nms_top_n_test (int): number of proposals to keep before applying NMS during testing rpn_post_nms_top_n_train (int): number of proposals to keep after applying NMS during training rpn_post_nms_top_n_test (int): number of proposals to keep after applying NMS during testing rpn_nms_thresh (float): NMS threshold used for postprocessing the RPN proposals rpn_fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be considered as positive during training of the RPN. rpn_bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be considered as negative during training of the RPN. rpn_batch_size_per_image (int): number of anchors that are sampled during training of the RPN for computing the loss rpn_positive_fraction (float): proportion of positive anchors in a mini-batch during training of the RPN rpn_score_thresh (float): during inference, only return proposals with a classification score greater than rpn_score_thresh box_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in the locations indicated by the bounding boxes box_head (nn.Module): module that takes the cropped feature maps as input box_predictor (nn.Module): module that takes the output of box_head and returns the classification logits and box regression deltas. box_score_thresh (float): during inference, only return proposals with a classification score greater than box_score_thresh box_nms_thresh (float): NMS threshold for the prediction head. Used during inference box_detections_per_img (int): maximum number of detections per image, for all classes. 
box_fg_iou_thresh (float): minimum IoU between the proposals and the GT box so that they can be considered as positive during training of the classification head box_bg_iou_thresh (float): maximum IoU between the proposals and the GT box so that they can be considered as negative during training of the classification head box_batch_size_per_image (int): number of proposals that are sampled during training of the classification head box_positive_fraction (float): proportion of positive proposals in a mini-batch during training of the classification head bbox_reg_weights (Tuple[float, float, float, float]): weights for the encoding/decoding of the bounding boxes """ def __init__(self, backbone, num_classes=None, # transform parameter min_size=800, max_size=1333, # 预处理resize时限制的最小尺寸与最大尺寸 image_mean=None, image_std=None, # 预处理normalize时使用的均值和方差 # RPN parameters rpn_anchor_generator=None, rpn_head=None, rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000, # rpn中在nms处理前保留的proposal数(根据score) rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000, # rpn中在nms处理后保留的proposal数 rpn_nms_thresh=0.7, # rpn中进行nms处理时使用的iou阈值 rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3, # rpn计算损失时,采集正负样本设置的阈值 rpn_batch_size_per_image=256, rpn_positive_fraction=0.5, # rpn计算损失时采样的样本数,以及正样本占总样本的比例 rpn_score_thresh=0.0, # Box parameters box_roi_pool=None, box_head=None, box_predictor=None, # 移除低目标概率 fast rcnn中进行nms处理的阈值 对预测结果根据score排序取前100个目标 box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100, box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5, # fast rcnn计算误差时,采集正负样本设置的阈值 box_batch_size_per_image=512, box_positive_fraction=0.25, # fast rcnn计算误差时采样的样本数,以及正样本占所有样本的比例 bbox_reg_weights=None): if not hasattr(backbone, "out_channels"): raise ValueError( "backbone should contain an attribute out_channels" "specifying the number of output channels (assumed to be the" "same for all the levels" ) assert isinstance(rpn_anchor_generator, (AnchorsGenerator, type(None))) assert 
isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None))) if num_classes is not None: if box_predictor is not None: raise ValueError("num_classes should be None when box_predictor " "is specified") else: if box_predictor is None: raise ValueError("num_classes should not be None when box_predictor " "is not specified") # 预测特征层的channels out_channels = backbone.out_channels # 若anchor生成器为空,则自动生成针对resnet50_fpn的anchor生成器 if rpn_anchor_generator is None: anchor_sizes = ((32,), (64,), (128,), (256,), (512,)) aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes) rpn_anchor_generator = AnchorsGenerator( anchor_sizes, aspect_ratios ) # 生成RPN通过滑动窗口预测网络部分 if rpn_head is None: rpn_head = RPNHead( out_channels, rpn_anchor_generator.num_anchors_per_location()[0] ) # 默认rpn_pre_nms_top_n_train = 2000, rpn_pre_nms_top_n_test = 1000, # 默认rpn_post_nms_top_n_train = 2000, rpn_post_nms_top_n_test = 1000, rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test) rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test) # 定义整个RPN框架 rpn = RegionProposalNetwork( rpn_anchor_generator, rpn_head, rpn_fg_iou_thresh, rpn_bg_iou_thresh, rpn_batch_size_per_image, rpn_positive_fraction, rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh, score_thresh=rpn_score_thresh) # Multi-scale RoIAlign pooling if box_roi_pool is None: box_roi_pool = MultiScaleRoIAlign( featmap_names=['0', '1', '2', '3'], # 在哪些特征层进行roi pooling output_size=[7, 7], sampling_ratio=2) # fast RCNN中roi pooling后的展平处理两个全连接层部分 if box_head is None: resolution = box_roi_pool.output_size[0] # 默认等于7 representation_size = 1024 box_head = TwoMLPHead( out_channels * resolution ** 2, representation_size ) # 在box_head的输出上预测部分 if box_predictor is None: representation_size = 1024 box_predictor = FastRCNNPredictor( representation_size, num_classes) # 将roi pooling, box_head以及box_predictor结合在一起 roi_heads = RoIHeads( # box box_roi_pool, box_head, box_predictor, box_fg_iou_thresh, 
box_bg_iou_thresh, # 0.5 0.5 box_batch_size_per_image, box_positive_fraction, # 512 0.25 bbox_reg_weights, box_score_thresh, box_nms_thresh, box_detections_per_img) # 0.05 0.5 100 if image_mean is None: image_mean = [0.485, 0.456, 0.406] if image_std is None: image_std = [0.229, 0.224, 0.225] # 对数据进行标准化,缩放,打包成batch等处理部分 transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std) super(FasterRCNN, self).__init__(backbone, rpn, roi_heads, transform) ================================================ FILE: pytorch_object_detection/train_coco_dataset/network_files/image_list.py ================================================ from typing import List, Tuple from torch import Tensor class ImageList(object): """ Structure that holds a list of images (of possibly varying sizes) as a single tensor. This works by padding the images to the same size, and storing in a field the original sizes of each image """ def __init__(self, tensors, image_sizes): # type: (Tensor, List[Tuple[int, int]]) -> None """ Arguments: tensors (tensor) padding后的图像数据 image_sizes (list[tuple[int, int]]) padding前的图像尺寸 """ self.tensors = tensors self.image_sizes = image_sizes def to(self, device): # type: (Device) -> ImageList # noqa cast_tensor = self.tensors.to(device) return ImageList(cast_tensor, self.image_sizes) ================================================ FILE: pytorch_object_detection/train_coco_dataset/network_files/roi_head.py ================================================ from typing import Optional, List, Dict, Tuple import torch from torch import Tensor import torch.nn.functional as F from . import det_utils from . import boxes as box_ops def fastrcnn_loss(class_logits, box_regression, labels, regression_targets): # type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor] """ Computes the loss for Faster R-CNN. 
Arguments: class_logits : 预测类别概率信息,shape=[num_anchors, num_classes] box_regression : 预测边目标界框回归信息 labels : 真实类别信息 regression_targets : 真实目标边界框信息 Returns: classification_loss (Tensor) box_loss (Tensor) """ labels = torch.cat(labels, dim=0) regression_targets = torch.cat(regression_targets, dim=0) # 计算类别损失信息 classification_loss = F.cross_entropy(class_logits, labels) # get indices that correspond to the regression targets for # the corresponding ground truth labels, to be used with # advanced indexing # 返回标签类别大于0的索引 # sampled_pos_inds_subset = torch.nonzero(torch.gt(labels, 0)).squeeze(1) sampled_pos_inds_subset = torch.where(torch.gt(labels, 0))[0] # 返回标签类别大于0位置的类别信息 labels_pos = labels[sampled_pos_inds_subset] # shape=[num_proposal, num_classes] N, num_classes = class_logits.shape box_regression = box_regression.reshape(N, -1, 4) # 计算边界框损失信息 box_loss = det_utils.smooth_l1_loss( # 获取指定索引proposal的指定类别box信息 box_regression[sampled_pos_inds_subset, labels_pos], regression_targets[sampled_pos_inds_subset], beta=1 / 9, size_average=False, ) / labels.numel() return classification_loss, box_loss class RoIHeads(torch.nn.Module): __annotations__ = { 'box_coder': det_utils.BoxCoder, 'proposal_matcher': det_utils.Matcher, 'fg_bg_sampler': det_utils.BalancedPositiveNegativeSampler, } def __init__(self, box_roi_pool, # Multi-scale RoIAlign pooling box_head, # TwoMLPHead box_predictor, # FastRCNNPredictor # Faster R-CNN training fg_iou_thresh, bg_iou_thresh, # default: 0.5, 0.5 batch_size_per_image, positive_fraction, # default: 512, 0.25 bbox_reg_weights, # None # Faster R-CNN inference score_thresh, # default: 0.05 nms_thresh, # default: 0.5 detection_per_img): # default: 100 super(RoIHeads, self).__init__() self.box_similarity = box_ops.box_iou # assign ground-truth boxes for each proposal self.proposal_matcher = det_utils.Matcher( fg_iou_thresh, # default: 0.5 bg_iou_thresh, # default: 0.5 allow_low_quality_matches=False) self.fg_bg_sampler = 
det_utils.BalancedPositiveNegativeSampler( batch_size_per_image, # default: 512 positive_fraction) # default: 0.25 if bbox_reg_weights is None: bbox_reg_weights = (10., 10., 5., 5.) self.box_coder = det_utils.BoxCoder(bbox_reg_weights) self.box_roi_pool = box_roi_pool # Multi-scale RoIAlign pooling self.box_head = box_head # TwoMLPHead self.box_predictor = box_predictor # FastRCNNPredictor self.score_thresh = score_thresh # default: 0.05 self.nms_thresh = nms_thresh # default: 0.5 self.detection_per_img = detection_per_img # default: 100 def assign_targets_to_proposals(self, proposals, gt_boxes, gt_labels): # type: (List[Tensor], List[Tensor], List[Tensor]) -> Tuple[List[Tensor], List[Tensor]] """ 为每个proposal匹配对应的gt_box,并划分到正负样本中 Args: proposals: gt_boxes: gt_labels: Returns: """ matched_idxs = [] labels = [] # 遍历每张图像的proposals, gt_boxes, gt_labels信息 for proposals_in_image, gt_boxes_in_image, gt_labels_in_image in zip(proposals, gt_boxes, gt_labels): if gt_boxes_in_image.numel() == 0: # 该张图像中没有gt框,为背景 # background image device = proposals_in_image.device clamped_matched_idxs_in_image = torch.zeros( (proposals_in_image.shape[0],), dtype=torch.int64, device=device ) labels_in_image = torch.zeros( (proposals_in_image.shape[0],), dtype=torch.int64, device=device ) else: # set to self.box_similarity when https://github.com/pytorch/pytorch/issues/27495 lands # 计算proposal与每个gt_box的iou重合度 match_quality_matrix = box_ops.box_iou(gt_boxes_in_image, proposals_in_image) # 计算proposal与每个gt_box匹配的iou最大值,并记录索引, # iou < low_threshold索引值为 -1, low_threshold <= iou < high_threshold索引值为 -2 matched_idxs_in_image = self.proposal_matcher(match_quality_matrix) # 限制最小值,防止匹配标签时出现越界的情况 # 注意-1, -2对应的gt索引会调整到0,获取的标签类别为第0个gt的类别(实际上并不是),后续会进一步处理 clamped_matched_idxs_in_image = matched_idxs_in_image.clamp(min=0) # 获取proposal匹配到的gt对应标签 labels_in_image = gt_labels_in_image[clamped_matched_idxs_in_image] labels_in_image = labels_in_image.to(dtype=torch.int64) # label background (below the low 
threshold) # 将gt索引为-1的类别设置为0,即背景,负样本 bg_inds = matched_idxs_in_image == self.proposal_matcher.BELOW_LOW_THRESHOLD # -1 labels_in_image[bg_inds] = 0 # label ignore proposals (between low and high threshold) # 将gt索引为-2的类别设置为-1, 即废弃样本 ignore_inds = matched_idxs_in_image == self.proposal_matcher.BETWEEN_THRESHOLDS # -2 labels_in_image[ignore_inds] = -1 # -1 is ignored by sampler matched_idxs.append(clamped_matched_idxs_in_image) labels.append(labels_in_image) return matched_idxs, labels def subsample(self, labels): # type: (List[Tensor]) -> List[Tensor] # BalancedPositiveNegativeSampler sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels) sampled_inds = [] # 遍历每张图片的正负样本索引 for img_idx, (pos_inds_img, neg_inds_img) in enumerate(zip(sampled_pos_inds, sampled_neg_inds)): # 记录所有采集样本索引(包括正样本和负样本) # img_sampled_inds = torch.nonzero(pos_inds_img | neg_inds_img).squeeze(1) img_sampled_inds = torch.where(pos_inds_img | neg_inds_img)[0] sampled_inds.append(img_sampled_inds) return sampled_inds def add_gt_proposals(self, proposals, gt_boxes): # type: (List[Tensor], List[Tensor]) -> List[Tensor] """ 将gt_boxes拼接到proposal后面 Args: proposals: 一个batch中每张图像rpn预测的boxes gt_boxes: 一个batch中每张图像对应的真实目标边界框 Returns: """ proposals = [ torch.cat((proposal, gt_box)) for proposal, gt_box in zip(proposals, gt_boxes) ] return proposals def check_targets(self, targets): # type: (Optional[List[Dict[str, Tensor]]]) -> None assert targets is not None assert all(["boxes" in t for t in targets]) assert all(["labels" in t for t in targets]) def select_training_samples(self, proposals, # type: List[Tensor] targets # type: Optional[List[Dict[str, Tensor]]] ): # type: (...) 
def postprocess_detections(self,
                           class_logits,    # type: Tensor
                           box_regression,  # type: Tensor
                           proposals,       # type: List[Tensor]
                           image_shapes     # type: List[Tuple[int, int]]
                           ):
    # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
    """
    Turn raw head outputs into per-image detections.

    Steps, in order:
      1. decode the regression output against the proposals,
      2. softmax the class logits,
      3. clip boxes to the image,
      4. drop the class at index 0 (background),
      5. drop scores not greater than ``self.score_thresh``,
      6. drop boxes rejected by ``remove_small_boxes`` (min_size=1.),
      7. run class-wise NMS with ``self.nms_thresh``,
      8. keep at most ``self.detection_per_img`` results.

    Args:
        class_logits: class logits predicted by the box predictor.
        box_regression: box regression parameters predicted by the box predictor.
        proposals: per-image proposals produced by the RPN.
        image_shapes: per-image size used for clipping.

    Returns:
        Three lists (boxes, scores, labels) with one tensor per image.
    """
    device = class_logits.device
    num_classes = class_logits.shape[-1]

    # Number of proposals per image, used to split the batched outputs.
    counts = [per_image.shape[0] for per_image in proposals]

    decoded_boxes = self.box_coder.decode(box_regression, proposals)
    class_probs = F.softmax(class_logits, -1)

    result_boxes = []
    result_scores = []
    result_labels = []

    per_image_iter = zip(decoded_boxes.split(counts, 0),
                         class_probs.split(counts, 0),
                         image_shapes)
    for img_boxes, img_scores, img_shape in per_image_iter:
        # Move out-of-bounds coordinates onto the image border.
        img_boxes = box_ops.clip_boxes_to_image(img_boxes, img_shape)

        # One label per (proposal, class) pair.
        img_labels = torch.arange(num_classes, device=device).view(1, -1).expand_as(img_scores)

        # Drop index 0 (background), then flatten so that every class
        # prediction becomes its own candidate detection.
        img_boxes = img_boxes[:, 1:].reshape(-1, 4)
        img_scores = img_scores[:, 1:].reshape(-1)
        img_labels = img_labels[:, 1:].reshape(-1)

        # Remove low-scoring candidates (strictly greater than the threshold).
        confident = torch.where(torch.gt(img_scores, self.score_thresh))[0]
        img_boxes, img_scores, img_labels = img_boxes[confident], img_scores[confident], img_labels[confident]

        # Remove degenerate / tiny boxes.
        large_enough = box_ops.remove_small_boxes(img_boxes, min_size=1.)
        img_boxes, img_scores, img_labels = img_boxes[large_enough], img_scores[large_enough], img_labels[large_enough]

        # Non-maximum suppression done independently per class, then keep
        # only the leading detection_per_img survivors.
        survivors = box_ops.batched_nms(img_boxes, img_scores, img_labels, self.nms_thresh)
        survivors = survivors[:self.detection_per_img]

        result_boxes.append(img_boxes[survivors])
        result_scores.append(img_scores[survivors])
        result_labels.append(img_labels[survivors])

    return result_boxes, result_scores, result_labels
"loss_classifier": loss_classifier, "loss_box_reg": loss_box_reg } else: boxes, scores, labels = self.postprocess_detections(class_logits, box_regression, proposals, image_shapes) num_images = len(boxes) for i in range(num_images): result.append( { "boxes": boxes[i], "labels": labels[i], "scores": scores[i], } ) return result, losses ================================================ FILE: pytorch_object_detection/train_coco_dataset/network_files/rpn_function.py ================================================ from typing import List, Optional, Dict, Tuple import torch from torch import nn, Tensor from torch.nn import functional as F import torchvision from . import det_utils from . import boxes as box_ops from .image_list import ImageList @torch.jit.unused def _onnx_get_num_anchors_and_pre_nms_top_n(ob, orig_pre_nms_top_n): # type: (Tensor, int) -> Tuple[int, int] from torch.onnx import operators num_anchors = operators.shape_as_tensor(ob)[1].unsqueeze(0) pre_nms_top_n = torch.min(torch.cat( (torch.tensor([orig_pre_nms_top_n], dtype=num_anchors.dtype), num_anchors), 0)) return num_anchors, pre_nms_top_n class AnchorsGenerator(nn.Module): __annotations__ = { "cell_anchors": Optional[List[torch.Tensor]], "_cache": Dict[str, List[torch.Tensor]] } """ anchors生成器 Module that generates anchors for a set of feature maps and image sizes. The module support computing anchors at multiple sizes and aspect ratios per feature map. sizes and aspect_ratios should have the same number of elements, and it should correspond to the number of feature maps. sizes[i] and aspect_ratios[i] can have an arbitrary number of elements, and AnchorGenerator will output a set of sizes[i] * aspect_ratios[i] anchors per spatial location for feature map i. 
def generate_anchors(self, scales, aspect_ratios, dtype=torch.float32, device=torch.device("cpu")):
    # type: (List[int], List[float], torch.dtype, torch.device) -> Tensor
    """
    Build the zero-centred anchor templates for one feature level.

    Arguments:
        scales: anchor side lengths, i.e. sqrt(anchor_area)
        aspect_ratios: height / width ratios
        dtype: dtype of the returned tensor
        device: device of the returned tensor

    Returns:
        Tensor of shape [len(aspect_ratios) * len(scales), 4] holding
        (x_min, y_min, x_max, y_max) relative to an anchor centre at (0, 0),
        rounded to whole numbers.
    """
    scale_t = torch.as_tensor(scales, dtype=dtype, device=device)
    ratio_t = torch.as_tensor(aspect_ratios, dtype=dtype, device=device)

    # For a ratio r = h / w at fixed area: h scales by sqrt(r), w by 1 / sqrt(r).
    height_factor = torch.sqrt(ratio_t)
    width_factor = 1.0 / height_factor

    # Every ratio paired with every scale (ratio-major order).
    widths = (width_factor[:, None] * scale_t[None, :]).view(-1)
    heights = (height_factor[:, None] * scale_t[None, :]).view(-1)

    # Full extents become half extents around the (0, 0) centre.
    corners = torch.stack([-widths, -heights, widths, heights], dim=1) / 2
    return corners.round()
def num_anchors_per_location(self):
    """
    Return, for each feature level, how many anchors sit on one location.

    The count for a level is len(sizes) * len(aspect_ratios) of that level.
    """
    counts = []
    for level_sizes, level_ratios in zip(self.sizes, self.aspect_ratios):
        counts.append(len(level_sizes) * len(level_ratios))
    return counts
def cached_grid_anchors(self, grid_sizes, strides):
    # type: (List[List[int]], List[List[Tensor]]) -> List[Tensor]
    """
    Return the anchors for the given grid sizes and strides, using the cache.

    The cache key is the string form of both arguments; on a miss the
    anchors are computed with ``self.grid_anchors`` and stored in
    ``self._cache`` (a dict) before being returned.
    """
    cache_key = str(grid_sizes) + str(strides)
    if cache_key not in self._cache:
        self._cache[cache_key] = self.grid_anchors(grid_sizes, strides)
    return self._cache[cache_key]
class RPNHead(nn.Module):
    """
    RPN head with a classification branch and a box-regression branch.

    A shared 3x3 convolution slides over each feature map; two 1x1
    convolutions then predict, per location, one objectness logit and four
    regression parameters for every anchor.

    Arguments:
        in_channels: number of channels of the input feature
        num_anchors: number of anchors to be predicted per location
    """

    def __init__(self, in_channels, num_anchors):
        super(RPNHead, self).__init__()
        # 3x3 sliding window; padding=1 keeps the spatial size unchanged.
        self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
        # Objectness logit per anchor ("object" here only means foreground vs background).
        self.cls_logits = nn.Conv2d(in_channels, num_anchors, kernel_size=1, stride=1)
        # Four box-regression parameters per anchor.
        self.bbox_pred = nn.Conv2d(in_channels, num_anchors * 4, kernel_size=1, stride=1)

        # Small-variance normal weights and zero biases for every conv layer.
        for layer in self.children():
            if isinstance(layer, nn.Conv2d):
                torch.nn.init.normal_(layer.weight, std=0.01)
                torch.nn.init.constant_(layer.bias, 0)

    def forward(self, x):
        # type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
        """
        Apply the head to every feature map.

        Args:
            x: list of feature maps, one per feature level.

        Returns:
            (logits, bbox_reg): two lists aligned with ``x`` holding the
            objectness logits and the box-regression output of each level.
        """
        logits = []
        bbox_reg = []
        # The loop index was never used, so iterate over the features directly.
        for feature in x:
            t = F.relu(self.conv(feature))
            logits.append(self.cls_logits(t))
            bbox_reg.append(self.bbox_pred(t))
        return logits, bbox_reg
box_regression: 每个预测特征层上的预测目标bboxes regression参数 Returns: """ box_cls_flattened = [] box_regression_flattened = [] # 遍历每个预测特征层 for box_cls_per_level, box_regression_per_level in zip(box_cls, box_regression): # [batch_size, anchors_num_per_position * classes_num, height, width] # 注意,当计算RPN中的proposal时,classes_num=1,只区分目标和背景 N, AxC, H, W = box_cls_per_level.shape # # [batch_size, anchors_num_per_position * 4, height, width] Ax4 = box_regression_per_level.shape[1] # anchors_num_per_position A = Ax4 // 4 # classes_num C = AxC // A # [N, -1, C] box_cls_per_level = permute_and_flatten(box_cls_per_level, N, A, C, H, W) box_cls_flattened.append(box_cls_per_level) # [N, -1, C] box_regression_per_level = permute_and_flatten(box_regression_per_level, N, A, 4, H, W) box_regression_flattened.append(box_regression_per_level) box_cls = torch.cat(box_cls_flattened, dim=1).flatten(0, -2) # start_dim, end_dim box_regression = torch.cat(box_regression_flattened, dim=1).reshape(-1, 4) return box_cls, box_regression class RegionProposalNetwork(torch.nn.Module): """ Implements Region Proposal Network (RPN). Arguments: anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature maps. head (nn.Module): module that computes the objectness and regression deltas fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be considered as positive during training of the RPN. bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be considered as negative during training of the RPN. batch_size_per_image (int): number of anchors that are sampled during training of the RPN for computing the loss positive_fraction (float): proportion of positive anchors in a mini-batch during training of the RPN pre_nms_top_n (Dict[str]): number of proposals to keep before applying NMS. 
It should contain two fields: training and testing, to allow for different values depending on training or evaluation post_nms_top_n (Dict[str]): number of proposals to keep after applying NMS. It should contain two fields: training and testing, to allow for different values depending on training or evaluation nms_thresh (float): NMS threshold used for postprocessing the RPN proposals """ __annotations__ = { 'box_coder': det_utils.BoxCoder, 'proposal_matcher': det_utils.Matcher, 'fg_bg_sampler': det_utils.BalancedPositiveNegativeSampler, 'pre_nms_top_n': Dict[str, int], 'post_nms_top_n': Dict[str, int], } def __init__(self, anchor_generator, head, fg_iou_thresh, bg_iou_thresh, batch_size_per_image, positive_fraction, pre_nms_top_n, post_nms_top_n, nms_thresh, score_thresh=0.0): super(RegionProposalNetwork, self).__init__() self.anchor_generator = anchor_generator self.head = head self.box_coder = det_utils.BoxCoder(weights=(1.0, 1.0, 1.0, 1.0)) # use during training # 计算anchors与真实bbox的iou self.box_similarity = box_ops.box_iou self.proposal_matcher = det_utils.Matcher( fg_iou_thresh, # 当iou大于fg_iou_thresh(0.7)时视为正样本 bg_iou_thresh, # 当iou小于bg_iou_thresh(0.3)时视为负样本 allow_low_quality_matches=True ) self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler( batch_size_per_image, positive_fraction # 256, 0.5 ) # use during testing self._pre_nms_top_n = pre_nms_top_n self._post_nms_top_n = post_nms_top_n self.nms_thresh = nms_thresh self.score_thresh = score_thresh self.min_size = 1. 
def assign_targets_to_anchors(self, anchors, targets):
    # type: (List[Tensor], List[Dict[str, Tensor]]) -> Tuple[List[Tensor], List[Tensor]]
    """
    Find the best-matching gt box for every anchor and label the anchor as
    positive, background or discarded.

    Args:
        anchors: (List[Tensor]) anchors of each image.
        targets: (List[Dict[Tensor]]) per-image targets; only the "boxes"
            entry is read here.

    Returns:
        labels: per-image float32 tensors with 1.0 for positive anchors,
            0.0 for background and -1.0 for discarded anchors. The RPN only
            separates foreground from background, so every positive is 1.
        matched_gt_boxes: per-image tensors holding the gt box matched to
            each anchor.
    """
    labels = []
    matched_gt_boxes = []
    # Walk over the anchors and targets of every image.
    for anchors_per_image, targets_per_image in zip(anchors, targets):
        gt_boxes = targets_per_image["boxes"]
        if gt_boxes.numel() == 0:
            # Image without any gt box: every anchor is background.
            device = anchors_per_image.device
            matched_gt_boxes_per_image = torch.zeros(anchors_per_image.shape, dtype=torch.float32, device=device)
            labels_per_image = torch.zeros((anchors_per_image.shape[0],), dtype=torch.float32, device=device)
        else:
            # IoU between every gt box and every anchor.
            # set to self.box_similarity when https://github.com/pytorch/pytorch/issues/27495 lands
            match_quality_matrix = box_ops.box_iou(gt_boxes, anchors_per_image)
            # Index of the best-matching gt for each anchor; the matcher
            # uses BELOW_LOW_THRESHOLD (-1) and BETWEEN_THRESHOLDS (-2) as
            # sentinels for anchors that are not positive.
            matched_idxs = self.proposal_matcher(match_quality_matrix)
            # Clamp before indexing: the negative sentinels would otherwise
            # index gt_boxes from the end or go out of bounds. The boxes
            # fetched for non-positive anchors are placeholders; labels
            # below tell positives apart.
            matched_gt_boxes_per_image = gt_boxes[matched_idxs.clamp(min=0)]

            # Anchors with a real match (index >= 0) are positives.
            labels_per_image = matched_idxs >= 0
            labels_per_image = labels_per_image.to(dtype=torch.float32)

            # background (negative examples)
            bg_indices = matched_idxs == self.proposal_matcher.BELOW_LOW_THRESHOLD  # -1
            labels_per_image[bg_indices] = 0.0

            # discard indices that are between thresholds
            inds_to_discard = matched_idxs == self.proposal_matcher.BETWEEN_THRESHOLDS  # -2
            labels_per_image[inds_to_discard] = -1.0

        labels.append(labels_per_image)
        matched_gt_boxes.append(matched_gt_boxes_per_image)
    return labels, matched_gt_boxes
def filter_proposals(self, proposals, objectness, image_shapes, num_anchors_per_level):
    # type: (Tensor, Tensor, List[Tuple[int, int]], List[int]) -> Tuple[List[Tensor], List[Tensor]]
    """
    Reduce the raw RPN predictions to the final proposals of each image.

    Per feature level the top ``pre_nms_top_n`` candidates are selected;
    then, per image, boxes are clipped, small and low-scoring boxes are
    removed, level-wise NMS is applied and at most ``post_nms_top_n``
    boxes are kept.

    Args:
        proposals: predicted box coordinates, indexed by image first.
        objectness: predicted objectness logits.
        image_shapes: size of every image in the batch.
        num_anchors_per_level: number of anchors on each feature level.

    Returns:
        (final_boxes, final_scores): one tensor per image each.
    """
    num_images = proposals.shape[0]
    device = proposals.device

    # do not backprop through objectness
    logits = objectness.detach().reshape(num_images, -1)

    # For every anchor, remember which feature level it came from.
    level_ids = torch.cat(
        [torch.full((count,), level, dtype=torch.int64, device=device)
         for level, count in enumerate(num_anchors_per_level)],
        0,
    )
    level_ids = level_ids.reshape(1, -1).expand_as(logits)

    # select top_n boxes independently per level before applying nms
    top_n_idx = self._get_top_n_idx(logits, num_anchors_per_level)

    # Column vector of image indices, [batch_size, 1], so that it
    # broadcasts against top_n_idx when gathering.
    batch_idx = torch.arange(num_images, device=device)[:, None]

    logits = logits[batch_idx, top_n_idx]
    level_ids = level_ids[batch_idx, top_n_idx]
    proposals = proposals[batch_idx, top_n_idx]

    probabilities = torch.sigmoid(logits)

    final_boxes = []
    final_scores = []
    for img_boxes, img_scores, img_levels, img_shape in zip(proposals, probabilities, level_ids, image_shapes):
        # Move out-of-bounds coordinates onto the image border.
        img_boxes = box_ops.clip_boxes_to_image(img_boxes, img_shape)

        # Keep boxes accepted by remove_small_boxes for self.min_size.
        big = box_ops.remove_small_boxes(img_boxes, self.min_size)
        img_boxes, img_scores, img_levels = img_boxes[big], img_scores[big], img_levels[big]

        # Remove low-scoring boxes (score >= threshold is kept), see
        # https://github.com/pytorch/vision/pull/3205
        confident = torch.where(torch.ge(img_scores, self.score_thresh))[0]
        img_boxes, img_scores, img_levels = img_boxes[confident], img_scores[confident], img_levels[confident]

        # non-maximum suppression, independently done per level
        survivors = box_ops.batched_nms(img_boxes, img_scores, img_levels, self.nms_thresh)

        # keep only topk scoring predictions
        survivors = survivors[: self.post_nms_top_n()]

        final_boxes.append(img_boxes[survivors])
        final_scores.append(img_scores[survivors])
    return final_boxes, final_scores
def forward(self,
            images,        # type: ImageList
            features,      # type: Dict[str, Tensor]
            targets=None   # type: Optional[List[Dict[str, Tensor]]]
            ):
    # type: (...) -> Tuple[List[Tensor], Dict[str, Tensor]]
    """
    Arguments:
        images (ImageList): images for which we want to compute the predictions
        features (Dict[Tensor]): features computed from the images that are
            used for computing the predictions. Each tensor corresponds to
            a different feature level
        targets (List[Dict[Tensor]]): ground-truth boxes present in the image (optional).
            If provided, each element in the dict should contain a field `boxes`,
            with the locations of the ground-truth boxes.

    Returns:
        boxes (List[Tensor]): the predicted boxes from the RPN, one Tensor per
            image.
        losses (Dict[Tensor]): the losses for the model during training. During
            testing, it is an empty dict.
    """
    # RPN uses all feature maps that are available
    feature_maps = list(features.values())

    # Per-level objectness logits and box-regression parameters (both lists).
    objectness, pred_bbox_deltas = self.head(feature_maps)

    # Anchors of every image in the batch; one list entry per image.
    anchors = self.anchor_generator(images, feature_maps)
    num_images = len(anchors)

    # Anchors per level = product of the three dims of one image's
    # objectness map on that level.
    num_anchors_per_level = []
    for level_objectness in objectness:
        level_shape = level_objectness[0].shape
        num_anchors_per_level.append(level_shape[0] * level_shape[1] * level_shape[2])

    # Flatten and concatenate the per-level predictions.
    objectness, pred_bbox_deltas = concat_box_prediction_layers(objectness, pred_bbox_deltas)

    # apply pred_bbox_deltas to anchors to obtain the decoded proposals
    # note that we detach the deltas because Faster R-CNN do not backprop through
    # the proposals
    proposals = self.box_coder.decode(pred_bbox_deltas.detach(), anchors).view(num_images, -1, 4)

    # Drop small boxes, apply NMS and keep the top post_nms_top_n proposals.
    boxes, scores = self.filter_proposals(proposals, objectness, images.image_sizes, num_anchors_per_level)

    losses = {}
    if self.training:
        assert targets is not None
        # Label every anchor as foreground, background or discarded and
        # fetch its matched gt box.
        labels, matched_gt_boxes = self.assign_targets_to_anchors(anchors, targets)
        # Regression targets from the matched gt boxes and the anchors.
        regression_targets = self.box_coder.encode(matched_gt_boxes, anchors)
        loss_objectness, loss_rpn_box_reg = self.compute_loss(
            objectness, pred_bbox_deltas, labels, regression_targets
        )
        losses = {
            "loss_objectness": loss_objectness,
            "loss_rpn_box_reg": loss_rpn_box_reg
        }
    return boxes, losses
max_size) image = torch.nn.functional.interpolate( image[None], scale_factor=scale_factor, mode="bilinear", recompute_scale_factor=True, align_corners=False)[0] return image def _resize_image(image, self_min_size, self_max_size): # type: (Tensor, float, float) -> Tensor im_shape = torch.tensor(image.shape[-2:]) min_size = float(torch.min(im_shape)) # 获取高宽中的最小值 max_size = float(torch.max(im_shape)) # 获取高宽中的最大值 scale_factor = self_min_size / min_size # 根据指定最小边长和图片最小边长计算缩放比例 # 如果使用该缩放比例计算的图片最大边长大于指定的最大边长 if max_size * scale_factor > self_max_size: scale_factor = self_max_size / max_size # 将缩放比例设为指定最大边长和图片最大边长之比 # interpolate利用插值的方法缩放图片 # image[None]操作是在最前面添加batch维度[C, H, W] -> [1, C, H, W] # bilinear只支持4D Tensor image = torch.nn.functional.interpolate( image[None], scale_factor=scale_factor, mode="bilinear", recompute_scale_factor=True, align_corners=False)[0] return image class GeneralizedRCNNTransform(nn.Module): """ Performs input / target transformation before feeding the data to a GeneralizedRCNN model. 
def normalize(self, image):
    """
    Standardize an image channel-wise: (image - mean) / std.

    ``self.image_mean`` and ``self.image_std`` are converted to tensors with
    the image's dtype and device and broadcast over the two trailing dims.
    """
    mean = torch.as_tensor(self.image_mean, dtype=image.dtype, device=image.device)
    std = torch.as_tensor(self.image_std, dtype=image.dtype, device=image.device)
    # [:, None, None] turns shape [C] into [C, 1, 1] for broadcasting.
    centered = image - mean[:, None, None]
    return centered / std[:, None, None]
""" index = int(torch.empty(1).uniform_(0., float(len(k))).item()) return k[index] def resize(self, image, target): # type: (Tensor, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]] """ 将图片缩放到指定的大小范围内,并对应缩放bboxes信息 Args: image: 输入的图片 target: 输入图片的相关信息(包括bboxes信息) Returns: image: 缩放后的图片 target: 缩放bboxes后的图片相关信息 """ # image shape is [channel, height, width] h, w = image.shape[-2:] if self.training: size = float(self.torch_choice(self.min_size)) # 指定输入图片的最小边长,注意是self.min_size不是min_size else: # FIXME assume for now that testing uses the largest scale size = float(self.min_size[-1]) # 指定输入图片的最小边长,注意是self.min_size不是min_size if torchvision._is_tracing(): image = _resize_image_onnx(image, size, float(self.max_size)) else: image = _resize_image(image, size, float(self.max_size)) if target is None: return image, target bbox = target["boxes"] # 根据图像的缩放比例来缩放bbox bbox = resize_boxes(bbox, [h, w], image.shape[-2:]) target["boxes"] = bbox return image, target # _onnx_batch_images() is an implementation of # batch_images() that is supported by ONNX tracing. 
def max_by_axis(self, the_list):
    # type: (List[List[int]]) -> List[int]
    """Return the position-wise maximum over a list of integer lists.

    Args:
        the_list: non-empty list of integer lists (e.g. one ``[C, H, W]``
            shape per image). Later lists must not be longer than the first.

    Returns:
        A new list holding, for every position, the largest value found at
        that position across all lists.

    Raises:
        IndexError: if ``the_list`` is empty or a later list is longer than
            the first one.
    """
    # Copy the first row instead of aliasing it: the running maxima are
    # written in place and must not overwrite the caller's data.
    maxes = list(the_list[0])
    for sublist in the_list[1:]:
        for index, item in enumerate(sublist):
            maxes[index] = max(maxes[index], item)
    return maxes
def postprocess(self,
                result,                # type: List[Dict[str, Tensor]]
                image_shapes,          # type: List[Tuple[int, int]]
                original_image_sizes   # type: List[Tuple[int, int]]
                ):
    # type: (...) -> List[Dict[str, Tensor]]
    """Map predicted boxes from the resized image scale back to the original image scale.

    Args:
        result: per-image prediction dicts, len(result) == batch_size.
        image_shapes: per-image sizes after preprocessing/resizing.
        original_image_sizes: per-image sizes before preprocessing.

    Returns:
        The same ``result`` list. In training mode it is returned untouched;
        otherwise each entry's "boxes" is replaced by the rescaled boxes.
    """
    if not self.training:
        # Walk the predictions together with their resized/original sizes and
        # rescale every image's boxes back to the original resolution.
        for idx, (prediction, resized_hw, original_hw) in enumerate(
                zip(result, image_shapes, original_image_sizes)):
            result[idx]["boxes"] = resize_boxes(prediction["boxes"], resized_hw, original_hw)
    return result
def resize_boxes(boxes, original_size, new_size):
    # type: (Tensor, List[int], List[int]) -> Tensor
    """Scale boxes by the ratio between a new image size and the original one.

    Arguments:
        boxes: tensor of shape [N, 4] in (xmin, ymin, xmax, ymax) order.
        original_size: (height, width) of the image before resizing.
        new_size: (height, width) of the image after resizing.

    Returns:
        A new [N, 4] tensor with x coordinates multiplied by the width ratio
        and y coordinates multiplied by the height ratio.
    """
    # One ratio per dimension, computed in float32 on the boxes' device.
    scale_factors = []
    for new_dim, old_dim in zip(new_size, original_size):
        new_t = torch.tensor(new_dim, dtype=torch.float32, device=boxes.device)
        old_t = torch.tensor(old_dim, dtype=torch.float32, device=boxes.device)
        scale_factors.append(new_t / old_t)
    # Sizes are given as (height, width).
    scale_h, scale_w = scale_factors

    # unbind(1) splits the [N, 4] tensor into its four coordinate columns.
    left, top, right, bottom = boxes.unbind(1)
    return torch.stack(
        (left * scale_w, top * scale_h, right * scale_w, bottom * scale_h),
        dim=1,
    )
def time_synchronized():
    """Return the current wall-clock time after pending CUDA work has finished.

    When CUDA is available, ``torch.cuda.synchronize()`` is called first so
    that a timestamp taken around a model call includes the queued GPU work.

    Returns:
        float: ``time.time()`` taken after the optional synchronization.
    """
    # Plain `if` rather than a conditional expression: the call is made only
    # for its side effect, its value is never used.
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()
"model" in weights_dict else weights_dict model.load_state_dict(weights_dict) model.to(device) # read class_indict label_json_path = './coco91_indices.json' assert os.path.exists(label_json_path), "json file {} dose not exist.".format(label_json_path) with open(label_json_path, 'r') as f: category_index = json.load(f) # load image original_img = Image.open("./test.jpg") # from pil image to tensor, do not normalize image data_transform = transforms.Compose([transforms.ToTensor()]) img = data_transform(original_img) # expand batch dimension img = torch.unsqueeze(img, dim=0) model.eval() # 进入验证模式 with torch.no_grad(): # init img_height, img_width = img.shape[-2:] init_img = torch.zeros((1, 3, img_height, img_width), device=device) model(init_img) t_start = time_synchronized() predictions = model(img.to(device))[0] t_end = time_synchronized() print("inference+NMS time: {}".format(t_end - t_start)) predict_boxes = predictions["boxes"].to("cpu").numpy() predict_classes = predictions["labels"].to("cpu").numpy() predict_scores = predictions["scores"].to("cpu").numpy() if len(predict_boxes) == 0: print("没有检测到任何目标!") plot_img = draw_objs(original_img, predict_boxes, predict_classes, predict_scores, category_index=category_index, box_thresh=0.5, line_thickness=3, font='arial.ttf', font_size=20) plt.imshow(plot_img) plt.show() # 保存预测的图片结果 plot_img.save("test_result.jpg") if __name__ == '__main__': main() ================================================ FILE: pytorch_object_detection/train_coco_dataset/requirements.txt ================================================ lxml matplotlib numpy tqdm pycocotools Pillow torch==1.10 torchvision==0.11.1 ================================================ FILE: pytorch_object_detection/train_coco_dataset/results20220408-201436.txt ================================================ epoch:0 0.0504 0.1144 0.0362 0.0207 0.0601 0.0657 0.0702 0.1069 0.1087 0.0335 0.1153 0.1486 1.7430 0.005000 epoch:1 0.1138 0.2300 0.0994 0.0494 0.1279 0.1554 0.1303 
0.1940 0.1980 0.0747 0.2051 0.2831 1.2282 0.005000 epoch:2 0.1461 0.2773 0.1394 0.0636 0.1635 0.1997 0.1530 0.2243 0.2288 0.0938 0.2435 0.3309 1.1391 0.005000 epoch:3 0.1669 0.3134 0.1642 0.0750 0.1843 0.2282 0.1680 0.2509 0.2561 0.1091 0.2705 0.3701 1.0902 0.005000 epoch:4 0.1857 0.3389 0.1828 0.0829 0.2074 0.2568 0.1830 0.2708 0.2756 0.1140 0.2937 0.3998 1.0581 0.005000 epoch:5 0.1908 0.3431 0.1930 0.0901 0.2128 0.2578 0.1839 0.2704 0.2753 0.1197 0.2927 0.3893 1.0337 0.005000 epoch:6 0.2044 0.3634 0.2077 0.0954 0.2247 0.2796 0.1947 0.2893 0.2956 0.1317 0.3138 0.4178 1.0127 0.005000 epoch:7 0.2068 0.3651 0.2099 0.0953 0.2269 0.2840 0.1959 0.2869 0.2926 0.1290 0.3093 0.4186 0.9945 0.005000 epoch:8 0.2171 0.3788 0.2218 0.0996 0.2470 0.2969 0.2012 0.3001 0.3071 0.1329 0.3375 0.4371 0.9806 0.005000 epoch:9 0.2146 0.3717 0.2207 0.0946 0.2315 0.3038 0.2011 0.2910 0.2962 0.1277 0.3091 0.4321 0.9691 0.005000 epoch:10 0.2280 0.3974 0.2345 0.1035 0.2535 0.3108 0.2118 0.3119 0.3182 0.1402 0.3429 0.4537 0.9567 0.005000 epoch:11 0.2332 0.3983 0.2443 0.1111 0.2534 0.3149 0.2136 0.3128 0.3190 0.1515 0.3417 0.4438 0.9450 0.005000 epoch:12 0.2400 0.4094 0.2486 0.1102 0.2622 0.3251 0.2175 0.3214 0.3289 0.1507 0.3521 0.4588 0.9369 0.005000 epoch:13 0.2449 0.4152 0.2563 0.1121 0.2741 0.3308 0.2234 0.3286 0.3363 0.1552 0.3703 0.4627 0.9286 0.005000 epoch:14 0.2466 0.4192 0.2542 0.1131 0.2765 0.3412 0.2220 0.3258 0.3322 0.1481 0.3627 0.4776 0.9203 0.005000 epoch:15 0.2492 0.4216 0.2569 0.1147 0.2781 0.3417 0.2254 0.3337 0.3402 0.1565 0.3666 0.4893 0.9116 0.005000 epoch:16 0.2689 0.4433 0.2814 0.1246 0.2963 0.3705 0.2384 0.3495 0.3569 0.1671 0.3864 0.5046 0.8616 0.000500 epoch:17 0.2719 0.4473 0.2865 0.1243 0.3021 0.3743 0.2399 0.3519 0.3593 0.1669 0.3931 0.5017 0.8515 0.000500 epoch:18 0.2738 0.4521 0.2857 0.1256 0.3048 0.3718 0.2416 0.3564 0.3645 0.1713 0.3996 0.5037 0.8472 0.000500 epoch:19 0.2759 0.4534 0.2893 0.1259 0.3094 0.3719 0.2448 0.3603 0.3681 0.1691 0.4073 0.5055 0.8439 
0.000500 epoch:20 0.2720 0.4483 0.2838 0.1250 0.3021 0.3681 0.2400 0.3532 0.3613 0.1688 0.3944 0.4994 0.8417 0.000500 epoch:21 0.2748 0.4501 0.2904 0.1241 0.3019 0.3759 0.2421 0.3561 0.3641 0.1682 0.3941 0.5101 0.8378 0.000500 epoch:22 0.2754 0.4532 0.2896 0.1281 0.3064 0.3759 0.2419 0.3586 0.3660 0.1712 0.3993 0.5115 0.8304 0.000050 epoch:23 0.2757 0.4516 0.2907 0.1271 0.3068 0.3748 0.2423 0.3572 0.3650 0.1692 0.4005 0.5087 0.8307 0.000050 epoch:24 0.2750 0.4500 0.2888 0.1256 0.3017 0.3760 0.2411 0.3536 0.3611 0.1669 0.3894 0.5040 0.8299 0.000050 epoch:25 0.2769 0.4537 0.2903 0.1263 0.3082 0.3782 0.2424 0.3582 0.3663 0.1693 0.4020 0.5116 0.8281 0.000050 ================================================ FILE: pytorch_object_detection/train_coco_dataset/train.py ================================================ import os import datetime import torch import torchvision import transforms from network_files import FasterRCNN, AnchorsGenerator from backbone import MobileNetV2, vgg, resnet50 from my_dataset import CocoDetection from train_utils import train_eval_utils as utils from train_utils import GroupedBatchSampler, create_aspect_ratio_groups from torchvision.models.feature_extraction import create_feature_extractor def create_model(num_classes): # 以vgg16为backbone # 预训练权重地址: https://download.pytorch.org/models/vgg16-397923af.pth # vgg16 = vgg(model_name="vgg16", weights_path="./vgg16.pth") # backbone = create_feature_extractor(vgg16, return_nodes={"features.29": "0"}) # 删除feature中最后的maxpool层 # backbone.out_channels = 512 # 以resnet50为backbone # 预训练权重地址:https://download.pytorch.org/models/resnet50-19c8e357.pth res50 = resnet50() res50.load_state_dict(torch.load("./resnet50.pth", map_location="cpu")) backbone = create_feature_extractor(res50, return_nodes={"layer3": "0"}) backbone.out_channels = 1024 # 以mobilenetv2为backbone # 预训练权重地址:https://download.pytorch.org/models/mobilenet_v2-b0353104.pth # backbone = MobileNetV2(weights_path="./mobilenet_v2.pth").features # 
def main(args):
    """Train the detector on COCO on one device, evaluating and checkpointing every epoch.

    Args:
        args: parsed command-line namespace. Attributes read here: ``device``,
            ``data_path``, ``aspect_ratio_group_factor``, ``batch_size``,
            ``num_classes``, ``lr``, ``momentum``, ``weight_decay``, ``amp``,
            ``lr_steps``, ``lr_gamma``, ``resume``, ``start_epoch``,
            ``epochs`` and ``output_dir``. ``start_epoch`` is overwritten when
            resuming from a checkpoint.

    Side effects:
        Appends one line per epoch to a timestamped ``results*.txt`` file,
        writes ``model_<epoch>.pth`` checkpoints into ``args.output_dir`` and
        finally asks ``plot_curve`` to draw the loss/lr and mAP curves.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    # File that accumulates the COCO metrics, loss and learning rate per epoch.
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    # Checkpoints go where --output-dir points (previously the path was
    # hard-coded to ./save_weights and the option was ignored when saving).
    output_dir = getattr(args, "output_dir", None) or "./save_weights"
    os.makedirs(output_dir, exist_ok=True)

    data_transform = {
        "train": transforms.Compose([transforms.ToTensor(),
                                     transforms.RandomHorizontalFlip(0.5)]),
        "val": transforms.Compose([transforms.ToTensor()])
    }

    COCO_root = args.data_path

    # load train data set
    # coco2017 -> annotations -> instances_train2017.json
    train_dataset = CocoDetection(COCO_root, "train", data_transform["train"])

    # Optionally build batches from images with similar aspect ratios; per the
    # original note this lowers the GPU memory needed for training (default on).
    train_sampler = None
    train_batch_sampler = None
    if args.aspect_ratio_group_factor >= 0:
        train_sampler = torch.utils.data.RandomSampler(train_dataset)
        # Index of the aspect-ratio bin every image falls into.
        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)
        # Every batch is drawn from a single aspect-ratio bin.
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)

    batch_size = args.batch_size
    # os.cpu_count() may return None; fall back to 1 so min() cannot fail.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using %g dataloader workers' % nw)

    # A custom collate_fn is required: samples are (image, target) pairs that
    # the default collation cannot stack into a batch.
    if train_sampler is not None:
        # Aspect-ratio grouping needs batch_sampler instead of batch_size/shuffle.
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_sampler=train_batch_sampler,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)
    else:
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_size=batch_size,
                                                        shuffle=True,
                                                        pin_memory=True,
                                                        num_workers=nw,
                                                        collate_fn=train_dataset.collate_fn)

    # load validation data set
    # coco2017 -> annotations -> instances_val2017.json
    val_dataset = CocoDetection(COCO_root, "val", data_transform["val"])
    val_data_loader = torch.utils.data.DataLoader(val_dataset,
                                                  batch_size=1,
                                                  shuffle=False,
                                                  pin_memory=True,
                                                  num_workers=nw,
                                                  collate_fn=train_dataset.collate_fn)

    # create model; num_classes equals background + classes
    model = create_model(num_classes=args.num_classes + 1)
    model.to(device)

    train_loss = []
    learning_rate = []
    val_map = []

    # define optimizer over the trainable parameters only
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        milestones=args.lr_steps,
                                                        gamma=args.lr_gamma)

    # Resume from a previous checkpoint when --resume is given.
    if args.resume:
        # If map_location is missing, torch.load will first load the module to CPU
        # and then copy each parameter to where it was saved,
        # which would result in all processes on the same machine using the same set of devices.
        checkpoint = torch.load(args.resume, map_location='cpu')
        # Restore model, optimizer and scheduler state, then continue after
        # the epoch stored in the checkpoint.
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp and "scaler" in checkpoint:
            scaler.load_state_dict(checkpoint["scaler"])

    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch, printing every 50 iterations
        mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader,
                                              device, epoch, print_freq=50,
                                              warmup=True, scaler=scaler)
        epoch_loss = mean_loss.item()
        train_loss.append(epoch_loss)
        learning_rate.append(lr)

        # update the learning rate
        lr_scheduler.step()

        # evaluate on the validation dataset
        coco_info = utils.evaluate(model, val_data_loader, device=device)

        # write into txt: COCO metrics followed by loss and learning rate
        with open(results_file, "a") as f:
            result_info = [f"{i:.4f}" for i in coco_info + [epoch_loss]] + [f"{lr:.6f}"]
            txt = "epoch:{} {}".format(epoch, ' '.join(result_info))
            f.write(txt + "\n")

        val_map.append(coco_info[1])  # labelled "pascal mAP" by the original author

        # save weights together with everything needed to resume
        save_files = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler': lr_scheduler.state_dict(),
            'epoch': epoch}
        if args.amp:
            save_files["scaler"] = scaler.state_dict()
        torch.save(save_files, os.path.join(output_dir, "model_{}.pth".format(epoch)))

    # plot loss and lr curve
    if train_loss and learning_rate:
        from plot_curve import plot_loss_and_lr
        plot_loss_and_lr(train_loss, learning_rate)

    # plot mAP curve
    if val_map:
        from plot_curve import plot_map
        plot_map(val_map)
parser.add_argument('--output-dir', default='./save_weights', help='path where to save') # 若需要接着上次训练,则指定上次训练保存权重文件地址 parser.add_argument('--resume', default='', type=str, help='resume from checkpoint') # 指定接着从哪个epoch数开始训练 parser.add_argument('--start_epoch', default=0, type=int, help='start epoch') # 训练的总epoch数 parser.add_argument('--epochs', default=26, type=int, metavar='N', help='number of total epochs to run') # 学习率 parser.add_argument('--lr', default=0.005, type=float, help='initial learning rate, 0.02 is the default value for training ' 'on 8 gpus and 2 images_per_gpu') # SGD的momentum参数 parser.add_argument('--momentum', default=0.9, type=float, metavar='M', help='momentum') # SGD的weight_decay参数 parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, metavar='W', help='weight decay (default: 1e-4)', dest='weight_decay') # 针对torch.optim.lr_scheduler.MultiStepLR的参数 parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int, help='decrease lr every step-size epochs') # 针对torch.optim.lr_scheduler.MultiStepLR的参数 parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma') # 训练的batch size(如果内存/GPU显存充裕,建议设置更大) parser.add_argument('--batch_size', default=4, type=int, metavar='N', help='batch size when training.') parser.add_argument('--aspect-ratio-group-factor', default=3, type=int) # 是否使用混合精度训练(需要GPU支持混合精度) parser.add_argument("--amp", default=False, help="Use torch.cuda.amp for mixed precision training") args = parser.parse_args() print(args) # 检查保存权重文件夹是否存在,不存在则创建 if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) main(args) ================================================ FILE: pytorch_object_detection/train_coco_dataset/train_multi_GPU.py ================================================ import time import os import datetime import torch import torchvision import transforms from my_dataset import CocoDetection from backbone import resnet50 from network_files import 
def create_model(num_classes):
    """Build a Faster R-CNN detector on a ResNet-50 feature extractor.

    Args:
        num_classes: number of output classes handed to ``FasterRCNN``
            (the caller passes object classes + 1 for background).

    Returns:
        The assembled ``FasterRCNN`` model.
    """
    # ResNet-50 backbone.
    # Pretrained weights: https://download.pytorch.org/models/resnet50-19c8e357.pth
    resnet = resnet50()
    pretrained_state = torch.load("./resnet50.pth", map_location="cpu")
    resnet.load_state_dict(pretrained_state)

    # Expose the output of `layer3` under the feature-map name "0" and record
    # its channel count on the backbone.
    feature_extractor = create_feature_extractor(resnet, return_nodes={"layer3": "0"})
    feature_extractor.out_channels = 1024

    # A single feature map, five anchor sizes and three aspect ratios.
    anchors = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),),
                               aspect_ratios=((0.5, 1.0, 2.0),))

    pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=['0'],   # feature maps on which RoI pooling is performed
        output_size=[7, 7],    # spatial size of the pooled features
        sampling_ratio=2,      # sampling ratio
    )

    return FasterRCNN(backbone=feature_extractor,
                      num_classes=num_classes,
                      rpn_anchor_generator=anchors,
                      box_roi_pool=pooler)
test_sampler = torch.utils.data.SequentialSampler(val_dataset) if args.aspect_ratio_group_factor >= 0: # 统计所有图像比例在bins区间中的位置索引 group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor) train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size) else: train_batch_sampler = torch.utils.data.BatchSampler( train_sampler, args.batch_size, drop_last=True) data_loader = torch.utils.data.DataLoader( train_dataset, batch_sampler=train_batch_sampler, num_workers=args.workers, collate_fn=train_dataset.collate_fn) data_loader_test = torch.utils.data.DataLoader( val_dataset, batch_size=1, sampler=test_sampler, num_workers=args.workers, collate_fn=train_dataset.collate_fn) print("Creating model") # create model num_classes equal background + classes model = create_model(num_classes=args.num_classes + 1) model.to(device) if args.distributed and args.sync_bn: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) model_without_ddp = model if args.distributed: model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) model_without_ddp = model.module params = [p for p in model.parameters() if p.requires_grad] optimizer = torch.optim.SGD( params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) scaler = torch.cuda.amp.GradScaler() if args.amp else None # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma) lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma) # 如果传入resume参数,即上次训练的权重地址,则接着上次的参数训练 if args.resume: # If map_location is missing, torch.load will first load the module to CPU # and then copy each parameter to where it was saved, # which would result in all processes on the same machine using the same set of devices. 
checkpoint = torch.load(args.resume, map_location='cpu') # 读取之前保存的权重文件(包括优化器以及学习率策略) model_without_ddp.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) args.start_epoch = checkpoint['epoch'] + 1 if args.amp and "scaler" in checkpoint: scaler.load_state_dict(checkpoint["scaler"]) train_loss = [] learning_rate = [] val_map = [] print("Start training") start_time = time.time() for epoch in range(args.start_epoch, args.epochs): if args.distributed: train_sampler.set_epoch(epoch) mean_loss, lr = utils.train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq, warmup=True, scaler=scaler) # update learning rate lr_scheduler.step() # evaluate after every epoch coco_info = utils.evaluate(model, data_loader_test, device=device) # 只在主进程上进行写操作 if args.rank in [-1, 0]: train_loss.append(mean_loss.item()) learning_rate.append(lr) val_map.append(coco_info[1]) # pascal mAP # write into txt with open(results_file, "a") as f: # 写入的数据包括coco指标还有loss和learning rate result_info = [f"{i:.4f}" for i in coco_info + [mean_loss.item()]] + [f"{lr:.6f}"] txt = "epoch:{} {}".format(epoch, ' '.join(result_info)) f.write(txt + "\n") if args.output_dir: # 只在主节点上执行保存权重操作 save_files = {'model': model_without_ddp.state_dict(), 'optimizer': optimizer.state_dict(), 'lr_scheduler': lr_scheduler.state_dict(), 'args': args, 'epoch': epoch} if args.amp: save_files["scaler"] = scaler.state_dict() save_on_master(save_files, os.path.join(args.output_dir, f'model_{epoch}.pth')) total_time = time.time() - start_time total_time_str = str(datetime.timedelta(seconds=int(total_time))) print('Training time {}'.format(total_time_str)) if args.rank in [-1, 0]: # plot loss and lr curve if len(train_loss) != 0 and len(learning_rate) != 0: from plot_curve import plot_loss_and_lr plot_loss_and_lr(train_loss, learning_rate) # plot mAP curve if len(val_map) != 0: from plot_curve import plot_map 
if __name__ == "__main__":
    import argparse

    def _str2bool(value):
        """Parse a command-line boolean such as ``true``/``false``/``1``/``0``.

        ``type=bool`` must not be used with argparse: ``bool("False")`` is
        ``True`` because every non-empty string is truthy.
        """
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ("1", "true", "t", "yes", "y", "on"):
            return True
        if text in ("", "0", "false", "f", "no", "n", "off"):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser(
        description=__doc__)

    # Root directory of the training data (coco2017)
    parser.add_argument('--data-path', default='/data/coco2017', help='dataset')
    # Training device type
    parser.add_argument('--device', default='cuda', help='device')
    # Number of object classes (background excluded)
    parser.add_argument('--num-classes', default=90, type=int, help='num_classes')
    # Batch size on each GPU
    parser.add_argument('-b', '--batch-size', default=4, type=int,
                        help='images per gpu, the total batch size is $NGPU x batch_size')
    # Epoch to start (or continue) training from
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # Total number of training epochs
    parser.add_argument('--epochs', default=26, type=int, metavar='N',
                        help='number of total epochs to run')
    # Number of workers for data loading and preprocessing
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    # Learning rate; scale it with the number of GPUs and the batch size: 0.02 / 8 * num_GPU
    parser.add_argument('--lr', default=0.01, type=float,
                        help='initial learning rate, 0.02 is the default value for training '
                             'on 8 gpus and 2 images_per_gpu')
    # SGD momentum
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # SGD weight decay
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # Parameter for torch.optim.lr_scheduler.StepLR
    parser.add_argument('--lr-step-size', default=8, type=int, help='decrease lr every step-size epochs')
    # Parameter for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int,
                        help='decrease lr every step-size epochs')
    # Parameter for torch.optim.lr_scheduler.MultiStepLR
    parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')
    # How often training progress is printed
    parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
    # Directory where outputs are saved
    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')
    # Continue training from a previous checkpoint
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)

    # Number of processes to launch (processes, not threads)
    parser.add_argument('--world-size', default=4, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
    # Boolean flags are parsed explicitly so that "--sync-bn False" and
    # "--amp False" really disable the feature.
    parser.add_argument("--sync-bn", dest="sync_bn", help="Use sync batch norm", type=_str2bool, default=False)
    # Mixed precision training (the GPU must support it)
    parser.add_argument("--amp", default=False, type=_str2bool,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()

    # Create the output directory when one was requested.
    if args.output_dir:
        mkdir(args.output_dir)

    main(args)
class EvalCOCOMetric:
    """Collect per-image predictions and score them with pycocotools' COCOeval.

    Typical use: call ``update`` once per batch, then ``synchronize_results``
    (gathers the predictions of every process and writes them to a JSON file
    on the main process), then ``evaluate``.

    Args:
        coco: ground-truth COCO object; a deep copy is stored.
        iou_type: one of "bbox", "segm" or "keypoints" (``update`` only
            implements "bbox" and "segm").
        results_file_name: path of the JSON file the predictions are written to.
        classes_mapping: optional dict mapping ``str(predicted_label)`` to the
            category id that should be written to the results.
    """

    def __init__(self,
                 coco: "COCO" = None,
                 iou_type: str = None,
                 results_file_name: str = "predict_results.json",
                 classes_mapping: dict = None):
        self.coco = copy.deepcopy(coco)
        self.img_ids = []  # ids of the images handled by this process
        self.results = []  # results[i] is the list of result dicts of img_ids[i]
        self.aggregation_results = None
        self.classes_mapping = classes_mapping
        self.coco_evaluator = None
        assert iou_type in ["bbox", "segm", "keypoints"]
        self.iou_type = iou_type
        self.results_file_name = results_file_name

    def _category_id(self, label):
        """Return the category id to store for a predicted label."""
        class_idx = int(label)
        if self.classes_mapping is not None:
            class_idx = int(self.classes_mapping[str(class_idx)])
        return class_idx

    def prepare_for_coco_detection(self, targets, outputs):
        """Convert detection outputs into COCO result dicts (bbox task).

        Each output must provide "boxes" as [x_min, y_min, x_max, y_max],
        "labels" and "scores"; each target must provide "image_id".
        Images whose id was already recorded are skipped.
        """
        for target, output in zip(targets, outputs):
            if len(output) == 0:
                continue

            img_id = int(target["image_id"])
            if img_id in self.img_ids:
                # guard against the same image being submitted twice
                continue
            self.img_ids.append(img_id)

            # COCO expects [x_min, y_min, w, h]. The width/height are computed
            # into a NEW tensor so the caller's prediction tensor is left
            # untouched (the previous in-place "-=" silently modified it).
            boxes = output["boxes"]
            top_left = boxes[:, :2]
            width_height = boxes[:, 2:] - top_left
            per_image_classes = output["labels"].tolist()
            per_image_scores = output["scores"].tolist()

            res_list = []
            for score, label, xy, wh in zip(per_image_scores,
                                            per_image_classes,
                                            top_left.tolist(),
                                            width_height.tolist()):
                # coordinates are rounded to 2 decimals and scores to 3
                # to keep the JSON file small
                res_list.append({"image_id": img_id,
                                 "category_id": self._category_id(label),
                                 "bbox": [round(v, 2) for v in xy + wh],
                                 "score": round(float(score), 3)})
            self.results.append(res_list)

    def prepare_for_coco_segmentation(self, targets, outputs):
        """Convert instance-segmentation outputs into COCO result dicts.

        Each output must provide "masks", "labels" and "scores". Masks are
        binarised at 0.5 and run-length encoded with pycocotools.
        """
        for target, output in zip(targets, outputs):
            if len(output) == 0:
                continue

            img_id = int(target["image_id"])
            if img_id in self.img_ids:
                # guard against the same image being submitted twice
                continue
            self.img_ids.append(img_id)

            per_image_classes = output["labels"].tolist()
            per_image_scores = output["scores"].tolist()
            masks = output["masks"] > 0.5

            res_list = []
            for mask, label, score in zip(masks, per_image_classes, per_image_scores):
                # encode() wants a Fortran-ordered uint8 array of shape [H, W, 1]
                rle = mask_util.encode(np.array(mask[0, :, :, np.newaxis],
                                                dtype=np.uint8, order="F"))[0]
                # bytes are not JSON serialisable
                rle["counts"] = rle["counts"].decode("utf-8")
                res_list.append({"image_id": img_id,
                                 "category_id": self._category_id(label),
                                 "segmentation": rle,
                                 "score": round(score, 3)})
            self.results.append(res_list)

    def update(self, targets, outputs):
        """Record the predictions of one batch.

        Raises:
            KeyError: if ``iou_type`` is neither "bbox" nor "segm".
        """
        if self.iou_type == "bbox":
            self.prepare_for_coco_detection(targets, outputs)
        elif self.iou_type == "segm":
            self.prepare_for_coco_segmentation(targets, outputs)
        else:
            raise KeyError(f"not support iou_type: {self.iou_type}")

    def synchronize_results(self):
        """Merge the results of all processes and dump them to JSON on the main process."""
        eval_ids, eval_results = merge(self.img_ids, self.results)
        self.aggregation_results = {"img_ids": eval_ids, "results": eval_results}

        # only the main process writes the file
        if is_main_process():
            # flatten the per-image lists into one list of result dicts
            results = [res for per_image in eval_results for res in per_image]
            with open(self.results_file_name, 'w') as json_file:
                json.dump(results, json_file, indent=4)

    def evaluate(self):
        """Run COCOeval on the saved results file.

        Returns:
            The list of COCO summary statistics on the main process,
            ``None`` on every other process.
        """
        if not is_main_process():
            return None

        coco_true = self.coco
        coco_pre = coco_true.loadRes(self.results_file_name)

        self.coco_evaluator = COCOeval(cocoGt=coco_true, cocoDt=coco_pre, iouType=self.iou_type)
        self.coco_evaluator.evaluate()
        self.coco_evaluator.accumulate()
        print(f"IoU metric: {self.iou_type}")
        self.coco_evaluator.summarize()

        return self.coco_evaluator.stats.tolist()  # numpy to list
""" if not is_dist_avail_and_initialized(): return t = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda") dist.barrier() dist.all_reduce(t) t = t.tolist() self.count = int(t[0]) self.total = t[1] @property def median(self): # @property 是装饰器,这里可简单理解为增加median属性(只读) d = torch.tensor(list(self.deque)) return d.median().item() @property def avg(self): d = torch.tensor(list(self.deque), dtype=torch.float32) return d.mean().item() @property def global_avg(self): return self.total / self.count @property def max(self): return max(self.deque) @property def value(self): return self.deque[-1] def __str__(self): return self.fmt.format( median=self.median, avg=self.avg, global_avg=self.global_avg, max=self.max, value=self.value) def all_gather(data): """ 收集各个进程中的数据 Run all_gather on arbitrary picklable data (not necessarily tensors) Args: data: any picklable object Returns: list[data]: list of data gathered from each rank """ world_size = get_world_size() # 进程数 if world_size == 1: return [data] data_list = [None] * world_size dist.all_gather_object(data_list, data) return data_list def reduce_dict(input_dict, average=True): """ Args: input_dict (dict): all the values will be reduced average (bool): whether to do average or sum Reduce the values in the dictionary from all processes so that all processes have the averaged results. Returns a dict with the same fields as input_dict, after reduction. 
""" world_size = get_world_size() if world_size < 2: # 单GPU的情况 return input_dict with torch.no_grad(): # 多GPU的情况 names = [] values = [] # sort the keys so that they are consistent across processes for k in sorted(input_dict.keys()): names.append(k) values.append(input_dict[k]) values = torch.stack(values, dim=0) dist.all_reduce(values) if average: values /= world_size reduced_dict = {k: v for k, v in zip(names, values)} return reduced_dict class MetricLogger(object): def __init__(self, delimiter="\t"): self.meters = defaultdict(SmoothedValue) self.delimiter = delimiter def update(self, **kwargs): for k, v in kwargs.items(): if isinstance(v, torch.Tensor): v = v.item() assert isinstance(v, (float, int)) self.meters[k].update(v) def __getattr__(self, attr): if attr in self.meters: return self.meters[attr] if attr in self.__dict__: return self.__dict__[attr] raise AttributeError("'{}' object has no attribute '{}'".format( type(self).__name__, attr)) def __str__(self): loss_str = [] for name, meter in self.meters.items(): loss_str.append( "{}: {}".format(name, str(meter)) ) return self.delimiter.join(loss_str) def synchronize_between_processes(self): for meter in self.meters.values(): meter.synchronize_between_processes() def add_meter(self, name, meter): self.meters[name] = meter def log_every(self, iterable, print_freq, header=None): i = 0 if not header: header = "" start_time = time.time() end = time.time() iter_time = SmoothedValue(fmt='{avg:.4f}') data_time = SmoothedValue(fmt='{avg:.4f}') space_fmt = ":" + str(len(str(len(iterable)))) + "d" if torch.cuda.is_available(): log_msg = self.delimiter.join([header, '[{0' + space_fmt + '}/{1}]', 'eta: {eta}', '{meters}', 'time: {time}', 'data: {data}', 'max mem: {memory:.0f}']) else: log_msg = self.delimiter.join([header, '[{0' + space_fmt + '}/{1}]', 'eta: {eta}', '{meters}', 'time: {time}', 'data: {data}']) MB = 1024.0 * 1024.0 for obj in iterable: data_time.update(time.time() - end) yield obj 
def mkdir(path):
    """Create directory ``path`` together with any missing parents.

    Succeeds silently when the directory already exists, including when
    another process creates it concurrently.

    Raises:
        FileExistsError: if ``path`` exists but is not a directory. The
            previous errno check swallowed this case, so a bad output path
            only surfaced later when something was written into it.
        OSError: for any other failure (e.g. missing permissions).
    """
    os.makedirs(path, exist_ok=True)
os.environ and 'WORLD_SIZE' in os.environ: args.rank = int(os.environ["RANK"]) args.world_size = int(os.environ['WORLD_SIZE']) args.gpu = int(os.environ['LOCAL_RANK']) elif 'SLURM_PROCID' in os.environ: args.rank = int(os.environ['SLURM_PROCID']) args.gpu = args.rank % torch.cuda.device_count() else: print('Not using distributed mode') args.distributed = False return args.distributed = True torch.cuda.set_device(args.gpu) args.dist_backend = 'nccl' print('| distributed init (rank {}): {}'.format( args.rank, args.dist_url), flush=True) torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank) torch.distributed.barrier() setup_for_distributed(args.rank == 0) ================================================ FILE: pytorch_object_detection/train_coco_dataset/train_utils/group_by_aspect_ratio.py ================================================ import bisect from collections import defaultdict import copy from itertools import repeat, chain import math import numpy as np import torch import torch.utils.data from torch.utils.data.sampler import BatchSampler, Sampler from torch.utils.model_zoo import tqdm import torchvision from PIL import Image def _repeat_to_at_least(iterable, n): repeat_times = math.ceil(n / len(iterable)) repeated = chain.from_iterable(repeat(iterable, repeat_times)) return list(repeated) class GroupedBatchSampler(BatchSampler): """ Wraps another sampler to yield a mini-batch of indices. It enforces that the batch only contain elements from the same group. It also tries to provide mini-batches which follows an ordering which is as close as possible to the ordering from the original sampler. Arguments: sampler (Sampler): Base sampler. group_ids (list[int]): If the sampler produces indices in range [0, N), `group_ids` must be a list of `N` ints which contains the group id of each sample. The group ids must be a continuous set of integers starting from 0, i.e. 
they must be in the range [0, num_groups). batch_size (int): Size of mini-batch. """ def __init__(self, sampler, group_ids, batch_size): if not isinstance(sampler, Sampler): raise ValueError( "sampler should be an instance of " "torch.utils.data.Sampler, but got sampler={}".format(sampler) ) self.sampler = sampler self.group_ids = group_ids self.batch_size = batch_size def __iter__(self): buffer_per_group = defaultdict(list) samples_per_group = defaultdict(list) num_batches = 0 for idx in self.sampler: group_id = self.group_ids[idx] buffer_per_group[group_id].append(idx) samples_per_group[group_id].append(idx) if len(buffer_per_group[group_id]) == self.batch_size: yield buffer_per_group[group_id] num_batches += 1 del buffer_per_group[group_id] assert len(buffer_per_group[group_id]) < self.batch_size # now we have run out of elements that satisfy # the group criteria, let's return the remaining # elements so that the size of the sampler is # deterministic expected_num_batches = len(self) num_remaining = expected_num_batches - num_batches if num_remaining > 0: # for the remaining batches, take first the buffers with largest number # of elements for group_id, _ in sorted(buffer_per_group.items(), key=lambda x: len(x[1]), reverse=True): remaining = self.batch_size - len(buffer_per_group[group_id]) samples_from_group_id = _repeat_to_at_least(samples_per_group[group_id], remaining) buffer_per_group[group_id].extend(samples_from_group_id[:remaining]) assert len(buffer_per_group[group_id]) == self.batch_size yield buffer_per_group[group_id] num_remaining -= 1 if num_remaining == 0: break assert num_remaining == 0 def __len__(self): return len(self.sampler) // self.batch_size def _compute_aspect_ratios_slow(dataset, indices=None): print("Your dataset doesn't support the fast path for " "computing the aspect ratios, so will iterate over " "the full dataset and load every image instead. 
" "This might take some time...") if indices is None: indices = range(len(dataset)) class SubsetSampler(Sampler): def __init__(self, indices): self.indices = indices def __iter__(self): return iter(self.indices) def __len__(self): return len(self.indices) sampler = SubsetSampler(indices) data_loader = torch.utils.data.DataLoader( dataset, batch_size=1, sampler=sampler, num_workers=14, # you might want to increase it for faster processing collate_fn=lambda x: x[0]) aspect_ratios = [] with tqdm(total=len(dataset)) as pbar: for _i, (img, _) in enumerate(data_loader): pbar.update(1) height, width = img.shape[-2:] aspect_ratio = float(width) / float(height) aspect_ratios.append(aspect_ratio) return aspect_ratios def _compute_aspect_ratios_custom_dataset(dataset, indices=None): if indices is None: indices = range(len(dataset)) aspect_ratios = [] for i in indices: height, width = dataset.get_height_and_width(i) aspect_ratio = float(width) / float(height) aspect_ratios.append(aspect_ratio) return aspect_ratios def _compute_aspect_ratios_coco_dataset(dataset, indices=None): if indices is None: indices = range(len(dataset)) aspect_ratios = [] for i in indices: img_info = dataset.coco.imgs[dataset.ids[i]] aspect_ratio = float(img_info["width"]) / float(img_info["height"]) aspect_ratios.append(aspect_ratio) return aspect_ratios def _compute_aspect_ratios_voc_dataset(dataset, indices=None): if indices is None: indices = range(len(dataset)) aspect_ratios = [] for i in indices: # this doesn't load the data into memory, because PIL loads it lazily width, height = Image.open(dataset.images[i]).size aspect_ratio = float(width) / float(height) aspect_ratios.append(aspect_ratio) return aspect_ratios def _compute_aspect_ratios_subset_dataset(dataset, indices=None): if indices is None: indices = range(len(dataset)) ds_indices = [dataset.indices[i] for i in indices] return compute_aspect_ratios(dataset.dataset, ds_indices) def compute_aspect_ratios(dataset, indices=None): if 
hasattr(dataset, "get_height_and_width"): return _compute_aspect_ratios_custom_dataset(dataset, indices) if isinstance(dataset, torchvision.datasets.CocoDetection): return _compute_aspect_ratios_coco_dataset(dataset, indices) if isinstance(dataset, torchvision.datasets.VOCDetection): return _compute_aspect_ratios_voc_dataset(dataset, indices) if isinstance(dataset, torch.utils.data.Subset): return _compute_aspect_ratios_subset_dataset(dataset, indices) # slow path return _compute_aspect_ratios_slow(dataset, indices) def _quantize(x, bins): bins = copy.deepcopy(bins) bins = sorted(bins) # bisect_right:寻找y元素按顺序应该排在bins中哪个元素的右边,返回的是索引 quantized = list(map(lambda y: bisect.bisect_right(bins, y), x)) return quantized def create_aspect_ratio_groups(dataset, k=0): # 计算所有数据集中的图片width/height比例 aspect_ratios = compute_aspect_ratios(dataset) # 将[0.5, 2]区间划分成2*k+1等份 bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist() if k > 0 else [1.0] # 统计所有图像比例在bins区间中的位置索引 groups = _quantize(aspect_ratios, bins) # count number of elements per group # 统计每个区间的频次 counts = np.unique(groups, return_counts=True)[1] fbins = [0] + bins + [np.inf] print("Using {} as bins for aspect ratio quantization".format(fbins)) print("Count of instances per bin: {}".format(counts)) return groups ================================================ FILE: pytorch_object_detection/train_coco_dataset/train_utils/train_eval_utils.py ================================================ import math import sys import time import torch import train_utils.distributed_utils as utils from .coco_eval import EvalCOCOMetric def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=50, warmup=False, scaler=None): model.train() metric_logger = utils.MetricLogger(delimiter=" ") metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) header = 'Epoch: [{}]'.format(epoch) lr_scheduler = None if epoch == 0 and warmup is True: # 当训练第一轮(epoch=0)时,启用warmup训练方式,可理解为热身训练 warmup_factor = 1.0 / 
1000 warmup_iters = min(1000, len(data_loader) - 1) lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor) mloss = torch.zeros(1).to(device) # mean losses for i, [images, targets] in enumerate(metric_logger.log_every(data_loader, print_freq, header)): images = list(image.to(device) for image in images) targets = [{k: v.to(device) for k, v in t.items()} for t in targets] # 混合精度训练上下文管理器,如果在CPU环境中不起任何作用 with torch.cuda.amp.autocast(enabled=scaler is not None): loss_dict = model(images, targets) losses = sum(loss for loss in loss_dict.values()) # reduce losses over all GPUs for logging purpose loss_dict_reduced = utils.reduce_dict(loss_dict) losses_reduced = sum(loss for loss in loss_dict_reduced.values()) loss_value = losses_reduced.item() # 记录训练损失 mloss = (mloss * i + loss_value) / (i + 1) # update mean losses if not math.isfinite(loss_value): # 当计算的损失为无穷大时停止训练 print("Loss is {}, stopping training".format(loss_value)) print(loss_dict_reduced) sys.exit(1) optimizer.zero_grad() if scaler is not None: scaler.scale(losses).backward() scaler.step(optimizer) scaler.update() else: losses.backward() optimizer.step() if lr_scheduler is not None: # 第一轮使用warmup训练方式 lr_scheduler.step() metric_logger.update(loss=losses_reduced, **loss_dict_reduced) now_lr = optimizer.param_groups[0]["lr"] metric_logger.update(lr=now_lr) return mloss, now_lr @torch.no_grad() def evaluate(model, data_loader, device): cpu_device = torch.device("cpu") model.eval() metric_logger = utils.MetricLogger(delimiter=" ") header = "Test: " det_metric = EvalCOCOMetric(data_loader.dataset.coco, iou_type="bbox", results_file_name="det_results.json") for image, targets in metric_logger.log_every(data_loader, 100, header): image = list(img.to(device) for img in image) # 当使用CPU时,跳过GPU相关指令 if device != torch.device("cpu"): torch.cuda.synchronize(device) model_time = time.time() outputs = model(image) outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs] model_time = 
def summarize(self, catId=None):
    """Compute the 12 COCO summary metrics, optionally for a single category.

    Note this function can *only* be applied on the default parameter setting.

    Args:
        self: an accumulated evaluator exposing ``params`` and ``eval``
            (it is passed explicitly; this is a plain function).
        catId: if an ``int``, restrict the statistics to that index of the
            category axis; otherwise average over all categories.

    Returns:
        tuple: ``(stats, print_info)`` where ``stats`` is the list of the 12
        metric values (``-1`` when no valid entry exists) and ``print_info``
        is the newline-joined, human-readable report.

    Raises:
        Exception: if ``self.eval`` is empty, i.e. ``accumulate()`` has not run.
    """
    # Check first: every metric below indexes into self.eval, so the guard is
    # useless (a KeyError fires before it) when placed after the computation.
    if not self.eval:
        raise Exception('Please run accumulate() first')

    def _summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):
        """Return (mean value, formatted line) for one AP/AR configuration."""
        p = self.params
        iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
        titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
        typeStr = '(AP)' if ap == 1 else '(AR)'
        iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
            if iouThr is None else '{:0.2f}'.format(iouThr)

        # positions of the requested area range / detection limit
        aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
        mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]

        if ap == 1:
            # dimension of precision: [TxRxKxAxM]
            s = self.eval['precision']
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]

            if isinstance(catId, int):
                s = s[:, :, catId, aind, mind]
            else:
                s = s[:, :, :, aind, mind]
        else:
            # dimension of recall: [TxKxAxM]
            s = self.eval['recall']
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]

            if isinstance(catId, int):
                s = s[:, catId, aind, mind]
            else:
                s = s[:, :, aind, mind]

        # only entries greater than -1 take part in the mean
        valid = s[s > -1]
        mean_s = -1 if len(valid) == 0 else np.mean(valid)

        print_string = iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)
        return mean_s, print_string

    max_dets = self.params.maxDets
    # the 12 standard configurations, in the standard COCO order
    configs = [
        dict(ap=1),
        dict(ap=1, iouThr=.5, maxDets=max_dets[2]),
        dict(ap=1, iouThr=.75, maxDets=max_dets[2]),
        dict(ap=1, areaRng='small', maxDets=max_dets[2]),
        dict(ap=1, areaRng='medium', maxDets=max_dets[2]),
        dict(ap=1, areaRng='large', maxDets=max_dets[2]),
        dict(ap=0, maxDets=max_dets[0]),
        dict(ap=0, maxDets=max_dets[1]),
        dict(ap=0, maxDets=max_dets[2]),
        dict(ap=0, areaRng='small', maxDets=max_dets[2]),
        dict(ap=0, areaRng='medium', maxDets=max_dets[2]),
        dict(ap=0, areaRng='large', maxDets=max_dets[2]),
    ]

    stats, print_list = [], []
    for config in configs:
        value, line = _summarize(**config)
        stats.append(value)
        print_list.append(line)

    print_info = "\n".join(print_list)
    return stats, print_info
if __name__ == "__main__":
    # Command-line entry point: parse the options below and hand them to main().
    import argparse

    parser = argparse.ArgumentParser(
        description=__doc__)

    # Device to run on
    parser.add_argument('--device', default='cuda', help='device')

    # Number of object classes to detect
    parser.add_argument('--num-classes', type=int, default=90, help='number of classes')

    # Root directory of the dataset (COCO2017 root)
    parser.add_argument('--data-path', default='/data/coco2017', help='dataset root')

    # Trained weights file to evaluate
    parser.add_argument('--weights-path', default='./save_weights/model.pth', type=str, help='training weights')

    # Batch size used during validation
    # NOTE: this flag is spelled with an underscore, unlike the hyphenated flags above
    parser.add_argument('--batch_size', default=1, type=int, metavar='N',
                        help='batch size when validation.')

    args = parser.parse_args()

    main(args)
该文件里存储的是所有训练图片的路径地址 │ ├── my_val_data.txt: 该文件里存储的是所有验证图片的路径地址 │ ├── my_data_label.names: 该文件里存储的是所有类别的名称,一个类别对应一行(这里会根据`.json`文件自动生成) │ └── my_data.data: 该文件里记录的是类别数类别信息、train以及valid对应的txt文件 ``` ### 4.1 将VOC标注数据转为YOLO标注数据(如果你的数据已经是YOLO格式了,可跳过该步骤) * 使用`trans_voc2yolo.py`脚本进行转换,并在`./data/`文件夹下生成`my_data_label.names`标签文件, * 执行脚本前,需要根据自己的路径修改以下参数 ```python # voc数据集根目录以及版本 voc_root = "./VOCdevkit" voc_version = "VOC2012" # 转换的训练集以及验证集对应txt文件,对应VOCdevkit/VOC2012/ImageSets/Main文件夹下的txt文件 train_txt = "train.txt" val_txt = "val.txt" # 转换后的文件保存目录 save_file_root = "/home/wz/my_project/my_yolo_dataset" # label标签对应json文件 label_json_path = './data/pascal_voc_classes.json' ``` * 生成的`my_data_label.names`标签文件格式如下 ```text aeroplane bicycle bird boat bottle bus ... ``` ### 4.2 根据摆放好的数据集信息生成一系列相关准备文件 * 使用`calculate_dataset.py`脚本生成`my_train_data.txt`文件、`my_val_data.txt`文件以及`my_data.data`文件,并生成新的`my_yolov3.cfg`文件 * 执行脚本前,需要根据自己的路径修改以下参数 ```python # 训练集的labels目录路径 train_annotation_dir = "/home/wz/my_project/my_yolo_dataset/train/labels" # 验证集的labels目录路径 val_annotation_dir = "/home/wz/my_project/my_yolo_dataset/val/labels" # 上一步生成的my_data_label.names文件路径(如果没有该文件,可以自己手动编辑一个txt文档,然后重命名为.names格式即可) classes_label = "./data/my_data_label.names" # 原始yolov3-spp.cfg网络结构配置文件 cfg_path = "./cfg/yolov3-spp.cfg" ``` ## 5 预训练权重下载地址(下载后放入weights文件夹中): * `yolov3-spp-ultralytics-416.pt`: 链接: https://pan.baidu.com/s/1cK3USHKxDx-d5dONij52lA 密码: r3vm * `yolov3-spp-ultralytics-512.pt`: 链接: https://pan.baidu.com/s/1k5yeTZZNv8Xqf0uBXnUK-g 密码: e3k1 * `yolov3-spp-ultralytics-608.pt`: 链接: https://pan.baidu.com/s/1GI8BA0wxeWMC0cjrC01G7Q 密码: ma3t * `yolov3spp-voc-512.pt` **(这是我在视频演示训练中得到的权重)**: 链接: https://pan.baidu.com/s/1aFAtaHlge0ieFtQ9nhmj3w 密码: 8ph3 ## 6 数据集,本例程使用的是PASCAL VOC2012数据集 * `Pascal VOC2012` train/val数据集下载地址:http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar * 如果不了解数据集或者想使用自己的数据集进行训练,请参考我的bilibili:https://b23.tv/F1kSCK ## 7 使用方法 * 确保提前准备好数据集 * 确保提前下载好对应预训练模型权重 * 
def exif_size(img):
    """Return the EXIF-corrected size of a PIL image.

    Looks up the module-level ``orientation`` tag in the image's EXIF data;
    when it is 6 or 8 the width and height are swapped.

    :param img: PIL image
    :return: ``(width, height)``, swapped when the EXIF orientation is 6 or 8;
        the plain ``img.size`` when the orientation cannot be read
    """
    size = img.size  # (width, height)
    try:
        rotation = dict(img._getexif().items())[orientation]
    except Exception:
        # Best effort: no EXIF data or no orientation entry -> keep the size.
        # "except Exception" (rather than a bare except) lets
        # KeyboardInterrupt / SystemExit propagate.
        return size

    if rotation in (6, 8):  # both orientations swap width and height
        return size[1], size[0]
    return size
这里设置的是预处理后输出的图片尺寸 # 当为训练集时,设置的是训练过程中(开启多尺度)的最大尺寸 # 当为验证集时,设置的是最终使用的网络大小 img_size=416, batch_size=16, augment=False, # 训练集设置为True(augment_hsv),验证集设置为False hyp=None, # 超参数字典,其中包含图像增强会使用到的超参数 rect=False, # 是否使用rectangular training cache_images=False, # 是否缓存图片到内存中 single_cls=False, pad=0.0, rank=-1): try: path = str(Path(path)) # parent = str(Path(path).parent) + os.sep if os.path.isfile(path): # file # 读取对应my_train/val_data.txt文件,读取每一行的图片路劲信息 with open(path, "r") as f: f = f.read().splitlines() else: raise Exception("%s does not exist" % path) # 检查每张图片后缀格式是否在支持的列表中,保存支持的图像路径 # img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.dng'] self.img_files = [x for x in f if os.path.splitext(x)[-1].lower() in img_formats] self.img_files.sort() # 防止不同系统排序不同,导致shape文件出现差异 except Exception as e: raise FileNotFoundError("Error loading data from {}. {}".format(path, e)) # 如果图片列表中没有图片,则报错 n = len(self.img_files) assert n > 0, "No images found in %s. See %s" % (path, help_url) # batch index # 将数据划分到一个个batch中 bi = np.floor(np.arange(n) / batch_size).astype(np.int) # 记录数据集划分后的总batch数 nb = bi[-1] + 1 # number of batches self.n = n # number of images 图像总数目 self.batch = bi # batch index of image 记录哪些图片属于哪个batch self.img_size = img_size # 这里设置的是预处理后输出的图片尺寸 self.augment = augment # 是否启用augment_hsv self.hyp = hyp # 超参数字典,其中包含图像增强会使用到的超参数 self.rect = rect # 是否使用rectangular training # 注意: 开启rect后,mosaic就默认关闭 self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training) # Define labels # 遍历设置图像对应的label路径 # (./my_yolo_dataset/train/images/2009_004012.jpg) -> (./my_yolo_dataset/train/labels/2009_004012.txt) self.label_files = [x.replace("images", "labels").replace(os.path.splitext(x)[-1], ".txt") for x in self.img_files] # Read image shapes (wh) # 查看data文件下是否缓存有对应数据集的.shapes文件,里面存储了每张图像的width, height sp = path.replace(".txt", ".shapes") # shapefile path try: with open(sp, "r") as f: # read existing shapefile s = [x.split() for x in 
f.read().splitlines()] # 判断现有的shape文件中的行数(图像个数)是否与当前数据集中图像个数相等 # 如果不相等则认为是不同的数据集,故重新生成shape文件 assert len(s) == n, "shapefile out of aync" except Exception as e: # print("read {} failed [{}], rebuild {}.".format(sp, e, sp)) # tqdm库会显示处理的进度 # 读取每张图片的size信息 if rank in [-1, 0]: image_files = tqdm(self.img_files, desc="Reading image shapes") else: image_files = self.img_files s = [exif_size(Image.open(f)) for f in image_files] # 将所有图片的shape信息保存在.shape文件中 np.savetxt(sp, s, fmt="%g") # overwrite existing (if any) # 记录每张图像的原始尺寸 self.shapes = np.array(s, dtype=np.float64) # Rectangular Training https://github.com/ultralytics/yolov3/issues/232 # 如果为ture,训练网络时,会使用类似原图像比例的矩形(让最长边为img_size),而不是img_size x img_size # 注意: 开启rect后,mosaic就默认关闭 if self.rect: # Sort by aspect ratio s = self.shapes # wh # 计算每个图片的高/宽比 ar = s[:, 1] / s[:, 0] # aspect ratio # argsort函数返回的是数组值从小到大的索引值 # 按照高宽比例进行排序,这样后面划分的每个batch中的图像就拥有类似的高宽比 irect = ar.argsort() # 根据排序后的顺序重新设置图像顺序、标签顺序以及shape顺序 self.img_files = [self.img_files[i] for i in irect] self.label_files = [self.label_files[i] for i in irect] self.shapes = s[irect] # wh ar = ar[irect] # set training image shapes # 计算每个batch采用的统一尺度 shapes = [[1, 1]] * nb # nb: number of batches for i in range(nb): ari = ar[bi == i] # bi: batch index # 获取第i个batch中,最小和最大高宽比 mini, maxi = ari.min(), ari.max() # 如果高/宽小于1(w > h),将w设为img_size if maxi < 1: shapes[i] = [maxi, 1] # 如果高/宽大于1(w < h),将h设置为img_size elif mini > 1: shapes[i] = [1, 1 / mini] # 计算每个batch输入网络的shape值(向上设置为32的整数倍) self.batch_shapes = np.ceil(np.array(shapes) * img_size / 32. 
+ pad).astype(np.int) * 32 # cache labels self.imgs = [None] * n # n为图像总数 # label: [class, x, y, w, h] 其中的xywh都为相对值 self.labels = [np.zeros((0, 5), dtype=np.float32)] * n extract_bounding_boxes, labels_loaded = False, False nm, nf, ne, nd = 0, 0, 0, 0 # number mission, found, empty, duplicate # 这里分别命名是为了防止出现rect为False/True时混用导致计算的mAP错误 # 当rect为True时会对self.images和self.labels进行从新排序 if rect is True: np_labels_path = str(Path(self.label_files[0]).parent) + ".rect.npy" # saved labels in *.npy file else: np_labels_path = str(Path(self.label_files[0]).parent) + ".norect.npy" if os.path.isfile(np_labels_path): x = np.load(np_labels_path, allow_pickle=True) if len(x) == n: # 如果载入的缓存标签个数与当前计算的图像数目相同则认为是同一数据集,直接读缓存 self.labels = x labels_loaded = True # 处理进度条只在第一个进程中显示 if rank in [-1, 0]: pbar = tqdm(self.label_files) else: pbar = self.label_files # 遍历载入标签文件 for i, file in enumerate(pbar): if labels_loaded is True: # 如果存在缓存直接从缓存读取 l = self.labels[i] else: # 从文件读取标签信息 try: with open(file, "r") as f: # 读取每一行label,并按空格划分数据 l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) except Exception as e: print("An error occurred while loading the file {}: {}".format(file, e)) nm += 1 # file missing continue # 如果标注信息不为空的话 if l.shape[0]: # 标签信息每行必须是五个值[class, x, y, w, h] assert l.shape[1] == 5, "> 5 label columns: %s" % file assert (l >= 0).all(), "negative labels: %s" % file assert (l[:, 1:] <= 1).all(), "non-normalized or out of bounds coordinate labels: %s" % file # 检查每一行,看是否有重复信息 if np.unique(l, axis=0).shape[0] < l.shape[0]: # duplicate rows nd += 1 if single_cls: l[:, 0] = 0 # force dataset into single-class mode self.labels[i] = l nf += 1 # file found # Extract object detection boxes for a second stage classifier if extract_bounding_boxes: p = Path(self.img_files[i]) img = cv2.imread(str(p)) h, w = img.shape[:2] for j, x in enumerate(l): f = "%s%sclassifier%s%g_%g_%s" % (p.parent.parent, os.sep, os.sep, x[0], j, p.name) if not os.path.exists(Path(f).parent): 
os.makedirs(Path(f).parent) # make new output folder # 将相对坐标转为绝对坐标 # b: x, y, w, h b = x[1:] * [w, h, w, h] # box # 将宽和高设置为宽和高中的最大值 b[2:] = b[2:].max() # rectangle to square # 放大裁剪目标的宽高 b[2:] = b[2:] * 1.3 + 30 # pad # 将坐标格式从 x,y,w,h -> xmin,ymin,xmax,ymax b = xywh2xyxy(b.reshape(-1, 4)).revel().astype(np.int) # 裁剪bbox坐标到图片内 b[[0, 2]] = np.clip[b[[0, 2]], 0, w] b[[1, 3]] = np.clip[b[[1, 3]], 0, h] assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), "Failure extracting classifier boxes" else: ne += 1 # file empty # 处理进度条只在第一个进程中显示 if rank in [-1, 0]: # 更新进度条描述信息 pbar.desc = "Caching labels (%g found, %g missing, %g empty, %g duplicate, for %g images)" % ( nf, nm, ne, nd, n) assert nf > 0, "No labels found in %s." % os.path.dirname(self.label_files[0]) + os.sep # 如果标签信息没有被保存成numpy的格式,且训练样本数大于1000则将标签信息保存成numpy的格式 if not labels_loaded and n > 1000: print("Saving labels to %s for faster future loading" % np_labels_path) np.save(np_labels_path, self.labels) # save for next time # Cache images into memory for faster training (Warning: large datasets may exceed system RAM) if cache_images: # if training gb = 0 # Gigabytes of cached images 用于记录缓存图像占用RAM大小 if rank in [-1, 0]: pbar = tqdm(range(len(self.img_files)), desc="Caching images") else: pbar = range(len(self.img_files)) self.img_hw0, self.img_hw = [None] * n, [None] * n for i in pbar: # max 10k images self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i) # img, hw_original, hw_resized gb += self.imgs[i].nbytes # 用于记录缓存图像占用RAM大小 if rank in [-1, 0]: pbar.desc = "Caching images (%.1fGB)" % (gb / 1E9) # Detect corrupted images https://medium.com/joelthchao/programmatically-detect-corrupted-image-8c1b2006c3d3 detect_corrupted_images = False if detect_corrupted_images: from skimage import io # conda install -c conda-forge scikit-image for file in tqdm(self.img_files, desc="Detecting corrupted images"): try: _ = io.imread(file) except Exception as e: print("Corrupted image detected: {}, {}".format(file, e)) def 
__len__(self): return len(self.img_files) def __getitem__(self, index): hyp = self.hyp if self.mosaic: # load mosaic img, labels = load_mosaic(self, index) shapes = None else: # load image img, (h0, w0), (h, w) = load_image(self, index) # letterbox shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape img, ratio, pad = letterbox(img, shape, auto=False, scale_up=self.augment) shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling # load labels labels = [] x = self.labels[index] if x.size > 0: # Normalized xywh to pixel xyxy format labels = x.copy() # label: class, x, y, w, h labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0] # pad width labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1] # pad height labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0] labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1] if self.augment: # Augment imagespace if not self.mosaic: img, labels = random_affine(img, labels, degrees=hyp["degrees"], translate=hyp["translate"], scale=hyp["scale"], shear=hyp["shear"]) # Augment colorspace augment_hsv(img, h_gain=hyp["hsv_h"], s_gain=hyp["hsv_s"], v_gain=hyp["hsv_v"]) nL = len(labels) # number of labels if nL: # convert xyxy to xywh labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) # Normalize coordinates 0-1 labels[:, [2, 4]] /= img.shape[0] # height labels[:, [1, 3]] /= img.shape[1] # width if self.augment: # random left-right flip lr_flip = True # 随机水平翻转 if lr_flip and random.random() < 0.5: img = np.fliplr(img) if nL: labels[:, 1] = 1 - labels[:, 1] # 1 - x_center # random up-down flip ud_flip = False if ud_flip and random.random() < 0.5: img = np.flipud(img) if nL: labels[:, 2] = 1 - labels[:, 2] # 1 - y_center labels_out = torch.zeros((nL, 6)) # nL: number of labels if nL: labels_out[:, 1:] = torch.from_numpy(labels) # Convert BGR to RGB, and HWC to CHW(3x512x512) img = img[:, :, ::-1].transpose(2, 0, 1) img = np.ascontiguousarray(img) 
return torch.from_numpy(img), labels_out, self.img_files[index], shapes, index def coco_index(self, index): """该方法是专门为cocotools统计标签信息准备,不对图像和标签作任何处理""" o_shapes = self.shapes[index][::-1] # wh to hw # load labels x = self.labels[index] labels = x.copy() # label: class, x, y, w, h return torch.from_numpy(labels), o_shapes @staticmethod def collate_fn(batch): img, label, path, shapes, index = zip(*batch) # transposed for i, l in enumerate(label): l[:, 0] = i # add target image index for build_targets() return torch.stack(img, 0), torch.cat(label, 0), path, shapes, index def load_image(self, index): # loads 1 image from dataset, returns img, original hw, resized hw img = self.imgs[index] if img is None: # not cached path = self.img_files[index] img = cv2.imread(path) # BGR assert img is not None, "Image Not Found " + path h0, w0 = img.shape[:2] # orig hw # img_size 设置的是预处理后输出的图片尺寸 r = self.img_size / max(h0, w0) # resize image to img_size if r != 1: # if sizes are not equal interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp) return img, (h0, w0), img.shape[:2] # img, hw_original, hw_resized else: return self.imgs[index], self.img_hw0[index], self.img_hw[index] # img, hw_original, hw_resized def load_mosaic(self, index): """ 将四张图片拼接在一张马赛克图像中 :param self: :param index: 需要获取的图像索引 :return: """ # loads images in a mosaic labels4 = [] # 拼接图像的label信息 s = self.img_size # 随机初始化拼接图像的中心点坐标 xc, yc = [int(random.uniform(s * 0.5, s * 1.5)) for _ in range(2)] # mosaic center x, y # 从dataset中随机寻找三张图像进行拼接 indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)] # 3 additional image indices # 遍历四张图像进行拼接 for i, index in enumerate(indices): # load image img, _, (h, w) = load_image(self, index) # place img in img4 if i == 0: # top left # 创建马赛克图像 img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles # 计算马赛克图像中的坐标信息(将图像填充到马赛克图像中) x1a, 
y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) # 计算截取的图像区域信息(以xc,yc为第一张图像的右下角坐标填充到马赛克图像中,丢弃越界的区域) x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) elif i == 1: # top right # 计算马赛克图像中的坐标信息(将图像填充到马赛克图像中) x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc # 计算截取的图像区域信息(以xc,yc为第二张图像的左下角坐标填充到马赛克图像中,丢弃越界的区域) x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h elif i == 2: # bottom left # 计算马赛克图像中的坐标信息(将图像填充到马赛克图像中) x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) # 计算截取的图像区域信息(以xc,yc为第三张图像的右上角坐标填充到马赛克图像中,丢弃越界的区域) x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, max(xc, w), min(y2a - y1a, h) elif i == 3: # bottom right # 计算马赛克图像中的坐标信息(将图像填充到马赛克图像中) x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) # 计算截取的图像区域信息(以xc,yc为第四张图像的左上角坐标填充到马赛克图像中,丢弃越界的区域) x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) # 将截取的图像区域填充到马赛克图像的相应位置 img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] # 计算pad(图像边界与马赛克边界的距离,越界的情况为负值) padw = x1a - x1b padh = y1a - y1b # Labels 获取对应拼接图像的labels信息 # [class_index, x_center, y_center, w, h] x = self.labels[index] labels = x.copy() # 深拷贝,防止修改原数据 if x.size > 0: # Normalized xywh to pixel xyxy format # 计算标注数据在马赛克图像中的坐标(绝对坐标) labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw # xmin labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh # ymin labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw # xmax labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh # ymax labels4.append(labels) # Concat/clip labels if len(labels4): labels4 = np.concatenate(labels4, 0) # 设置上下限防止越界 np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:]) # use with random_affine # Augment # 随机旋转,缩放,平移以及错切 img4, labels4 = random_affine(img4, labels4, degrees=self.hyp['degrees'], translate=self.hyp['translate'], scale=self.hyp['scale'], shear=self.hyp['shear'], border=-s // 2) # border to remove return img4, labels4 def 
def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, border=0):
    """Apply a random rotation, scale, translation and shear to an image and its boxes.

    Roughly equivalent to
    torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
    https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4

    :param img: image array (H, W, C)
    :param targets: boxes as rows of [cls, x1, y1, x2, y2] in pixels
    :param degrees: rotation is drawn from [-degrees, degrees]
    :param translate: translation is drawn from [-translate, translate] as a
        fraction of the image width (x) and height (y)
    :param scale: scale factor is drawn from [1 - scale, 1 + scale]
    :param shear: shear angles are drawn from [-shear, shear] degrees
    :param border: the output is img.shape + 2 * border; a negative border
        crops (the mosaic loader passes -img_size // 2)
    :return: (warped image, remaining targets with updated boxes)
    """
    # output size (for a mosaic with border=-s//2 this is half of img4.shape)
    height = img.shape[0] + border * 2
    width = img.shape[1] + border * 2

    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)  # random rotation angle
    s = random.uniform(1 - scale, 1 + scale)  # random scale factor
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)

    # Translation
    # M[0, 2] shifts along x, so it is scaled by the image WIDTH (shape[1]);
    # M[1, 2] shifts along y and is scaled by the HEIGHT (shape[0]).
    # The two were swapped before, which only went unnoticed on square images.
    T = np.eye(3)
    T[0, 2] = random.uniform(-translate, translate) * img.shape[1] + border  # x translation (pixels)
    T[1, 2] = random.uniform(-translate, translate) * img.shape[0] + border  # y translation (pixels)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Combined rotation matrix
    M = S @ T @ R  # ORDER IS IMPORTANT HERE!!
    if (border != 0) or (M != np.eye(3)).any():  # image changed
        img = cv2.warpAffine(img, M[:2], dsize=(width, height), flags=cv2.INTER_LINEAR, borderValue=(114, 114, 114))

    # Transform label coordinates
    n = len(targets)
    if n:
        # warp the four corners of every box
        xy = np.ones((n * 4, 3))
        xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
        # [4*n, 3] -> [n, 8]
        xy = (xy @ M.T)[:, :2].reshape(n, 8)

        # the warped box is in general no longer axis aligned: take the
        # axis-aligned rectangle enclosing its four corners
        x = xy[:, [0, 2, 4, 6]]  # [n, 4]
        y = xy[:, [1, 3, 5, 7]]  # [n, 4]
        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T  # [n, 4]

        # clip the boxes to the output image
        xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
        xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
        w = xy[:, 2] - xy[:, 0]
        h = xy[:, 3] - xy[:, 1]

        area = w * h  # box area after the transform
        area0 = (targets[:, 3] - targets[:, 1]) * (targets[:, 4] - targets[:, 2])  # area before
        ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))  # aspect ratio
        # keep boxes larger than 4 px per side, that retain more than 20% of
        # their (scaled) area and whose aspect ratio is below 10
        i = (w > 4) & (h > 4) & (area / (area0 * s + 1e-16) > 0.2) & (ar < 10)

        targets = targets[i]
        targets[:, 1:5] = xy[i]

    return img, targets
def letterbox(img: np.ndarray,
              new_shape=(416, 416),
              color=(114, 114, 114),
              auto=True,
              scale_fill=False,
              scale_up=True):
    """Resize an image to fit ``new_shape`` and pad the remainder with ``color``.

    :param img: input image as a numpy array (H, W, C)
    :param new_shape: target (h, w) of the network input; an int means a square
    :param color: colour used for the padding border
    :param auto: pad only up to the minimum rectangle (the padding is taken
        modulo 64) instead of the full ``new_shape``
    :param scale_fill: stretch the image to ``new_shape`` without keeping the
        aspect ratio and without padding (only looked at when ``auto`` is False)
    :param scale_up: when False the image is only ever shrunk, never enlarged
    :return: (padded image, (w_ratio, h_ratio), (dw, dh)) where dw / dh is the
        padding added to each side
    """
    src_h, src_w = img.shape[:2]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    dst_h, dst_w = new_shape[0], new_shape[1]

    # scale ratio (new / old), limited by the tighter of the two sides
    scale = min(dst_h / src_h, dst_w / src_w)
    if not scale_up:
        # only scale down, do not scale up (for better test mAP)
        scale = min(scale, 1.0)

    ratio = scale, scale  # width, height ratios
    resized_wh = int(round(src_w * scale)), int(round(src_h * scale))
    pad_w = dst_w - resized_wh[0]
    pad_h = dst_h - resized_wh[1]

    if auto:
        # minimum rectangle: keep only the remainder of the padding modulo 64
        pad_w, pad_h = np.mod(pad_w, 64), np.mod(pad_h, 64)
    elif scale_fill:
        # stretch: no padding, every axis gets its own ratio
        pad_w, pad_h = 0, 0
        resized_wh = new_shape[::-1]  # [h, w] -> [w, h]
        ratio = dst_w / src_w, dst_h / src_h  # wh ratios

    # split the padding between the two opposite sides
    pad_w = pad_w / 2
    pad_h = pad_h / 2

    # img.shape is [h, w], cv2 sizes are [w, h]
    if (src_w, src_h) != resized_wh:
        img = cv2.resize(img, resized_wh, interpolation=cv2.INTER_LINEAR)

    # the -0.1 / +0.1 nudges put the odd pixel of an x.5 padding on the bottom / right
    top = int(round(pad_h - 0.1))
    bottom = int(round(pad_h + 0.1))
    left = int(round(pad_w - 0.1))
    right = int(round(pad_w + 0.1))

    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (pad_w, pad_h)
class FeatureConcat(nn.Module):
    """Concatenate the outputs of several earlier layers along the channel dimension."""

    def __init__(self, layers):
        super(FeatureConcat, self).__init__()
        self.layers = layers  # layer indices
        self.multiple = len(layers) > 1  # multiple layers flag

    def forward(self, x, outputs):
        # a single source layer is passed through untouched (no copy)
        if not self.multiple:
            return outputs[self.layers[0]]
        selected = [outputs[idx] for idx in self.layers]
        return torch.cat(selected, 1)


class WeightedFeatureFusion(nn.Module):
    """Sum the input with one or more earlier feature maps (add operation).

    Weighted sum of 2 or more layers, see https://arxiv.org/abs/1911.09070
    """

    def __init__(self, layers, weight=False):
        super(WeightedFeatureFusion, self).__init__()
        self.layers = layers  # layer indices
        self.weight = weight  # apply weights boolean
        self.n = len(layers) + 1  # number of fused feature maps (input included)
        if weight:
            self.w = nn.Parameter(torch.zeros(self.n), requires_grad=True)  # layer weights

    def forward(self, x, outputs):
        weighted = self.weight
        if weighted:
            coeff = torch.sigmoid(self.w) * (2 / self.n)  # sigmoid weights (0-1)
            x = x * coeff[0]

        x_ch = x.shape[1]  # input channels
        for k in range(self.n - 1):
            feat = outputs[self.layers[k]]  # feature to add
            if weighted:
                feat = feat * coeff[k + 1]
            feat_ch = feat.shape[1]  # feature channels

            # pick the way of adding according to the channel counts
            if x_ch == feat_ch:
                # same number of channels: plain addition
                x = x + feat
            elif x_ch > feat_ch:
                # the input has more channels: add into its first channels only
                x[:, :feat_ch] = x[:, :feat_ch] + feat
            else:
                # the feature has more channels: drop its surplus channels
                x = x + feat[:, :x_ch]
        return x
[(i == g).sum() for g in range(groups)] else: # 'equal_params': equal parameter count per group b = [out_ch] + [0] * groups a = np.eye(groups + 1, groups, k=-1) a -= np.roll(a, 1, axis=1) a *= np.array(k) ** 2 a[0] = 1 ch = np.linalg.lstsq(a, b, rcond=None)[0].round().astype(int) # solve for equal weight indices, ax = b self.m = nn.ModuleList([nn.Conv2d(in_channels=in_ch, out_channels=ch[g], kernel_size=k[g], stride=stride, padding=k[g] // 2, # 'same' pad dilation=dilation, bias=bias) for g in range(groups)]) def forward(self, x): return torch.cat([m(x) for m in self.m], 1) # Activation functions below ------------------------------------------------------------------------------------------- class SwishImplementation(torch.autograd.Function): @staticmethod def forward(ctx, x): ctx.save_for_backward(x) return x * torch.sigmoid(x) @staticmethod def backward(ctx, grad_output): x = ctx.saved_tensors[0] sx = torch.sigmoid(x) # sigmoid(ctx) return grad_output * (sx * (1 + x * (1 - sx))) class MishImplementation(torch.autograd.Function): @staticmethod def forward(ctx, x): ctx.save_for_backward(x) return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) @staticmethod def backward(ctx, grad_output): x = ctx.saved_tensors[0] sx = torch.sigmoid(x) fx = F.softplus(x).tanh() return grad_output * (fx + x * sx * (1 - fx * fx)) class MemoryEfficientSwish(nn.Module): def forward(self, x): return SwishImplementation.apply(x) class MemoryEfficientMish(nn.Module): def forward(self, x): return MishImplementation.apply(x) class Swish(nn.Module): def forward(self, x): return x * torch.sigmoid(x) class HardSwish(nn.Module): # https://arxiv.org/pdf/1905.02244.pdf def forward(self, x): return x * F.hardtanh(x + 3, 0., 6., True) / 6. 
def parse_model_cfg(path: str):
    """Parse a darknet style ``.cfg`` network description.

    :param path: path of the cfg file; must end with ".cfg" and exist
    :return: list of dicts, one per ``[block]`` of the file in file order.
        Every dict has a "type" key (the block name); "convolutional" blocks
        get "batch_normalize" preset to 0. Values made only of digits become
        int, "anchors" becomes an (n, 2) float array, "from" / "layers" /
        "mask" (and comma separated "size") become lists of int, everything
        else is kept as a string.
    :raises FileNotFoundError: wrong extension or missing file
    :raises ValueError: a block other than the first uses an unsupported key
    """
    if not path.endswith(".cfg") or not os.path.exists(path):
        raise FileNotFoundError("the cfg file not exist...")

    with open(path, "r") as f:
        raw_lines = f.read().split("\n")

    # Strip first, then drop blank and comment lines. Filtering before
    # stripping let whitespace-only lines and indented comments through,
    # which then failed in the "key=value" split below.
    lines = [x.strip() for x in raw_lines]
    lines = [x for x in lines if x and not x.startswith("#")]

    mdefs = []  # module definitions
    for line in lines:
        if line.startswith("["):  # this marks the start of a new block
            mdefs.append({})
            mdefs[-1]["type"] = line[1:-1].strip()  # module type
            # convolutions default to no BN; the cfg overrides this with 1 for
            # regular conv layers, the final prediction conv keeps 0
            if mdefs[-1]["type"] == "convolutional":
                mdefs[-1]["batch_normalize"] = 0
        else:
            # split on the first "=" only so that values may contain "="
            key, val = line.split("=", 1)
            key = key.strip()
            val = val.strip()

            if key == "anchors":
                # anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
                val = val.replace(" ", "")
                mdefs[-1][key] = np.array([float(x) for x in val.split(",")]).reshape((-1, 2))  # np anchors
            elif (key in ["from", "layers", "mask"]) or (key == "size" and "," in val):
                mdefs[-1][key] = [int(x) for x in val.split(",")]
            elif val.isnumeric():
                # TODO: .isnumeric() is False for floats and negative numbers,
                # so those are still returned as strings
                mdefs[-1][key] = int(val)
            else:
                mdefs[-1][key] = val  # return string

    # check all fields are supported
    supported = ('type', 'batch_normalize', 'filters', 'size', 'stride', 'pad', 'activation',
                 'layers', 'groups', 'from', 'mask', 'anchors', 'classes', 'num', 'jitter',
                 'ignore_thresh', 'truth_thresh', 'random', 'stride_x', 'stride_y', 'weights_type',
                 'weights_normalization', 'scale_x_y', 'beta_nms', 'nms_kind', 'iou_loss',
                 'iou_normalizer', 'cls_normalizer', 'iou_thresh', 'probability')

    for mdef in mdefs[1:]:  # index 0 is the [net] block, which is not checked
        for k in mdef:
            if k not in supported:
                raise ValueError("Unsupported fields:{} in cfg".format(k))

    return mdefs


def parse_data_cfg(path):
    """Parse a ``key=value`` data configuration file into a dict of strings.

    If ``path`` does not exist but ``data/<path>`` does, the latter is used.
    Blank lines and lines starting with "#" are ignored.
    """
    if not os.path.exists(path) and os.path.exists('data' + os.sep + path):  # add data/ prefix if omitted
        path = 'data' + os.sep + path

    with open(path, 'r') as f:
        lines = f.readlines()

    options = dict()
    for line in lines:
        line = line.strip()
        if line == '' or line.startswith('#'):
            continue
        # split on the first "=" only: values (e.g. paths) may contain "="
        key, val = line.split('=', 1)
        options[key.strip()] = val.strip()

    return options
class ModelEMA:
    """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
    Keep a moving average of everything in the model state_dict (parameters and buffers).
    This is intended to allow functionality like
    https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
    A smoothed version of the weights is necessary for some training schemes to perform well.
    E.g. Google's hyper-params for training MNASNet, MobileNet-V3, EfficientNet, etc that use
    RMSprop with a short 2.4-3 epoch decay period and slow LR decay rate of .96-.99 requires EMA
    smoothing of weights to match results. Pay attention to the decay constant you are using
    relative to your update count per epoch.
    To keep EMA from using GPU resources, set device='cpu'. This will save a bit of memory but
    disable validation of the EMA weights. Validation will have to be done manually in a separate
    process, or after the training stops converging.

    The EMA model is a deep copy taken in ``__init__``. ``update`` unwraps the
    live model and the EMA copy independently, so the live model may be wrapped
    in DataParallel / DistributedDataParallel before or after the EMA is created.
    """

    def __init__(self, model, decay=0.9999, device=''):
        """
        :param model: model whose weights are tracked
        :param decay: asymptotic decay of the moving average
        :param device: if set, the EMA copy is moved to this device
        """
        # make a copy of the model for accumulating moving average of weights
        self.ema = deepcopy(model)
        self.ema.eval()
        self.updates = 0  # number of EMA updates
        # decay exponential ramp (to help early epochs)
        self.decay = lambda x: decay * (1 - math.exp(-x / 2000))
        self.device = device  # perform ema on different device from model if set
        if device:
            self.ema.to(device=device)
        for p in self.ema.parameters():
            p.requires_grad_(False)

    @staticmethod
    def _unwrap(model):
        """Return the underlying module of a (Distributed)DataParallel wrapper."""
        parallel_types = (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
        return model.module if isinstance(model, parallel_types) else model

    def update(self, model):
        """Blend the current weights of ``model`` into the moving average."""
        self.updates += 1
        d = self.decay(self.updates)
        with torch.no_grad():
            # Unwrap each side on its own: the EMA copy is only wrapped when
            # the model was already wrapped at construction time.
            msd = self._unwrap(model).state_dict()
            esd = self._unwrap(self.ema).state_dict()

            for k, v in esd.items():
                if v.dtype.is_floating_point:
                    src = msd[k].detach()
                    if src.device != v.device:
                        # EMA kept on another device (e.g. device='cpu')
                        src = src.to(v.device)
                    v *= d
                    v += (1. - d) * src

    def update_attr(self, model):
        """Copy the public (non underscore) instance attributes of ``model`` to the EMA copy."""
        # Assign attributes (which may change during training)
        for k in model.__dict__.keys():
            if not k.startswith('_'):
                setattr(self.ema, k, getattr(model, k))
def xyxy2xywh(x):
    """Convert nx4 boxes from [x1, y1, x2, y2] to [x_center, y_center, w, h].

    xy1 is the top-left corner and xy2 the bottom-right corner. Accepts a
    torch.Tensor or a numpy array and returns a new object of the same kind.
    """
    y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
    y[:, 0] = (x[:, 0] + x[:, 2]) / 2  # x center
    y[:, 1] = (x[:, 1] + x[:, 3]) / 2  # y center
    y[:, 2] = x[:, 2] - x[:, 0]  # width
    y[:, 3] = x[:, 3] - x[:, 1]  # height
    return y


def xywh2xyxy(x):
    """Convert nx4 boxes from [x_center, y_center, w, h] to [x1, y1, x2, y2].

    xy1 is the top-left corner and xy2 the bottom-right corner. Accepts a
    torch.Tensor or a numpy array and returns a new object of the same kind.
    """
    y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
    return y


def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    """Rescale predicted xyxy boxes from the resized image back to the original image.

    :param img1_shape: (height, width) of the resized image
    :param coords: predicted boxes, columns 0-3 are x1, y1, x2, y2 (modified in place)
    :param img0_shape: (height, width) of the image before resizing
    :param ratio_pad: scale ratio and padding recorded during resizing, read as
        ``ratio_pad[0][0]`` (gain) and ``ratio_pad[1]`` (x pad, y pad);
        computed from the two shapes when None
    :return: the same ``coords`` object, rescaled and clipped to img0_shape
    """
    if ratio_pad is None:  # calculate from img0_shape
        gain = max(img1_shape) / max(img0_shape)  # gain = old / new
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    coords[:, [0, 2]] -= pad[0]  # x padding
    coords[:, [1, 3]] -= pad[1]  # y padding
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)
    return coords


def clip_coords(boxes, img_shape):
    """Clip xyxy boxes in place to the image shape (height, width).

    Uses ``Tensor.clamp_`` on column views, so ``boxes`` must be a torch tensor.
    """
    boxes[:, 0].clamp_(0, img_shape[1])  # x1
    boxes[:, 1].clamp_(0, img_shape[0])  # y1
    boxes[:, 2].clamp_(0, img_shape[1])  # x2
    boxes[:, 3].clamp_(0, img_shape[0])  # y2


def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False):
    """Return the IoU (or GIoU / DIoU / CIoU) of box1 against every box in box2.

    box1 is 4, box2 is nx4.

    :param x1y1x2y2: True if boxes are (x1, y1, x2, y2); False for (cx, cy, w, h)
    :param GIoU: return Generalized IoU https://arxiv.org/pdf/1902.09630.pdf
    :param DIoU: return Distance IoU https://arxiv.org/abs/1911.08287v1
    :param CIoU: return Complete IoU (same paper as DIoU)
    :return: tensor with one value per box2 row
    """
    eps = 1e-16  # guards every division below against a zero denominator
    box2 = box2.t()

    # Get the coordinates of bounding boxes
    if x1y1x2y2:  # x1, y1, x2, y2 = box1
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
    else:  # transform from xywh to xyxy
        b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
        b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
        b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
        b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2

    # Intersection area
    inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
            (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)

    # Union Area
    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1
    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1
    union = (w1 * h1 + eps) + w2 * h2 - inter

    iou = inter / union  # iou
    if GIoU or DIoU or CIoU:
        cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)  # convex (smallest enclosing box) width
        ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)  # convex height
        if GIoU:  # Generalized IoU
            c_area = cw * ch + eps  # convex area
            return iou - (c_area - union) / c_area  # GIoU
        if DIoU or CIoU:  # Distance or Complete IoU
            # convex diagonal squared
            c2 = cw ** 2 + ch ** 2 + eps
            # centerpoint distance squared
            rho2 = ((b2_x1 + b2_x2) - (b1_x1 + b1_x2)) ** 2 / 4 + ((b2_y1 + b2_y2) - (b1_y1 + b1_y2)) ** 2 / 4
            if DIoU:
                return iou - rho2 / c2  # DIoU
            elif CIoU:  # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
                # eps in the aspect ratios avoids 0/0 -> NaN for zero-height boxes
                v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / (h2 + eps)) - torch.atan(w1 / (h1 + eps)), 2)
                with torch.no_grad():
                    # eps avoids 0/0 -> NaN when the boxes coincide (iou == 1, v == 0)
                    alpha = v / (1 - iou + v + eps)
                return iou - (rho2 / c2 + v * alpha)  # CIoU

    return iou
def wh_iou(wh1, wh2):
    """Return the NxM IoU matrix of width/height pairs. wh1 is Nx2, wh2 is Mx2."""
    first = wh1.unsqueeze(1)  # [N,1,2]
    second = wh2.unsqueeze(0)  # [1,M,2]
    overlap = torch.min(first, second).prod(2)  # [N,M]
    total = first.prod(2) + second.prod(2) - overlap  # area1 + area2 - inter
    return overlap / total


class FocalLoss(nn.Module):
    """Wrap focal loss around an existing loss function.

    Example: criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)

    The wrapped loss has its ``reduction`` switched to 'none' so the focal
    weighting can be applied per element; the original reduction is remembered
    and applied to the weighted result in ``forward``.
    """

    def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
        super().__init__()
        self.loss_fcn = loss_fcn  # must be nn.BCEWithLogitsLoss()
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = loss_fcn.reduction
        self.loss_fcn.reduction = 'none'  # required to apply FL to each element

    def forward(self, pred, true):
        """Return the focal-weighted loss of logits ``pred`` against targets ``true``."""
        elementwise = self.loss_fcn(pred, true)

        # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py
        pred_prob = torch.sigmoid(pred)  # prob from logits
        p_t = true * pred_prob + (1 - true) * (1 - pred_prob)
        alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
        modulating_factor = (1.0 - p_t) ** self.gamma
        weighted = elementwise * (alpha_factor * modulating_factor)

        if self.reduction == 'mean':
            return weighted.mean()
        if self.reduction == 'sum':
            return weighted.sum()
        return weighted  # 'none'


def smooth_BCE(eps=0.1):
    """Return (positive, negative) label-smoothing BCE targets.

    https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441
    """
    negative = 0.5 * eps
    return 1.0 - negative, negative
def build_targets(p, targets, model):
    """Build the training targets consumed by compute_loss().

    :param p: list of per-yolo-layer predictions; p[i].shape is
        [batch_size, num_anchors, grid_h, grid_w, num_params]
    :param targets: tensor of rows (image_idx, class, x, y, w, h)
    :param model: the network (optionally wrapped in DataParallel / DDP);
        ``yolo_layers``, ``hyp['iou_t']`` and ``nc`` are read from it
    :return: (tcls, tbox, indices, anch), each a list with one entry per yolo
        layer: class ids, boxes as (x/y offset inside the grid cell, w, h),
        index tuples (image_idx, anchor_idx, grid_y, grid_x) and matched anchors
    """
    nt = targets.shape[0]
    tcls, tbox, indices, anch = [], [], [], []
    gain = torch.ones(6, device=targets.device).long()  # normalized to gridspace gain

    multi_gpu = isinstance(model, (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel))
    net = model.module if multi_gpu else model
    for i, layer_idx in enumerate(model.yolo_layers):  # e.g. [89, 101, 113]
        # Anchors of this yolo predictor; anchor_vec is the anchors scaled to
        # the corresponding feature map.
        anchors = net.module_list[layer_idx].anchor_vec
        # p[i].shape: [batch_size, 3, grid_h, grid_w, num_params]
        gain[2:] = torch.tensor(p[i].shape)[[3, 2, 3, 2]]  # xyxy gain
        na = anchors.shape[0]  # number of anchors
        # [3] -> [3, 1] -> [3, nt]
        # Created on the targets' device: the boolean mask used to index it
        # below lives there, and indexing a CPU tensor with a mask on another
        # device is rejected by recent PyTorch versions.
        at = torch.arange(na, device=targets.device).view(na, 1).repeat(1, nt)  # anchor tensor

        # Match targets to anchors
        a, t, offsets = [], targets * gain, 0
        if nt:  # only when there are targets
            # Positive samples are anchor templates whose wh_iou with a target
            # exceeds the threshold. matched: [3, nt], iou_t = 0.20
            matched = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n) = wh_iou(anchors(3,2), gwh(n,2))
            # t.repeat(na, 1, 1): [nt, 6] -> [3, nt, 6]
            # keep the anchor index and target row of every positive match
            a, t = at[matched], t.repeat(na, 1, 1)[matched]  # filter

        # Define
        # .long() is to(torch.int64); it truncates towards zero
        b, c = t[:, :2].long().T  # image_idx, class
        gxy = t[:, 2:4]  # grid xy
        gwh = t[:, 4:6]  # grid wh
        gij = (gxy - offsets).long()  # top-left corner of the grid cell holding each target
        gi, gj = gij.T  # grid xy indices

        # Append
        # gain[3]: grid_h, gain[2]: grid_w
        # image_idx, anchor_idx, grid indices(y, x)
        indices.append((b, a, gj.clamp_(0, gain[3]-1), gi.clamp_(0, gain[2]-1)))
        tbox.append(torch.cat((gxy - gij, gwh), 1))  # gt box x,y offset within its cell, plus w,h
        anch.append(anchors[a])  # anchors
        tcls.append(c)  # class
        if c.shape[0]:  # if any targets
            # a label value must be smaller than the configured number of classes
            assert c.max() < model.nc, 'Model accepts %g classes labeled from 0-%g, however you labelled a class %g. ' \
                                       'See https://github.com/ultralytics/yolov3/wiki/Train-Custom-Data' % (
                                           model.nc, model.nc - 1, c.max())

    return tcls, tbox, indices, anch
def get_yolo_layers(model):
    """Return the indices of the 'yolo' entries in ``model.module_defs``.

    For yolov3 this is [82, 94, 106].
    """
    return [idx for idx, mdef in enumerate(model.module_defs) if mdef['type'] == 'yolo']
' % (n, img_size, iou.mean(), max_iou.mean(), iou[iou > thr].mean()), end='') for i, x in enumerate(k): print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg return k def fitness(k): # mutation fitness iou = wh_iou(wh, torch.Tensor(k)) # iou max_iou = iou.max(1)[0] return (max_iou * (max_iou > thr).float()).mean() # product # Get label wh wh = [] dataset = LoadImagesAndLabels(path, augment=True, rect=True) nr = 1 if img_size[0] == img_size[1] else 10 # number augmentation repetitions for s, l in zip(dataset.shapes, dataset.labels): wh.append(l[:, 3:5] * (s / s.max())) # image normalized to letterbox normalized wh wh = np.concatenate(wh, 0).repeat(nr, axis=0) # augment 10x wh *= np.random.uniform(img_size[0], img_size[1], size=(wh.shape[0], 1)) # normalized to pixels (multi-scale) wh = wh[(wh > 2.0).all(1)] # remove below threshold boxes (< 2 pixels wh) # Kmeans calculation from scipy.cluster.vq import kmeans print('Running kmeans for %g anchors on %g points...' 
% (n, len(wh))) s = wh.std(0) # sigmas for whitening k, dist = kmeans(wh / s, n, iter=30) # points, mean distance k *= s wh = torch.Tensor(wh) k = print_results(k) # # Plot # k, d = [None] * 20, [None] * 20 # for i in tqdm(range(1, 21)): # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # ax = ax.ravel() # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.') # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh # ax[0].hist(wh[wh[:, 0]<100, 0],400) # ax[1].hist(wh[wh[:, 1]<100, 1],400) # fig.tight_layout() # fig.savefig('wh.png', dpi=200) # Evolve npr = np.random f, sh, mp, s = fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma for _ in tqdm(range(gen), desc='Evolving anchors'): v = np.ones(sh) while (v == 1).all(): # mutate until a change occurs (prevent duplicates) v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) kg = (k.copy() * v).clip(min=2.0) fg = fitness(kg) if fg > f: f, k = fg, kg.copy() print_results(k) k = print_results(k) return k ================================================ FILE: pytorch_object_detection/yolov3_spp/calculate_dataset.py ================================================ """ 该脚本有3个功能: 1.统计训练集和验证集的数据并生成相应.txt文件 2.创建data.data文件,记录classes个数, train以及val数据集文件(.txt)路径和label.names文件路径 3.根据yolov3-spp.cfg创建my_yolov3.cfg文件修改其中的predictor filters以及yolo classes参数(这两个参数是根据类别数改变的) """ import os train_annotation_dir = "./my_yolo_dataset/train/labels" val_annotation_dir = "./my_yolo_dataset/val/labels" classes_label = "./data/my_data_label.names" cfg_path = "./cfg/yolov3-spp.cfg" assert os.path.exists(train_annotation_dir), "train_annotation_dir not exist!" assert os.path.exists(val_annotation_dir), "val_annotation_dir not exist!" assert os.path.exists(classes_label), "classes_label not exist!" assert os.path.exists(cfg_path), "cfg_path not exist!" 
def calculate_data_txt(txt_path, dataset_dir):
    """Write the image list file (one image path per line) for a label directory.

    Every entry of ``dataset_dir`` except "classes.txt" is mapped to the
    ".jpg" file of the same stem in the sibling directory obtained by
    replacing "labels" with "images" in ``dataset_dir``.

    :param txt_path: output .txt file (overwritten)
    :param dataset_dir: directory holding the label files
    :raises AssertionError: if a mapped image file does not exist
    """
    image_dir = dataset_dir.replace("labels", "images")
    with open(txt_path, "w") as w:
        for file_name in os.listdir(dataset_dir):
            if file_name == "classes.txt":
                continue

            # splitext drops only the final extension, so label names that
            # themselves contain dots (e.g. "img.v1.txt") keep their full stem.
            stem = os.path.splitext(file_name)[0]
            img_path = os.path.join(image_dir, stem) + ".jpg"
            assert os.path.exists(img_path), "file:{} not exist!".format(img_path)
            w.write(img_path + "\n")


def create_data_data(create_data_path, label_path, train_path, val_path, classes_info):
    """Write the my_data.data file recording classes, train, valid and names info.

    :param create_data_path: output .data file (overwritten)
    :param label_path: accepted for interface compatibility; not used, the
        names entry is always written as "data/my_data_label.names"
    :param train_path: path of the training image list (.txt)
    :param val_path: path of the validation image list (.txt)
    :param classes_info: sequence of class names; only its length is written
    """
    # shutil.copyfile(label_path, "./data/my_data_label.names")
    with open(create_data_path, "w") as w:
        w.write("classes={}".format(len(classes_info)) + "\n")  # number of classes
        w.write("train={}".format(train_path) + "\n")  # training set .txt path
        w.write("valid={}".format(val_path) + "\n")  # validation set .txt path
        w.write("names=data/my_data_label.names" + "\n")  # label.names path


def change_and_create_cfg_file(classes_info, save_cfg_path="./cfg/my_yolov3.cfg"):
    """Create my_yolov3.cfg with predictor filters and yolo classes adjusted.

    Reads the module-level ``cfg_path`` file. The hard-coded (1-based) line
    numbers below only match yolov3-spp.cfg.

    :param classes_info: sequence of class names; only its length is used
    :param save_cfg_path: output cfg file (overwritten)
    :raises AssertionError: if an expected line does not hold the parameter
    """
    filters_lines = [636, 722, 809]
    classes_lines = [643, 729, 816]
    with open(cfg_path, "r") as f:
        cfg_lines = f.readlines()

    num_classes = len(classes_info)
    output_num = (5 + num_classes) * 3  # (x, y, w, h, obj + classes) per anchor, 3 anchors

    for i in filters_lines:
        assert "filters" in cfg_lines[i-1], "filters param is not in line:{}".format(i)
        cfg_lines[i-1] = "filters={}\n".format(output_num)

    for i in classes_lines:
        assert "classes" in cfg_lines[i-1], "classes param is not in line:{}".format(i)
        cfg_lines[i-1] = "classes={}\n".format(num_classes)

    with open(save_cfg_path, "w") as w:
        w.writelines(cfg_lines)


def main():
    """Generate the image lists, the .data file and the adjusted cfg file."""
    # Collect training / validation images and write the corresponding txt files
    train_txt_path = "data/my_train_data.txt"
    val_txt_path = "data/my_val_data.txt"
    calculate_data_txt(train_txt_path, train_annotation_dir)
    calculate_data_txt(val_txt_path, val_annotation_dir)

    with open(classes_label, "r") as f:
        classes_info = [line.strip() for line in f if len(line.strip()) > 0]
    # Create the data.data file recording the number of classes, the train and
    # val list files (.txt) and the label.names path
    create_data_data("./data/my_data.data", classes_label, train_txt_path, val_txt_path, classes_info)
    # Create my_yolov3.cfg from yolov3-spp.cfg, changing the predictor filters
    # and yolo classes parameters (both depend on the number of classes)
    change_and_create_cfg_file(classes_info)


if __name__ == '__main__':
    main()
HSV-Saturation augmentation (fraction) hsv_v: 0.36 # image HSV-Value augmentation (fraction) degrees: 0. # image rotation (+/- deg) translate: 0. # image translation (+/- fraction) scale: 0. # image scale (+/- gain) shear: 0. # image shear (+/- deg) ================================================ FILE: pytorch_object_detection/yolov3_spp/cfg/yolov3-spp.cfg ================================================ [net] # Testing # batch=1 # subdivisions=1 # Training batch=64 subdivisions=16 width=608 height=608 channels=3 momentum=0.9 decay=0.0005 angle=0 saturation = 1.5 exposure = 1.5 hue=.1 learning_rate=0.001 burn_in=1000 max_batches = 500200 policy=steps steps=400000,450000 scales=.1,.1 [convolutional] batch_normalize=1 filters=32 size=3 stride=1 pad=1 activation=leaky # Downsample [convolutional] batch_normalize=1 filters=64 size=3 stride=2 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=32 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=64 size=3 stride=1 pad=1 activation=leaky [shortcut] from=-3 activation=linear # Downsample [convolutional] batch_normalize=1 filters=128 size=3 stride=2 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=64 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=128 size=3 stride=1 pad=1 activation=leaky [shortcut] from=-3 activation=linear [convolutional] batch_normalize=1 filters=64 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=128 size=3 stride=1 pad=1 activation=leaky [shortcut] from=-3 activation=linear # Downsample [convolutional] batch_normalize=1 filters=256 size=3 stride=2 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=128 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=256 size=3 stride=1 pad=1 activation=leaky [shortcut] from=-3 activation=linear [convolutional] batch_normalize=1 filters=128 size=1 stride=1 pad=1 activation=leaky [convolutional] 
batch_normalize=1 filters=256 size=3 stride=1 pad=1 activation=leaky [shortcut] from=-3 activation=linear [convolutional] batch_normalize=1 filters=128 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=256 size=3 stride=1 pad=1 activation=leaky [shortcut] from=-3 activation=linear [convolutional] batch_normalize=1 filters=128 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=256 size=3 stride=1 pad=1 activation=leaky [shortcut] from=-3 activation=linear [convolutional] batch_normalize=1 filters=128 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=256 size=3 stride=1 pad=1 activation=leaky [shortcut] from=-3 activation=linear [convolutional] batch_normalize=1 filters=128 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=256 size=3 stride=1 pad=1 activation=leaky [shortcut] from=-3 activation=linear [convolutional] batch_normalize=1 filters=128 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=256 size=3 stride=1 pad=1 activation=leaky [shortcut] from=-3 activation=linear [convolutional] batch_normalize=1 filters=128 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=256 size=3 stride=1 pad=1 activation=leaky [shortcut] from=-3 activation=linear # Downsample [convolutional] batch_normalize=1 filters=512 size=3 stride=2 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=256 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=512 size=3 stride=1 pad=1 activation=leaky [shortcut] from=-3 activation=linear [convolutional] batch_normalize=1 filters=256 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=512 size=3 stride=1 pad=1 activation=leaky [shortcut] from=-3 activation=linear [convolutional] batch_normalize=1 filters=256 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=512 size=3 
stride=1 pad=1 activation=leaky [shortcut] from=-3 activation=linear [convolutional] batch_normalize=1 filters=256 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=512 size=3 stride=1 pad=1 activation=leaky [shortcut] from=-3 activation=linear [convolutional] batch_normalize=1 filters=256 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=512 size=3 stride=1 pad=1 activation=leaky [shortcut] from=-3 activation=linear [convolutional] batch_normalize=1 filters=256 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=512 size=3 stride=1 pad=1 activation=leaky [shortcut] from=-3 activation=linear [convolutional] batch_normalize=1 filters=256 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=512 size=3 stride=1 pad=1 activation=leaky [shortcut] from=-3 activation=linear [convolutional] batch_normalize=1 filters=256 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=512 size=3 stride=1 pad=1 activation=leaky [shortcut] from=-3 activation=linear # Downsample [convolutional] batch_normalize=1 filters=1024 size=3 stride=2 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=512 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=1024 size=3 stride=1 pad=1 activation=leaky [shortcut] from=-3 activation=linear [convolutional] batch_normalize=1 filters=512 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=1024 size=3 stride=1 pad=1 activation=leaky [shortcut] from=-3 activation=linear [convolutional] batch_normalize=1 filters=512 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=1024 size=3 stride=1 pad=1 activation=leaky [shortcut] from=-3 activation=linear [convolutional] batch_normalize=1 filters=512 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=1024 size=3 stride=1 pad=1 activation=leaky 
[shortcut] from=-3 activation=linear ###################### [convolutional] batch_normalize=1 filters=512 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 size=3 stride=1 pad=1 filters=1024 activation=leaky [convolutional] batch_normalize=1 filters=512 size=1 stride=1 pad=1 activation=leaky ### SPP ### [maxpool] stride=1 size=5 [route] layers=-2 [maxpool] stride=1 size=9 [route] layers=-4 [maxpool] stride=1 size=13 [route] layers=-1,-3,-5,-6 ### End SPP ### [convolutional] batch_normalize=1 filters=512 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 size=3 stride=1 pad=1 filters=1024 activation=leaky [convolutional] batch_normalize=1 filters=512 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 size=3 stride=1 pad=1 filters=1024 activation=leaky [convolutional] size=1 stride=1 pad=1 filters=255 activation=linear [yolo] mask = 6,7,8 anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 classes=80 num=9 jitter=.3 ignore_thresh = .7 truth_thresh = 1 random=1 [route] layers = -4 [convolutional] batch_normalize=1 filters=256 size=1 stride=1 pad=1 activation=leaky [upsample] stride=2 [route] layers = -1, 61 [convolutional] batch_normalize=1 filters=256 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 size=3 stride=1 pad=1 filters=512 activation=leaky [convolutional] batch_normalize=1 filters=256 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 size=3 stride=1 pad=1 filters=512 activation=leaky [convolutional] batch_normalize=1 filters=256 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 size=3 stride=1 pad=1 filters=512 activation=leaky [convolutional] size=1 stride=1 pad=1 filters=255 activation=linear [yolo] mask = 3,4,5 anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 classes=80 num=9 jitter=.3 ignore_thresh = .7 truth_thresh = 1 random=1 [route] layers = -4 [convolutional] 
batch_normalize=1 filters=128 size=1 stride=1 pad=1 activation=leaky [upsample] stride=2 [route] layers = -1, 36 [convolutional] batch_normalize=1 filters=128 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 size=3 stride=1 pad=1 filters=256 activation=leaky [convolutional] batch_normalize=1 filters=128 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 size=3 stride=1 pad=1 filters=256 activation=leaky [convolutional] batch_normalize=1 filters=128 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 size=3 stride=1 pad=1 filters=256 activation=leaky [convolutional] size=1 stride=1 pad=1 filters=255 activation=linear [yolo] mask = 0,1,2 anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 classes=80 num=9 jitter=.3 ignore_thresh = .7 truth_thresh = 1 random=1 ================================================ FILE: pytorch_object_detection/yolov3_spp/draw_box_utils.py ================================================ from PIL.Image import Image, fromarray import PIL.ImageDraw as ImageDraw import PIL.ImageFont as ImageFont from PIL import ImageColor import numpy as np STANDARD_COLORS = [ 'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque', 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite', 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan', 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange', 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet', 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite', 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod', 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki', 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue', 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey', 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue', 
def _text_size(font, text):
    """Return (width, height) of ``text`` rendered with ``font``.

    ``font.getsize`` is used where it exists, so output is unchanged on Pillow
    versions that still provide it; it was removed in Pillow 10, where the
    right/bottom edges reported by ``font.getbbox`` are used instead.
    """
    if hasattr(font, "getsize"):
        return font.getsize(text)
    _, _, right, bottom = font.getbbox(text)
    return right, bottom


def draw_text(draw,
              box: list,
              cls: int,
              score: float,
              category_index: dict,
              color: str,
              font: str = 'arial.ttf',
              font_size: int = 24):
    """Draw the class name and score label of one box onto the image.

    Args:
        draw: drawing object providing ``rectangle`` and ``text``
        box: [left, top, right, bottom] of the bounding box
        cls: class id; looked up as ``category_index[str(cls)]``
        score: confidence, rendered as an integer percentage
        category_index: mapping from class id string to class name
        color: fill color of the label background
        font: TrueType font file; the default font is used if it cannot be loaded
        font_size: font size passed to ``ImageFont.truetype``
    """
    try:
        font_obj = ImageFont.truetype(font, font_size)
    except IOError:
        font_obj = ImageFont.load_default()

    left, top, right, bottom = box
    # If the total height of the display strings added to the top of the bounding
    # box exceeds the top of the image, stack the strings below the bounding box
    # instead of above.
    display_str = f"{category_index[str(cls)]}: {int(100 * score)}%"
    # The label is laid out character by character; measure each one once.
    char_sizes = [_text_size(font_obj, ch) for ch in display_str]
    # Each display_str has a top and bottom margin of 0.05x.
    display_str_height = (1 + 2 * 0.05) * max(height for _, height in char_sizes)

    if top > display_str_height:
        text_top = top - display_str_height
        text_bottom = top
    else:
        text_top = bottom
        text_bottom = bottom + display_str_height

    for ch, (text_width, _) in zip(display_str, char_sizes):
        margin = np.ceil(0.05 * text_width)
        draw.rectangle([(left, text_top),
                        (left + text_width + 2 * margin, text_bottom)], fill=color)
        draw.text((left + margin, text_top),
                  ch,
                  fill='black',
                  font=font_obj)
        left += text_width


def draw_masks(image, masks, colors, thresh: float = 0.7, alpha: float = 0.5):
    """Blend colored object masks onto an image and return a new PIL image.

    Args:
        image: input image, converted with ``np.array``
        masks: per-object mask scores; values above ``thresh`` count as mask
            pixels (presumably shaped (num_objs, H, W) - confirm with caller)
        colors: one color per mask, assigned to that mask's pixels
        thresh: score threshold that binarizes the masks
        alpha: weight of the colored overlay in the blend
    """
    np_image = np.array(image)
    masks = masks > thresh
    # colors = np.array(colors)
    img_to_draw = np.copy(np_image)
    # TODO: There might be a way to vectorize this
    for mask, color in zip(masks, colors):
        img_to_draw[mask] = color

    out = np_image * (1 - alpha) + img_to_draw * alpha
    return fromarray(out.astype(np.uint8))
def main():
    """
    Export the YOLOv3-SPP Darknet model to ONNX and validate the exported file.

    The model is built from the cfg file, loaded with the pretrained weights,
    run once on a letterboxed ``test.jpg``, exported to ``yolov3spp.onnx`` and
    finally re-run through ONNX Runtime; the two outputs must agree within
    ``rtol=1e-03`` / ``atol=1e-05``.
    """
    side = 512  # must be a multiple of 32, e.g. one of [416, 512, 608]
    cfg = "cfg/yolov3-spp.cfg"
    weights = "weights/yolov3-spp-ultralytics-{}.pt".format(side)
    assert os.path.exists(cfg), "cfg file does not exist..."
    assert os.path.exists(weights), "weights file does not exist..."

    input_size = (side, side)  # [h, w]

    # Build the network and restore the trained parameters.
    net = models.Darknet(cfg, input_size)
    checkpoint = torch.load(weights, map_location=device)
    net.load_state_dict(checkpoint["model"])
    net.to(device)
    net.eval()

    # Use a real image as the tracing input: [batch, channel, height, width].
    img_path = "test.jpg"
    bgr = cv2.imread(img_path)  # BGR
    assert bgr is not None, "Image Not Found " + img_path

    # Letterbox to the network input size with black padding.
    boxed = img_utils.letterbox(bgr, new_shape=input_size, auto=False, color=(0, 0, 0))[0]
    chw = boxed[:, :, ::-1].transpose(2, 0, 1)  # BGR -> RGB, HWC -> CHW
    pixels = np.ascontiguousarray(chw).astype(np.float32)
    pixels /= 255.0  # scale (0, 255) to (0, 1)
    batch = np.expand_dims(pixels, axis=0)  # add batch dimension

    x = torch.tensor(batch)
    torch_out = net(x)

    save_path = "yolov3spp.onnx"
    export_options = {
        "export_params": True,        # store the trained weights inside the model file
        "opset_version": 12,          # ONNX opset to export to
        "do_constant_folding": True,  # fold constants for optimization
        "input_names": ["images"],
        "output_names": ["prediction"],
        # axis 0 of both tensors is exported as a variable-length axis
        "dynamic_axes": {"images": {0: "batch_size"},
                         "prediction": {0: "batch_size"}},
    }
    torch.onnx.export(net, x, save_path, **export_options)

    # Structural check of the exported graph.
    onnx_model = onnx.load(save_path)
    onnx.checker.check_model(onnx_model)

    # Numerical check: ONNX Runtime output must match the PyTorch output.
    ort_session = onnxruntime.InferenceSession(save_path)
    ort_outs = ort_session.run(None, {"images": to_numpy(x)})

    # assert_allclose raises an AssertionError when the tolerance is exceeded.
    np.testing.assert_allclose(to_numpy(torch_out), ort_outs[0], rtol=1e-03, atol=1e-05)
    print("Exported model has been tested with ONNXRuntime, and the result looks good!")
def clip_coords(boxes: np.ndarray, img_shape: tuple):
    """
    Clip xyxy bounding boxes, in place, to the image bounds.

    ``ndarray.clip`` returns a new array, so its result has to be written back
    into ``boxes``; calling it without assigning leaves the boxes unchanged.

    :param boxes: array whose first four columns are [x1, y1, x2, y2];
                  modified in place, any further columns are left untouched
    :param img_shape: image shape as (height, width, ...); only the first two
                      entries are read
    :return: None
    """
    height, width = img_shape[0], img_shape[1]
    boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, width)   # x1, x2
    boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, height)  # y1, y2
def nms(bboxes: np.ndarray,
        iou_threshold=0.5,
        soft_threshold=0.3,
        sigma=0.5,
        method="nms") -> np.ndarray:
    """
    Run (soft-)NMS over one group of boxes and return the indices that are kept.

    :param bboxes: rows of [x1, y1, x2, y2, score]
    :param iou_threshold: hard NMS drops candidates whose IoU with the selected
                          box is not below this value
    :param soft_threshold: soft-NMS drops candidates whose decayed score is not
                           above this value
    :param sigma: sigma of the gaussian score decay used by soft-NMS
    :param method: "nms" or "soft-nms"
    :return: int32 array with the row indices (into the input) of the kept
             boxes, in the order they were selected
    """
    assert method in ["nms", "soft-nms"]

    # Carry every row's original position along as a sixth column so it can be
    # reported after rows are removed:
    # [x1, y1, x2, y2, score] -> [x1, y1, x2, y2, score, index]
    row_ids = np.arange(bboxes.shape[0]).reshape(-1, 1)
    pool = np.concatenate([bboxes, row_ids], axis=1)

    kept = []
    while len(pool) > 0:
        # Select the highest scoring remaining box and take it out of the pool.
        top = np.argmax(pool[:, 4])
        winner = pool[top]
        kept.append(winner[5])
        pool = np.delete(pool, top, axis=0)

        overlap = bboxes_iou(winner[np.newaxis, :4], pool[:, :4])
        if method == "soft-nms":
            # Decay the scores by overlap, then filter on the decayed score.
            decay = np.exp(-(np.square(overlap) / sigma))
            pool[:, 4] = pool[:, 4] * decay
            survivors = np.greater(pool[:, 4], soft_threshold)
        else:
            # Hard NMS: keep only boxes overlapping less than the threshold.
            survivors = np.less(overlap, iou_threshold)
        pool = pool[survivors]

    return np.array(kept, dtype=np.int32)
def main():
    """
    Run the exported ONNX model on ``test.jpg`` and display the detections.

    The image is letterboxed to 512x512, pushed through ONNX Runtime, the
    predictions are post-processed (confidence filter + NMS), mapped back to
    the original image scale and drawn with matplotlib.
    """
    img_size = 512
    save_path = "yolov3spp.onnx"
    img_path = "test.jpg"
    input_size = (img_size, img_size)  # h, w

    # check onnx model
    onnx_model = onnx.load(save_path)
    onnx.checker.check_model(onnx_model)
    ort_session = onnxruntime.InferenceSession(save_path)

    img_o = cv2.imread(img_path)  # BGR
    assert img_o is not None, "Image Not Found " + img_path

    # preprocessing: letterbox with black padding
    img, ratio, pad = scale_img(img_o, new_shape=input_size, auto=False, color=(0, 0, 0))
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
    img = np.ascontiguousarray(img).astype(np.float32)
    img /= 255.0  # scale (0, 255) to (0, 1)
    img = np.expand_dims(img, axis=0)  # add batch dimension

    # compute ONNX Runtime output prediction and time it
    ort_inputs = {"images": img}
    t1 = time.time()
    pred = ort_session.run(None, ort_inputs)[0]
    t2 = time.time()
    print(t2 - t1)

    # The exported model predicts coordinates relative to the input size
    # (0-1); multiply by the input size to get absolute pixel values.
    pred[:, [0, 2]] *= input_size[1]
    pred[:, [1, 3]] *= input_size[0]
    pred = post_process(pred)

    # scale the predicted boxes back to the original image
    p_boxes = turn_back_coords(img1_shape=img.shape[2:],
                               coords=pred[:, :4],
                               img0_shape=img_o.shape,
                               ratio_pad=[ratio, pad]).round()

    bboxes = p_boxes
    scores = pred[:, 4]
    # Builtin int instead of the np.int alias, which recent NumPy releases
    # no longer provide.
    classes = pred[:, 5].astype(int) + 1

    category_index = {i + 1: str(i + 1) for i in range(90)}
    # NOTE(review): draw_box is imported from draw_box_utils at the top of
    # this script; confirm that module still exports it.
    img_o = draw_box(img_o[:, :, ::-1], bboxes, classes, scores, category_index)
    plt.imshow(img_o)
    plt.show()
(goes into yolo layer) if mdef["activation"] == "leaky": modules.add_module("activation", nn.LeakyReLU(0.1, inplace=True)) else: pass elif mdef["type"] == "BatchNorm2d": pass elif mdef["type"] == "maxpool": k = mdef["size"] # kernel size stride = mdef["stride"] modules = nn.MaxPool2d(kernel_size=k, stride=stride, padding=(k - 1) // 2) elif mdef["type"] == "upsample": if ONNX_EXPORT: # explicitly state size, avoid scale_factor g = (yolo_index + 1) * 2 / 32 # gain modules = nn.Upsample(size=tuple(int(x * g) for x in img_size)) else: modules = nn.Upsample(scale_factor=mdef["stride"]) elif mdef["type"] == "route": # [-2], [-1,-3,-5,-6], [-1, 61] layers = mdef["layers"] filters = sum([output_filters[l + 1 if l > 0 else l] for l in layers]) routs.extend([i + l if l < 0 else l for l in layers]) modules = FeatureConcat(layers=layers) elif mdef["type"] == "shortcut": layers = mdef["from"] filters = output_filters[-1] # routs.extend([i + l if l < 0 else l for l in layers]) routs.append(i + layers[0]) modules = WeightedFeatureFusion(layers=layers, weight="weights_type" in mdef) elif mdef["type"] == "yolo": yolo_index += 1 # 记录是第几个yolo_layer [0, 1, 2] stride = [32, 16, 8] # 预测特征层对应原图的缩放比例 modules = YOLOLayer(anchors=mdef["anchors"][mdef["mask"]], # anchor list nc=mdef["classes"], # number of classes img_size=img_size, stride=stride[yolo_index]) # Initialize preceding Conv2d() bias (https://arxiv.org/pdf/1708.02002.pdf section 3.3) try: j = -1 # bias: shape(255,) 索引0对应Sequential中的Conv2d # view: shape(3, 85) b = module_list[j][0].bias.view(modules.na, -1) b.data[:, 4] += -4.5 # obj b.data[:, 5:] += math.log(0.6 / (modules.nc - 0.99)) # cls (sigmoid(p) = 1/nc) module_list[j][0].bias = torch.nn.Parameter(b.view(-1), requires_grad=True) except Exception as e: print('WARNING: smart bias initialization failure.', e) else: print("Warning: Unrecognized Layer Type: " + mdef["type"]) # Register module list and number of output filters module_list.append(modules) 
class YOLOLayer(nn.Module):
    """
    Post-process the raw output of one YOLO prediction head.

    Depending on the mode, ``forward`` returns the reshaped raw predictions
    (training), a flat ``(na * nx * ny, no)`` tensor (ONNX export) or a tuple
    of decoded predictions and raw predictions (inference).
    """
    def __init__(self, anchors, nc, img_size, stride):
        super(YOLOLayer, self).__init__()
        self.anchors = torch.Tensor(anchors)
        self.stride = stride  # layer stride: one step on the feature map in input-image pixels [32, 16, 8]
        self.na = len(anchors)  # number of anchors (3)
        self.nc = nc  # number of classes (80)
        self.no = nc + 5  # number of outputs (85: x, y, w, h, obj, cls1, ...)
        self.nx, self.ny, self.ng = 0, 0, (0, 0)  # initialize number of x, y gridpoints
        # scale the anchor sizes to grid units
        self.anchor_vec = self.anchors / self.stride
        # batch_size, na, grid_h, grid_w, wh,
        # the size-1 dimensions are not fixed values; they are expanded by broadcasting later on
        self.anchor_wh = self.anchor_vec.view(1, self.na, 1, 1, 2)
        self.grid = None

        if ONNX_EXPORT:
            # export always runs the non-training path, with a grid fixed by img_size
            self.training = False
            self.create_grids((img_size[1] // stride, img_size[0] // stride))  # number x, y grid points

    def create_grids(self, ng=(13, 13), device="cpu"):
        """
        Update the grid size and rebuild the grid offsets.
        :param ng: feature map size as (nx, ny)
        :param device: device the grid and anchors are created on / moved to
        :return:
        """
        self.nx, self.ny = ng
        self.ng = torch.tensor(ng, dtype=torch.float)

        # build xy offsets: the (x, y) offset of every cell on the feature map
        if not self.training:  # training mode does not decode the final boxes
            yv, xv = torch.meshgrid([torch.arange(self.ny, device=device),
                                     torch.arange(self.nx, device=device)])
            # batch_size, na, grid_h, grid_w, wh
            self.grid = torch.stack((xv, yv), 2).view((1, 1, self.ny, self.nx, 2)).float()

        if self.anchor_vec.device != device:
            self.anchor_vec = self.anchor_vec.to(device)
            self.anchor_wh = self.anchor_wh.to(device)

    def forward(self, p):
        """
        Reshape and, outside of training, decode the predictions of this head.
        :param p: head output, viewed below as (bs, na * no, ny, nx)
        :return: training: p reshaped to (bs, na, ny, nx, no);
                 ONNX export: p as (na * nx * ny, no) with xy and wh scaled by
                 1 / grid size and class scores multiplied by objectness;
                 inference: (decoded output viewed as (bs, -1, no), reshaped raw p)
        """
        if ONNX_EXPORT:
            bs = 1  # batch size
        else:
            bs, _, ny, nx = p.shape  # batch_size, predict_param(255), grid(13), grid(13)
            if (self.nx, self.ny) != (nx, ny) or self.grid is None:  # fix no grid bug
                self.create_grids((nx, ny), p.device)

        # view: (batch_size, 255, 13, 13) -> (batch_size, 3, 85, 13, 13)
        # permute: (batch_size, 3, 85, 13, 13) -> (batch_size, 3, 13, 13, 85)
        # [bs, anchor, grid, grid, xywh + obj + classes]
        p = p.view(bs, self.na, self.no, self.ny, self.nx).permute(0, 1, 3, 4, 2).contiguous()  # prediction

        if self.training:
            return p
        elif ONNX_EXPORT:
            # Avoid broadcasting for ANE operations
            m = self.na * self.nx * self.ny  # total number of predictions of this head
            ng = 1. / self.ng.repeat(m, 1)  # per-row reciprocal of the grid size
            grid = self.grid.repeat(1, self.na, 1, 1, 1).view(m, 2)
            anchor_wh = self.anchor_wh.repeat(1, 1, self.nx, self.ny, 1).view(m, 2) * ng

            p = p.view(m, self.no)
            # xy = torch.sigmoid(p[:, 0:2]) + grid  # x, y
            # wh = torch.exp(p[:, 2:4]) * anchor_wh  # width, height
            # p_cls = torch.sigmoid(p[:, 4:5]) if self.nc == 1 else \
            #     torch.sigmoid(p[:, 5:self.no]) * torch.sigmoid(p[:, 4:5])  # conf
            # The writes below are in place and order dependent: the class
            # scores are multiplied by the already-sigmoided objectness.
            p[:, :2] = (torch.sigmoid(p[:, 0:2]) + grid) * ng  # x, y
            p[:, 2:4] = torch.exp(p[:, 2:4]) * anchor_wh  # width, height
            p[:, 4:] = torch.sigmoid(p[:, 4:])
            p[:, 5:] = p[:, 5:self.no] * p[:, 4:5]
            return p
        else:  # inference
            # [bs, anchor, grid, grid, xywh + obj + classes]
            io = p.clone()  # inference output
            io[..., :2] = torch.sigmoid(io[..., :2]) + self.grid  # xy on the feature map
            io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh  # wh (yolo method) on the feature map
            io[..., :4] *= self.stride  # map back to the input-image scale
            torch.sigmoid_(io[..., 4:])
            return io.view(bs, -1, self.no), p  # view [1, 3, 13, 13, 85] as [1, 507, 85]
verbose=False): # yolo_out收集每个yolo_layer层的输出 # out收集每个模块的输出 yolo_out, out = [], [] if verbose: print('0', x.shape) str = "" for i, module in enumerate(self.module_list): name = module.__class__.__name__ if name in ["WeightedFeatureFusion", "FeatureConcat"]: # sum, concat if verbose: l = [i - 1] + module.layers # layers sh = [list(x.shape)] + [list(out[i].shape) for i in module.layers] # shapes str = ' >> ' + ' + '.join(['layer %g %s' % x for x in zip(l, sh)]) x = module(x, out) # WeightedFeatureFusion(), FeatureConcat() elif name == "YOLOLayer": yolo_out.append(module(x)) else: # run module directly, i.e. mtype = 'convolutional', 'upsample', 'maxpool', 'batchnorm2d' etc. x = module(x) out.append(x if self.routs[i] else []) if verbose: print('%g/%g %s -' % (i, len(self.module_list), name), list(x.shape), str) str = '' if self.training: # train return yolo_out elif ONNX_EXPORT: # export # x = [torch.cat(x, 0) for x in zip(*yolo_out)] # return x[0], torch.cat(x[1:3], 1) # scores, boxes: 3780x80, 3780x4 p = torch.cat(yolo_out, dim=0) # # 根据objectness虑除低概率目标 # mask = torch.nonzero(torch.gt(p[:, 4], 0.1), as_tuple=False).squeeze(1) # # onnx不支持超过一维的索引(pytorch太灵活了) # # p = p[mask] # p = torch.index_select(p, dim=0, index=mask) # # # 虑除小面积目标,w > 2 and h > 2 pixel # # ONNX暂不支持bitwise_and和all操作 # mask_s = torch.gt(p[:, 2], 2./self.input_size[0]) & torch.gt(p[:, 3], 2./self.input_size[1]) # mask_s = torch.nonzero(mask_s, as_tuple=False).squeeze(1) # p = torch.index_select(p, dim=0, index=mask_s) # width-height 虑除小目标 # # if mask_s.numel() == 0: # return torch.empty([0, 85]) return p else: # inference or test x, p = zip(*yolo_out) # inference output, training output x = torch.cat(x, 1) # cat yolo outputs return x, p def info(self, verbose=False): """ 打印模型的信息 :param verbose: :return: """ torch_utils.model_info(self, verbose) def get_yolo_layers(self): """ 获取网络中三个"YOLOLayer"模块对应的索引 :param self: :return: """ return [i for i, m in enumerate(self.module_list) if m.__class__.__name__ 
def main():
    """
    Run a trained YOLOv3-SPP model on ``test.jpg`` and show/save the result.

    Loads the cfg, weights and class json, runs one warm-up pass, predicts on
    the letterboxed image, applies NMS, rescales the boxes to the original
    image, then displays the drawn image and saves it as ``test_result.jpg``.
    """
    img_size = 512  # must be a multiple of 32, e.g. one of [416, 512, 608]
    cfg = "cfg/my_yolov3.cfg"  # change to the generated .cfg file
    weights_path = "weights/yolov3spp-voc-512.pt"  # change to your own trained weights
    json_path = "./data/pascal_voc_classes.json"  # json label file
    img_path = "test.jpg"
    assert os.path.exists(cfg), "cfg file {} dose not exist.".format(cfg)
    # Check the variable that is actually defined and loaded below; the name
    # `weights` does not exist in this function.
    assert os.path.exists(weights_path), "weights file {} dose not exist.".format(weights_path)
    assert os.path.exists(json_path), "json file {} dose not exist.".format(json_path)
    assert os.path.exists(img_path), "image file {} dose not exist.".format(img_path)

    with open(json_path, 'r') as f:
        class_dict = json.load(f)

    # invert name -> id into id-string -> name, the form draw_objs is given
    category_index = {str(v): str(k) for k, v in class_dict.items()}

    input_size = (img_size, img_size)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = Darknet(cfg, img_size)
    weights_dict = torch.load(weights_path, map_location='cpu')
    # accept both a full checkpoint ({"model": ...}) and a bare state dict
    weights_dict = weights_dict["model"] if "model" in weights_dict else weights_dict
    model.load_state_dict(weights_dict)
    model.to(device)

    model.eval()
    with torch.no_grad():
        # init: one warm-up forward pass on an all-zero image
        img = torch.zeros((1, 3, img_size, img_size), device=device)
        model(img)

        img_o = cv2.imread(img_path)  # BGR
        assert img_o is not None, "Image Not Found " + img_path

        img = img_utils.letterbox(img_o, new_shape=input_size, auto=True, color=(0, 0, 0))[0]
        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
        img = np.ascontiguousarray(img)

        img = torch.from_numpy(img).to(device).float()
        img /= 255.0  # scale (0, 255) to (0, 1)
        img = img.unsqueeze(0)  # add batch dimension

        t1 = torch_utils.time_synchronized()
        pred = model(img)[0]  # only get inference result
        t2 = torch_utils.time_synchronized()
        print(t2 - t1)

        pred = utils.non_max_suppression(pred, conf_thres=0.1, iou_thres=0.6, multi_label=True)[0]
        t3 = time.time()
        print(t3 - t2)

        if pred is None:
            print("No target detected.")
            return

        # process detections: map the boxes back to the original image size
        pred[:, :4] = utils.scale_coords(img.shape[2:], pred[:, :4], img_o.shape).round()
        print(pred.shape)

        bboxes = pred[:, :4].detach().cpu().numpy()
        scores = pred[:, 4].detach().cpu().numpy()
        # Builtin int instead of the np.int alias, which recent NumPy releases
        # no longer provide.
        classes = pred[:, 5].detach().cpu().numpy().astype(int) + 1

        pil_img = Image.fromarray(img_o[:, :, ::-1])
        plot_img = draw_objs(pil_img,
                             bboxes,
                             classes,
                             scores,
                             category_index=category_index,
                             box_thresh=0.2,
                             line_thickness=3,
                             font='arial.ttf',
                             font_size=20)
        plt.imshow(plot_img)
        plt.show()
        # save the drawn prediction result
        plot_img.save("test_result.jpg")
0.5893 0.1794 0.4257 0.5925 0.422 0.6202 0.6359 0.3063 0.5635 0.6893 5.4484 0.000905 epoch:7 0.5336 0.8059 0.6076 0.1823 0.429 0.6138 0.4379 0.6346 0.6495 0.3054 0.5545 0.711 5.3175 0.000873 epoch:8 0.5498 0.8066 0.6218 0.1735 0.4369 0.6336 0.4456 0.6476 0.6626 0.3079 0.5653 0.7285 5.307 0.000836 epoch:9 0.5445 0.8057 0.6274 0.1825 0.445 0.6269 0.438 0.6411 0.6557 0.3076 0.5747 0.7181 5.148 0.000796 epoch:10 0.532 0.798 0.6059 0.1833 0.4272 0.6159 0.4395 0.6376 0.6512 0.3086 0.5537 0.7181 5.0508 0.000752 epoch:11 0.5574 0.8063 0.6272 0.1873 0.44 0.6416 0.4524 0.6543 0.6682 0.3139 0.5658 0.7358 5.1974 0.000706 epoch:12 0.5675 0.8088 0.6422 0.1985 0.4522 0.6542 0.4584 0.6609 0.6734 0.3248 0.5752 0.7415 4.9259 0.000658 epoch:13 0.5553 0.8114 0.6323 0.1854 0.439 0.6358 0.4466 0.6481 0.662 0.3285 0.5664 0.7247 4.7405 0.000608 epoch:14 0.5663 0.8106 0.6375 0.1873 0.4472 0.6588 0.4565 0.6612 0.6745 0.307 0.5721 0.7462 4.9034 0.000557 epoch:15 0.5627 0.8094 0.6354 0.1939 0.4462 0.6529 0.4526 0.6569 0.6703 0.3335 0.5734 0.7374 4.9803 0.000505 epoch:16 0.5677 0.8085 0.6402 0.1973 0.4517 0.6551 0.4573 0.6629 0.6762 0.3204 0.5772 0.7464 4.6182 0.000453 epoch:17 0.569 0.8107 0.6387 0.1954 0.4483 0.6604 0.46 0.6666 0.6802 0.3323 0.5761 0.7497 4.7454 0.000402 epoch:18 0.5783 0.8097 0.646 0.201 0.4564 0.669 0.4661 0.6719 0.6847 0.3265 0.5749 0.7567 4.4123 0.000352 epoch:19 0.5808 0.8111 0.642 0.188 0.4568 0.6721 0.4654 0.6734 0.6866 0.3172 0.5771 0.7591 4.5915 0.000304 epoch:20 0.5774 0.8078 0.6411 0.1916 0.4519 0.6753 0.4669 0.6745 0.6881 0.3247 0.5772 0.7601 4.6747 0.000258 epoch:21 0.5879 0.8127 0.6522 0.197 0.4559 0.683 0.4716 0.6801 0.6934 0.3189 0.5778 0.7702 4.485 0.000214 epoch:22 0.5858 0.8074 0.6515 0.2126 0.4622 0.6786 0.4695 0.677 0.6904 0.3427 0.5814 0.7609 4.5962 0.000174 epoch:23 0.5893 0.8127 0.6501 0.2056 0.4596 0.6851 0.4729 0.682 0.6954 0.3345 0.583 0.7703 4.4483 0.000137 epoch:24 0.5902 0.8123 0.654 0.197 0.4601 0.6867 0.4738 0.683 0.6969 0.3279 0.5835 0.773 
4.4526 0.000105 epoch:25 0.5948 0.816 0.6585 0.2031 0.4691 0.6888 0.4766 0.6864 0.7002 0.3379 0.5899 0.7736 4.4878 0.000076 epoch:26 0.5921 0.8136 0.6555 0.2036 0.4706 0.6847 0.4746 0.6841 0.6975 0.3446 0.5915 0.7681 4.5259 0.000053 epoch:27 0.5896 0.8089 0.6511 0.204 0.4666 0.6839 0.4734 0.682 0.6949 0.3422 0.5852 0.7669 4.3678 0.000034 epoch:28 0.5956 0.8149 0.6579 0.2089 0.4683 0.6893 0.4768 0.6868 0.7008 0.3448 0.5908 0.7741 4.5182 0.000021 epoch:29 0.5907 0.8097 0.6508 0.2078 0.4701 0.6831 0.4726 0.682 0.695 0.3476 0.5906 0.7645 4.2529 0.000013 ================================================ FILE: pytorch_object_detection/yolov3_spp/train.py ================================================ import datetime import argparse import yaml import torch.optim as optim import torch.optim.lr_scheduler as lr_scheduler from torch.utils.tensorboard import SummaryWriter from models import * from build_utils.datasets import * from build_utils.utils import * from train_utils import train_eval_utils as train_util from train_utils import get_coco_api_from_dataset def train(hyp): device = torch.device(opt.device if torch.cuda.is_available() else "cpu") print("Using {} device training.".format(device.type)) wdir = "weights" + os.sep # weights dir best = wdir + "best.pt" results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")) cfg = opt.cfg data = opt.data epochs = opt.epochs batch_size = opt.batch_size accumulate = max(round(64 / batch_size), 1) # accumulate n times before optimizer update (bs 64) weights = opt.weights # initial training weights imgsz_train = opt.img_size imgsz_test = opt.img_size # test image sizes multi_scale = opt.multi_scale # Image sizes # 图像要设置成32的倍数 gs = 32 # (pixels) grid size assert math.fmod(imgsz_test, gs) == 0, "--img-size %g must be a %g-multiple" % (imgsz_test, gs) grid_min, grid_max = imgsz_test // gs, imgsz_test // gs if multi_scale: imgsz_min = opt.img_size // 1.5 imgsz_max = opt.img_size // 0.667 # 
将给定的最大,最小输入尺寸向下调整到32的整数倍 grid_min, grid_max = imgsz_min // gs, imgsz_max // gs imgsz_min, imgsz_max = int(grid_min * gs), int(grid_max * gs) imgsz_train = imgsz_max # initialize with max size print("Using multi_scale training, image range[{}, {}]".format(imgsz_min, imgsz_max)) # configure run # init_seeds() # 初始化随机种子,保证结果可复现 data_dict = parse_data_cfg(data) train_path = data_dict["train"] test_path = data_dict["valid"] nc = 1 if opt.single_cls else int(data_dict["classes"]) # number of classes hyp["cls"] *= nc / 80 # update coco-tuned hyp['cls'] to current dataset hyp["obj"] *= imgsz_test / 320 # Remove previous results for f in glob.glob(results_file): os.remove(f) # Initialize model model = Darknet(cfg).to(device) # 是否冻结权重,只训练predictor的权重 if opt.freeze_layers: # 索引减一对应的是predictor的索引,YOLOLayer并不是predictor output_layer_indices = [idx - 1 for idx, module in enumerate(model.module_list) if isinstance(module, YOLOLayer)] # 冻结除predictor和YOLOLayer外的所有层 freeze_layer_indeces = [x for x in range(len(model.module_list)) if (x not in output_layer_indices) and (x - 1 not in output_layer_indices)] # Freeze non-output layers # 总共训练3x2=6个parameters for idx in freeze_layer_indeces: for parameter in model.module_list[idx].parameters(): parameter.requires_grad_(False) else: # 如果freeze_layer为False,默认仅训练除darknet53之后的部分 # 若要训练全部权重,删除以下代码 darknet_end_layer = 74 # only yolov3spp cfg # Freeze darknet53 layers # 总共训练21x3+3x2=69个parameters for idx in range(darknet_end_layer + 1): # [0, 74] for parameter in model.module_list[idx].parameters(): parameter.requires_grad_(False) # optimizer pg = [p for p in model.parameters() if p.requires_grad] optimizer = optim.SGD(pg, lr=hyp["lr0"], momentum=hyp["momentum"], weight_decay=hyp["weight_decay"], nesterov=True) scaler = torch.cuda.amp.GradScaler() if opt.amp else None start_epoch = 0 best_map = 0.0 if weights.endswith(".pt") or weights.endswith(".pth"): ckpt = torch.load(weights, map_location=device) # load model try: ckpt["model"] = {k: v for k, 
v in ckpt["model"].items() if model.state_dict()[k].numel() == v.numel()} model.load_state_dict(ckpt["model"], strict=False) except KeyError as e: s = "%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s. " \ "See https://github.com/ultralytics/yolov3/issues/657" % (opt.weights, opt.cfg, opt.weights) raise KeyError(s) from e # load optimizer if ckpt["optimizer"] is not None: optimizer.load_state_dict(ckpt["optimizer"]) if "best_map" in ckpt.keys(): best_map = ckpt["best_map"] # load results if ckpt.get("training_results") is not None: with open(results_file, "w") as file: file.write(ckpt["training_results"]) # write results.txt # epochs start_epoch = ckpt["epoch"] + 1 if epochs < start_epoch: print('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' % (opt.weights, ckpt['epoch'], epochs)) epochs += ckpt['epoch'] # finetune additional epochs if opt.amp and "scaler" in ckpt: scaler.load_state_dict(ckpt["scaler"]) del ckpt # Scheduler https://arxiv.org/pdf/1812.01187.pdf lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp["lrf"]) + hyp["lrf"] # cosine scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) scheduler.last_epoch = start_epoch # 指定从哪个epoch开始 # Plot lr schedule # y = [] # for _ in range(epochs): # scheduler.step() # y.append(optimizer.param_groups[0]['lr']) # plt.plot(y, '.-', label='LambdaLR') # plt.xlabel('epoch') # plt.ylabel('LR') # plt.tight_layout() # plt.savefig('LR.png', dpi=300) # model.yolo_layers = model.module.yolo_layers # dataset # 训练集的图像尺寸指定为multi_scale_range中最大的尺寸 train_dataset = LoadImagesAndLabels(train_path, imgsz_train, batch_size, augment=True, hyp=hyp, # augmentation hyperparameters rect=opt.rect, # rectangular training cache_images=opt.cache_images, single_cls=opt.single_cls) # 验证集的图像尺寸指定为img_size(512) val_dataset = LoadImagesAndLabels(test_path, imgsz_test, batch_size, hyp=hyp, rect=True, # 将每个batch的图像调整到合适大小,可减少运算量(并不是512x512标准尺寸) 
cache_images=opt.cache_images, single_cls=opt.single_cls) # dataloader nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]) # number of workers train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, num_workers=nw, # Shuffle=True unless rectangular training is used shuffle=not opt.rect, pin_memory=True, collate_fn=train_dataset.collate_fn) val_datasetloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, num_workers=nw, pin_memory=True, collate_fn=val_dataset.collate_fn) # Model parameters model.nc = nc # attach number of classes to model model.hyp = hyp # attach hyperparameters to model model.gr = 1.0 # giou loss ratio (obj_loss = 1.0 or giou) # 计算每个类别的目标个数,并计算每个类别的比重 # model.class_weights = labels_to_class_weights(train_dataset.labels, nc).to(device) # attach class weights # start training # caching val_data when you have plenty of memory(RAM) # coco = None coco = get_coco_api_from_dataset(val_dataset) print("starting traning for %g epochs..." 
% epochs) print('Using %g dataloader workers' % nw) for epoch in range(start_epoch, epochs): mloss, lr = train_util.train_one_epoch(model, optimizer, train_dataloader, device, epoch, accumulate=accumulate, # 迭代多少batch才训练完64张图片 img_size=imgsz_train, # 输入图像的大小 multi_scale=multi_scale, grid_min=grid_min, # grid的最小尺寸 grid_max=grid_max, # grid的最大尺寸 gs=gs, # grid step: 32 print_freq=50, # 每训练多少个step打印一次信息 warmup=True, scaler=scaler) # update scheduler scheduler.step() if opt.notest is False or epoch == epochs - 1: # evaluate on the test dataset result_info = train_util.evaluate(model, val_datasetloader, coco=coco, device=device) coco_mAP = result_info[0] voc_mAP = result_info[1] coco_mAR = result_info[8] # write into tensorboard if tb_writer: tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss', 'train/loss', "learning_rate", "mAP@[IoU=0.50:0.95]", "mAP@[IoU=0.5]", "mAR@[IoU=0.50:0.95]"] for x, tag in zip(mloss.tolist() + [lr, coco_mAP, voc_mAP, coco_mAR], tags): tb_writer.add_scalar(tag, x, epoch) # write into txt with open(results_file, "a") as f: # 记录coco的12个指标加上训练总损失和lr result_info = [str(round(i, 4)) for i in result_info + [mloss.tolist()[-1]]] + [str(round(lr, 6))] txt = "epoch:{} {}".format(epoch, ' '.join(result_info)) f.write(txt + "\n") # update best mAP(IoU=0.50:0.95) if coco_mAP > best_map: best_map = coco_mAP if opt.savebest is False: # save weights every epoch with open(results_file, 'r') as f: save_files = { 'model': model.state_dict(), 'optimizer': optimizer.state_dict(), 'training_results': f.read(), 'epoch': epoch, 'best_map': best_map} if opt.amp: save_files["scaler"] = scaler.state_dict() torch.save(save_files, "./weights/yolov3spp-{}.pt".format(epoch)) else: # only save best weights if best_map == coco_mAP: with open(results_file, 'r') as f: save_files = { 'model': model.state_dict(), 'optimizer': optimizer.state_dict(), 'training_results': f.read(), 'epoch': epoch, 'best_map': best_map} if opt.amp: save_files["scaler"] = scaler.state_dict() 
torch.save(save_files, best.format(epoch)) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--epochs', type=int, default=30) parser.add_argument('--batch-size', type=int, default=4) parser.add_argument('--cfg', type=str, default='cfg/my_yolov3.cfg', help="*.cfg path") parser.add_argument('--data', type=str, default='data/my_data.data', help='*.data path') parser.add_argument('--hyp', type=str, default='cfg/hyp.yaml', help='hyperparameters path') parser.add_argument('--multi-scale', type=bool, default=True, help='adjust (67%% - 150%%) img_size every 10 batches') parser.add_argument('--img-size', type=int, default=512, help='test size') parser.add_argument('--rect', action='store_true', help='rectangular training') parser.add_argument('--savebest', type=bool, default=False, help='only save best checkpoint') parser.add_argument('--notest', action='store_true', help='only test final epoch') parser.add_argument('--cache-images', action='store_true', help='cache images for faster training') parser.add_argument('--weights', type=str, default='weights/yolov3-spp-ultralytics-512.pt', help='initial weights path') parser.add_argument('--name', default='', help='renames results.txt to results_name.txt if supplied') parser.add_argument('--device', default='cuda:0', help='device id (i.e. 
def main(opt, hyp):
    """Train YOLOv3-SPP with DistributedDataParallel (one process per GPU).

    Args:
        opt: Parsed command-line namespace. ``init_distributed_mode`` is
            expected to add ``rank``, ``world_size`` and ``gpu`` to it.
        hyp: Hyper-parameter dict loaded from the ``--hyp`` YAML file. It is
            modified in place: ``lr0``, ``cls`` and ``obj`` are rescaled.

    Raises:
        EnvironmentError: If ``opt.device`` is not a CUDA device.
        KeyError: If a checkpoint key is missing from the model built from
            ``opt.cfg``.
    """
    # Initialise the process group for this process.
    init_distributed_mode(opt)

    if opt.rank in [-1, 0]:
        print(opt)
        print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
        # Only created on the first process; every later use sits behind the
        # same rank check.
        tb_writer = SummaryWriter(comment=opt.name)

    device = torch.device(opt.device)
    if "cuda" not in device.type:
        raise EnvironmentError("not find GPU device for training.")

    # DDP averages the gradients over all devices, so scale the learning rate up.
    hyp["lr0"] *= max(1., opt.world_size * opt.batch_size / 64)

    wdir = "weights" + os.sep  # weights dir
    best = wdir + "best.pt"
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    cfg = opt.cfg
    data = opt.data
    epochs = opt.epochs
    batch_size = opt.batch_size
    # accumulate n times before optimizer update (bs 64)
    accumulate = max(round(64 / (opt.world_size * opt.batch_size)), 1)
    weights = opt.weights  # initial training weights
    imgsz_train = opt.img_size
    imgsz_test = opt.img_size  # test image sizes
    multi_scale = opt.multi_scale

    # Image sizes
    # The image size must be a multiple of the grid size (32).
    gs = 32  # (pixels) grid size
    assert math.fmod(imgsz_test, gs) == 0, "--img-size %g must be a %g-multiple" % (imgsz_test, gs)
    grid_min, grid_max = imgsz_test // gs, imgsz_test // gs
    if multi_scale:
        imgsz_min = opt.img_size // 1.5
        imgsz_max = opt.img_size // 0.667

        # Round the given min/max input sizes down to a multiple of the grid size.
        grid_min, grid_max = imgsz_min // gs, imgsz_max // gs
        imgsz_min, imgsz_max = int(grid_min * gs), int(grid_max * gs)
        imgsz_train = imgsz_max  # initialize with max size
        if opt.rank in [-1, 0]:  # only print from the first process
            print("Using multi_scale training, image range[{}, {}]".format(imgsz_min, imgsz_max))

    # configure run
    random.seed(0)  # set the random seed
    data_dict = parse_data_cfg(data)
    train_path = data_dict["train"]
    test_path = data_dict["valid"]
    nc = 1 if opt.single_cls else int(data_dict["classes"])  # number of classes
    hyp["cls"] *= nc / 80  # update coco-tuned hyp['cls'] to current dataset
    hyp["obj"] *= imgsz_test / 320

    if opt.rank in [-1, 0]:
        # Remove previous results
        for f in glob.glob(results_file) + glob.glob("tmp.pk"):
            os.remove(f)

    # Initialize model
    model = Darknet(cfg).to(device)

    # Create the AMP gradient scaler BEFORE the checkpoint is read: the resume
    # branch below restores its state, and previously referenced `scaler`
    # before it had been assigned.
    scaler = torch.cuda.amp.GradScaler() if opt.amp else None

    start_epoch = 0
    best_map = 0.0
    # Load pretrained weights when a checkpoint file is given.
    if weights.endswith(".pt"):
        ckpt = torch.load(weights, map_location=device)

        # load model
        try:
            # Keep only tensors whose element count matches the current model.
            ckpt["model"] = {k: v for k, v in ckpt["model"].items()
                             if model.state_dict()[k].numel() == v.numel()}
            model.load_state_dict(ckpt["model"], strict=False)
        except KeyError as e:
            s = "%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s. " \
                "See https://github.com/ultralytics/yolov3/issues/657" % (opt.weights, opt.cfg, opt.weights)
            raise KeyError(s) from e

        if opt.rank in [-1, 0]:
            # load results
            if ckpt.get("training_results") is not None:
                with open(results_file, "w") as file:
                    file.write(ckpt["training_results"])  # write results.txt

        # epochs
        # NOTE(review): kept outside the rank check so that every process
        # resumes from the same epoch -- confirm against the original layout.
        start_epoch = ckpt["epoch"] + 1
        if epochs < start_epoch:
            print('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
                  (opt.weights, ckpt['epoch'], epochs))
            epochs += ckpt['epoch']  # finetune additional epochs

        if opt.amp and "scaler" in ckpt:
            scaler.load_state_dict(ckpt["scaler"])
        del ckpt

    # Optionally freeze weights and train only the predictors.
    if opt.freeze_layers:
        # idx - 1 is the predictor's index; the YOLOLayer itself is not the predictor.
        output_layer_indices = [idx - 1 for idx, module in enumerate(model.module_list) if
                                isinstance(module, YOLOLayer)]
        # Freeze every layer except the predictors and the YOLOLayers.
        freeze_layer_indeces = [x for x in range(len(model.module_list)) if
                                (x not in output_layer_indices) and
                                (x - 1 not in output_layer_indices)]
        # Freeze non-output layers
        # 3x2=6 parameters are trained in total
        for idx in freeze_layer_indeces:
            for parameter in model.module_list[idx].parameters():
                parameter.requires_grad_(False)
    else:
        # When freeze_layers is False, only the part after darknet53 is
        # trained by default. To train all weights, delete the code below.
        darknet_end_layer = 74  # only yolov3spp cfg
        # Freeze darknet53 layers
        # 21x3+3x2=69 parameters are trained in total
        for idx in range(darknet_end_layer + 1):  # [0, 74]
            for parameter in model.module_list[idx].parameters():
                parameter.requires_grad_(False)

    # SyncBatchNorm
    # If only the final predictors are trained (they contain no BN layers),
    # SyncBatchNorm has no effect.
    if opt.freeze_layers is False:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[opt.gpu])

    model.yolo_layers = model.module.yolo_layers  # move yolo layer indices to top level

    # optimizer
    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=hyp["lr0"], momentum=hyp["momentum"],
                          weight_decay=hyp["weight_decay"], nesterov=True)

    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp["lrf"]) + hyp["lrf"]  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    scheduler.last_epoch = start_epoch  # epoch to resume from

    # dataset
    # Training images use the largest size of the multi-scale range.
    # Make sure only the first process in DDP process the dataset first,
    # and the following others can use the cache.
    with torch_distributed_zero_first(opt.rank):
        train_dataset = LoadImagesAndLabels(train_path, imgsz_train, batch_size,
                                            augment=True,
                                            hyp=hyp,  # augmentation hyperparameters
                                            rect=opt.rect,  # rectangular training
                                            cache_images=opt.cache_images,
                                            single_cls=opt.single_cls,
                                            rank=opt.rank)
        # Validation images use img_size (512).
        val_dataset = LoadImagesAndLabels(test_path, imgsz_test, batch_size,
                                          hyp=hyp,
                                          cache_images=opt.cache_images,
                                          single_cls=opt.single_cls,
                                          rank=opt.rank)

    # Assign training sample indices to the process of each rank.
    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
    # Group the sample indices into lists of batch_size elements.
    train_batch_sampler = torch.utils.data.BatchSampler(
        train_sampler, batch_size, drop_last=True)

    # dataloader
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    if opt.rank in [-1, 0]:
        print('Using %g dataloader workers' % nw)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_sampler=train_batch_sampler, num_workers=nw,
        pin_memory=True, collate_fn=train_dataset.collate_fn)

    val_data_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=batch_size,
        sampler=val_sampler, num_workers=nw,
        pin_memory=True, collate_fn=val_dataset.collate_fn)

    # Model parameters
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)

    # start training
    # caching val_data when you have plenty of memory(RAM)
    # The first process builds the COCO ground truth and pickles it; the
    # other processes load the pickle written during this same run.
    with torch_distributed_zero_first(opt.rank):
        if os.path.exists("tmp.pk") is False:
            coco = get_coco_api_from_dataset(val_dataset)
            with open("tmp.pk", "wb") as f:
                pickle.dump(coco, f)
        else:
            with open("tmp.pk", "rb") as f:
                coco = pickle.load(f)

    if opt.rank in [-1, 0]:
        print("starting traning for %g epochs..." % epochs)
        print('Using %g dataloader workers' % nw)

    start_time = time.time()
    for epoch in range(start_epoch, epochs):
        train_sampler.set_epoch(epoch)
        mloss, lr = train_util.train_one_epoch(model, optimizer, train_data_loader,
                                               device, epoch,
                                               accumulate=accumulate,  # batches needed to cover 64 images
                                               img_size=imgsz_train,  # input image size
                                               multi_scale=multi_scale,
                                               grid_min=grid_min,  # smallest grid size
                                               grid_max=grid_max,  # largest grid size
                                               gs=gs,  # grid step: 32
                                               print_freq=50,  # print every N steps
                                               warmup=True,
                                               scaler=scaler)
        # update scheduler
        scheduler.step()

        if opt.notest is False or epoch == epochs - 1:
            # evaluate on the test dataset
            result_info = train_util.evaluate(model, val_data_loader,
                                              coco=coco, device=device)

            # only first process in DDP process to record info and save weights
            if opt.rank in [-1, 0]:
                coco_mAP = result_info[0]
                voc_mAP = result_info[1]
                coco_mAR = result_info[8]

                # write into tensorboard
                if tb_writer:
                    tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss', 'train/loss', "learning_rate",
                            "mAP@[IoU=0.50:0.95]", "mAP@[IoU=0.5]", "mAR@[IoU=0.50:0.95]"]

                    for x, tag in zip(mloss.tolist() + [lr, coco_mAP, voc_mAP, coco_mAR], tags):
                        tb_writer.add_scalar(tag, x, epoch)

                # write into txt
                with open(results_file, "a") as f:
                    # Record the 12 COCO metrics plus the total training loss and lr.
                    result_info = [str(round(i, 4)) for i in result_info + [mloss.tolist()[-1]]] + \
                                  [str(round(lr, 6))]
                    txt = "epoch:{} {}".format(epoch, ' '.join(result_info))
                    f.write(txt + "\n")

                # update best mAP(IoU=0.50:0.95)
                if coco_mAP > best_map:
                    best_map = coco_mAP

                if opt.savebest is False:
                    # save weights every epoch
                    with open(results_file, 'r') as f:
                        save_files = {
                            'model': model.module.state_dict(),
                            'optimizer': optimizer.state_dict(),
                            'training_results': f.read(),
                            'epoch': epoch,
                            'best_map': best_map}
                        if opt.amp:
                            save_files["scaler"] = scaler.state_dict()
                        torch.save(save_files, "./weights/yolov3spp-{}.pt".format(epoch))
                else:
                    # only save best weights
                    if best_map == coco_mAP:
                        with open(results_file, 'r') as f:
                            save_files = {
                                'model': model.module.state_dict(),
                                'optimizer': optimizer.state_dict(),
                                'training_results': f.read(),
                                'epoch': epoch,
                                'best_map': best_map}
                            if opt.amp:
                                save_files["scaler"] = scaler.state_dict()
                            torch.save(save_files, best.format(epoch))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    if opt.rank in [-1, 0]:
        print('Training time {}'.format(total_time_str))
0 or 0,1 or cpu)') parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') parser.add_argument('--freeze-layers', type=bool, default=False, help='Freeze non-output layers') # 开启的进程数(注意不是线程),不用设置该参数,会根据nproc_per_node自动设置 parser.add_argument('--world-size', default=4, type=int, help='number of distributed processes') parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training') # 是否使用混合精度训练(需要GPU支持混合精度) parser.add_argument("--amp", default=False, help="Use torch.cuda.amp for mixed precision training") opt = parser.parse_args() # 检查文件是否存在 opt.cfg = check_file(opt.cfg) opt.data = check_file(opt.data) opt.hyp = check_file(opt.hyp) with open(opt.hyp) as f: hyp = yaml.load(f, Loader=yaml.FullLoader) main(opt, hyp) ================================================ FILE: pytorch_object_detection/yolov3_spp/train_utils/__init__.py ================================================ from .coco_utils import get_coco_api_from_dataset from .coco_eval import CocoEvaluator from .distributed_utils import init_distributed_mode, torch_distributed_zero_first ================================================ FILE: pytorch_object_detection/yolov3_spp/train_utils/coco_eval.py ================================================ import json import copy from collections import defaultdict import numpy as np import torch import torch._six from pycocotools.cocoeval import COCOeval from pycocotools.coco import COCO import pycocotools.mask as mask_util from .distributed_utils import all_gather class CocoEvaluator(object): def __init__(self, coco_gt, iou_types): assert isinstance(iou_types, (list, tuple)) coco_gt = copy.deepcopy(coco_gt) self.coco_gt = coco_gt self.iou_types = iou_types self.coco_eval = {} for iou_type in iou_types: self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type) self.img_ids = [] self.eval_imgs = {k: [] for k in iou_types} def update(self, predictions): img_ids = 
list(np.unique(list(predictions.keys()))) self.img_ids.extend(img_ids) for iou_type in self.iou_types: results = self.prepare(predictions, iou_type) coco_dt = loadRes(self.coco_gt, results) if results else COCO() coco_eval = self.coco_eval[iou_type] coco_eval.cocoDt = coco_dt coco_eval.params.imgIds = list(img_ids) img_ids, eval_imgs = evaluate(coco_eval) self.eval_imgs[iou_type].append(eval_imgs) def synchronize_between_processes(self): for iou_type in self.iou_types: self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2) create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type]) def accumulate(self): for coco_eval in self.coco_eval.values(): coco_eval.accumulate() def summarize(self): for iou_type, coco_eval in self.coco_eval.items(): print("IoU metric: {}".format(iou_type)) coco_eval.summarize() def prepare(self, predictions, iou_type): if iou_type == "bbox": return self.prepare_for_coco_detection(predictions) elif iou_type == "segm": return self.prepare_for_coco_segmentation(predictions) elif iou_type == "keypoints": return self.prepare_for_coco_keypoint(predictions) else: raise ValueError("Unknown iou type {}".format(iou_type)) def prepare_for_coco_detection(self, predictions): coco_results = [] for original_id, prediction in predictions.items(): if len(prediction) == 0: continue boxes = prediction["boxes"] boxes = convert_to_xywh(boxes).tolist() scores = prediction["scores"].tolist() labels = prediction["labels"].tolist() coco_results.extend( [ { "image_id": original_id, "category_id": labels[k], "bbox": box, "score": scores[k], } for k, box in enumerate(boxes) ] ) return coco_results def prepare_for_coco_segmentation(self, predictions): coco_results = [] for original_id, prediction in predictions.items(): if len(prediction) == 0: continue scores = prediction["scores"] labels = prediction["labels"] masks = prediction["masks"] masks = masks > 0.5 scores = prediction["scores"].tolist() labels = 
def convert_to_xywh(boxes):
    """Convert boxes from (xmin, ymin, xmax, ymax) to (xmin, ymin, width, height).

    The four coordinates are taken along dimension 1 of ``boxes`` and the
    result is stacked back along that same dimension.
    """
    left, top, right, bottom = boxes.unbind(1)
    widths = right - left
    heights = bottom - top
    return torch.stack((left, top, widths, heights), dim=1)


def merge(img_ids, eval_imgs):
    """Gather image ids and per-image results from all ranks and de-duplicate them.

    Returns:
        A tuple ``(merged_img_ids, merged_eval_imgs)``: the sorted unique
        image ids, and the evaluation array restricted, along its last axis,
        to the first occurrence of each id.
    """
    gathered_ids = all_gather(img_ids)
    gathered_evals = all_gather(eval_imgs)

    # Flatten the per-rank id lists into one array.
    flat_ids = np.array([img_id for rank_ids in gathered_ids for img_id in rank_ids])
    # Join the per-rank evaluation arrays along axis 2.
    stacked_evals = np.concatenate(list(gathered_evals), 2)

    # keep only unique (and in sorted order) images
    unique_ids, first_seen = np.unique(flat_ids, return_index=True)
    return unique_ids, stacked_evals[..., first_seen]


def create_common_coco_eval(coco_eval, img_ids, eval_imgs):
    """Store the merged results of all ranks on ``coco_eval``.

    Sets ``coco_eval.evalImgs``, ``coco_eval.params.imgIds`` and a deep copy
    of the parameters in ``coco_eval._paramsEval``.
    """
    unique_ids, stacked_evals = merge(img_ids, eval_imgs)

    coco_eval.evalImgs = list(stacked_evals.flatten())
    coco_eval.params.imgIds = list(unique_ids)
    coco_eval._paramsEval = copy.deepcopy(coco_eval.params)
def createIndex(self):
    """Build the lookup tables of a COCO-like object from ``self.dataset``.

    Populates ``self.anns``, ``self.imgs`` and ``self.cats`` (keyed by id) as
    well as ``self.imgToAnns`` (image id -> annotations) and
    ``self.catToImgs`` (category id -> image ids).
    """
    # create index
    anns, cats, imgs = {}, {}, {}
    imgToAnns, catToImgs = defaultdict(list), defaultdict(list)
    if 'annotations' in self.dataset:
        for ann in self.dataset['annotations']:
            imgToAnns[ann['image_id']].append(ann)
            anns[ann['id']] = ann

    if 'images' in self.dataset:
        for img in self.dataset['images']:
            imgs[img['id']] = img

    if 'categories' in self.dataset:
        for cat in self.dataset['categories']:
            cats[cat['id']] = cat

    if 'annotations' in self.dataset and 'categories' in self.dataset:
        for ann in self.dataset['annotations']:
            catToImgs[ann['category_id']].append(ann['image_id'])

    # create class members
    self.anns = anns
    self.imgToAnns = imgToAnns
    self.catToImgs = catToImgs
    self.imgs = imgs
    self.cats = cats


maskUtils = mask_util


def loadRes(self, resFile):
    """
    Load result file and return a result api object.

    The annotation dicts are completed in place (``id``, ``area``,
    ``iscrowd`` and, depending on the result type, ``bbox`` or
    ``segmentation``).

    :param resFile: path of a JSON result file (str or bytes), a numpy array
                    handed to ``self.loadNumpyAnnotations``, or a list of
                    annotation dicts
    :return: res (obj) : result api object
    """
    res = COCO()
    res.dataset['images'] = [img for img in self.dataset['images']]

    # (str, bytes) replaces the private ``torch._six.string_classes`` helper,
    # which is not available in recent PyTorch releases.
    if isinstance(resFile, (str, bytes)):
        # Close the file deterministically instead of leaking the handle.
        with open(resFile) as res_fp:
            anns = json.load(res_fp)
    elif isinstance(resFile, np.ndarray):
        anns = self.loadNumpyAnnotations(resFile)
    else:
        anns = resFile
    assert type(anns) == list, 'results in not an array of objects'
    annsImgIds = [ann['image_id'] for ann in anns]
    assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
        'Results do not correspond to current coco set'
    # The kind of result is decided from the first annotation only.
    if 'caption' in anns[0]:
        imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns])
        res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
        for idx, ann in enumerate(anns):
            ann['id'] = idx + 1
    elif 'bbox' in anns[0] and not anns[0]['bbox'] == []:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            bb = ann['bbox']
            x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
            if 'segmentation' not in ann:
                # Use the box outline as a polygon segmentation.
                ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
            ann['area'] = bb[2] * bb[3]
            ann['id'] = idx + 1
            ann['iscrowd'] = 0
    elif 'segmentation' in anns[0]:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            # now only support compressed RLE format as segmentation results
            ann['area'] = maskUtils.area(ann['segmentation'])
            if 'bbox' not in ann:
                ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
            ann['id'] = idx + 1
            ann['iscrowd'] = 0
    elif 'keypoints' in anns[0]:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            s = ann['keypoints']
            # Every third entry is skipped; only x and y are used here.
            x = s[0::3]
            y = s[1::3]
            x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y)
            ann['area'] = (x2 - x1) * (y2 - y1)
            ann['id'] = idx + 1
            ann['bbox'] = [x1, y1, x2 - x1, y2 - y1]

    res.dataset['annotations'] = anns
    createIndex(res)
    return res


def evaluate(self):
    '''
    Run per image evaluation on given images.

    Unlike the pycocotools method this does not store the results in
    self.evalImgs; it returns them.

    :return: (imgIds, evalImgs) -- the unique image ids and an array of the
             per-image results reshaped to
             (len(catIds), len(areaRng), len(imgIds))
    '''
    p = self.params
    # add backward compatibility if useSegm is specified in params
    if p.useSegm is not None:
        p.iouType = 'segm' if p.useSegm == 1 else 'bbox'
        print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType))
    p.imgIds = list(np.unique(p.imgIds))
    if p.useCats:
        p.catIds = list(np.unique(p.catIds))
    p.maxDets = sorted(p.maxDets)
    self.params = p

    self._prepare()
    # loop through images, area range, max detection number
    catIds = p.catIds if p.useCats else [-1]

    # NOTE(review): computeIoU stays unbound for any other iouType -- confirm
    # callers only use 'segm', 'bbox' or 'keypoints'.
    if p.iouType == 'segm' or p.iouType == 'bbox':
        computeIoU = self.computeIoU
    elif p.iouType == 'keypoints':
        computeIoU = self.computeOks
    self.ious = {
        (imgId, catId): computeIoU(imgId, catId)
        for imgId in p.imgIds
        for catId in catIds}

    evaluateImg = self.evaluateImg
    maxDet = p.maxDets[-1]
    evalImgs = [
        evaluateImg(imgId, catId, areaRng, maxDet)
        for catId in catIds
        for areaRng in p.areaRng
        for imgId in p.imgIds
    ]
    # this is NOT in the pycocotools code, but could be done outside
    evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds))
    self._paramsEval = copy.deepcopy(self.params)
    return p.imgIds, evalImgs

#################################################################
# end of straight copy from pycocotools, just removing the prints
#################################################################
class SmoothedValue(object):
    """Track a series of values and expose statistics over a sliding window
    as well as the average over the whole series.
    """

    def __init__(self, window_size=20, fmt=None):
        # Default rendering: latest value followed by the global average.
        self.fmt = "{value:.4f} ({global_avg:.4f})" if fmt is None else fmt
        self.deque = deque(maxlen=window_size)  # holds only the newest window_size values
        self.total = 0.0
        self.count = 0

    def update(self, value, n=1):
        """Record ``value`` once in the window and ``n`` times in the global sums."""
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Sum ``count`` and ``total`` over all processes.

        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        stats = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda")
        dist.barrier()
        dist.all_reduce(stats)
        summed_count, summed_total = stats.tolist()
        self.count = int(summed_count)
        self.total = summed_total

    @property
    def median(self):
        """Median of the values currently in the window."""
        return torch.tensor(list(self.deque)).median().item()

    @property
    def avg(self):
        """Mean of the values currently in the window, computed in float32."""
        window = torch.tensor(list(self.deque), dtype=torch.float32)
        return window.mean().item()

    @property
    def global_avg(self):
        """Weighted mean over every update seen so far."""
        return self.total / self.count

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    @property
    def value(self):
        """Most recently recorded value."""
        return self.deque[-1]

    def __str__(self):
        stats = dict(
            median=self.median,
            avg=self.avg,
            global_avg=self.global_avg,
            max=self.max,
            value=self.value)
        return self.fmt.format(**stats)
def reduce_dict(input_dict, average=True):
    """
    Reduce the values of ``input_dict`` over all processes.

    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum

    Returns:
        ``input_dict`` itself when fewer than two processes are running;
        otherwise a new dict with the same keys whose values are the reduced
        results, so that every process ends up with the same numbers.
    """
    num_procs = get_world_size()
    if num_procs < 2:  # single-GPU case: nothing to reduce
        return input_dict

    with torch.no_grad():  # multi-GPU case
        # sort the keys so that they are consistent across processes
        ordered_keys = sorted(input_dict)
        stacked = torch.stack([input_dict[key] for key in ordered_keys], dim=0)
        dist.all_reduce(stacked)
        if average:
            stacked /= num_procs
        return dict(zip(ordered_keys, stacked))
def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
    """Return a ``LambdaLR`` that ramps the learning-rate multiplier up linearly.

    The multiplier starts at ``warmup_factor`` for step 0, grows linearly
    towards 1 and is exactly 1 from step ``warmup_iters`` onwards.
    """

    def lr_multiplier(step):
        """Multiplier applied to the base learning rate at ``step``."""
        if step < warmup_iters:
            progress = float(step) / warmup_iters
            return warmup_factor * (1 - progress) + progress
        # Warm-up finished: use the base learning rate unchanged.
        return 1

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_multiplier)


def mkdir(path):
    """Create ``path`` (including parents); an already existing path is not an error.

    Any ``OSError`` other than ``EEXIST`` is re-raised.
    """
    try:
        os.makedirs(path)
    except OSError as err:
        if err.errno == errno.EEXIST:
            return
        raise
def init_distributed_mode(args):
    """Configure ``args`` and the process group for distributed training.

    Rank information is taken from ``RANK``/``WORLD_SIZE``/``LOCAL_RANK``
    when the first two are present in the environment, otherwise from
    ``SLURM_PROCID``. With neither, ``args.distributed`` is set to False and
    nothing else happens.

    Note that the SLURM path does not assign ``args.world_size``; it is read
    later, so it has to be on ``args`` already. ``args.dist_url`` is likewise
    expected to be provided by the caller.
    """
    env = os.environ
    has_launcher_env = 'RANK' in env and 'WORLD_SIZE' in env
    has_slurm_env = 'SLURM_PROCID' in env

    # Guard clause: plain single-process run.
    if not has_launcher_env and not has_slurm_env:
        print('Not using distributed mode')
        args.distributed = False
        return

    if has_launcher_env:
        args.rank = int(env["RANK"])
        args.world_size = int(env['WORLD_SIZE'])
        args.gpu = int(env['LOCAL_RANK'])
    else:
        args.rank = int(env['SLURM_PROCID'])
        # Spread SLURM tasks over the GPUs visible on this node.
        args.gpu = args.rank % torch.cuda.device_count()

    args.distributed = True
    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'

    banner = '| distributed init (rank {}): {}'.format(args.rank, args.dist_url)
    print(banner, flush=True)

    torch.distributed.init_process_group(backend=args.dist_backend,
                                         init_method=args.dist_url,
                                         world_size=args.world_size,
                                         rank=args.rank)
    torch.distributed.barrier()
    # From here on only rank 0 prints (unless force=True is passed to print).
    setup_for_distributed(args.rank == 0)
""" if local_rank not in [-1, 0]: torch.distributed.barrier() yield if local_rank == 0: torch.distributed.barrier() ================================================ FILE: pytorch_object_detection/yolov3_spp/train_utils/group_by_aspect_ratio.py ================================================ import bisect from collections import defaultdict import copy from itertools import repeat, chain import math import numpy as np import torch import torch.utils.data from torch.utils.data.sampler import BatchSampler, Sampler from torch.utils.model_zoo import tqdm import torchvision from PIL import Image def _repeat_to_at_least(iterable, n): repeat_times = math.ceil(n / len(iterable)) repeated = chain.from_iterable(repeat(iterable, repeat_times)) return list(repeated) class GroupedBatchSampler(BatchSampler): """ Wraps another sampler to yield a mini-batch of indices. It enforces that the batch only contain elements from the same group. It also tries to provide mini-batches which follows an ordering which is as close as possible to the ordering from the original sampler. Arguments: sampler (Sampler): Base sampler. group_ids (list[int]): If the sampler produces indices in range [0, N), `group_ids` must be a list of `N` ints which contains the group id of each sample. The group ids must be a continuous set of integers starting from 0, i.e. they must be in the range [0, num_groups). batch_size (int): Size of mini-batch. 
""" def __init__(self, sampler, group_ids, batch_size): if not isinstance(sampler, Sampler): raise ValueError( "sampler should be an instance of " "torch.utils.data.Sampler, but got sampler={}".format(sampler) ) self.sampler = sampler self.group_ids = group_ids self.batch_size = batch_size def __iter__(self): buffer_per_group = defaultdict(list) samples_per_group = defaultdict(list) num_batches = 0 for idx in self.sampler: group_id = self.group_ids[idx] buffer_per_group[group_id].append(idx) samples_per_group[group_id].append(idx) if len(buffer_per_group[group_id]) == self.batch_size: yield buffer_per_group[group_id] num_batches += 1 del buffer_per_group[group_id] assert len(buffer_per_group[group_id]) < self.batch_size # now we have run out of elements that satisfy # the group criteria, let's return the remaining # elements so that the size of the sampler is # deterministic expected_num_batches = len(self) num_remaining = expected_num_batches - num_batches if num_remaining > 0: # for the remaining batches, take first the buffers with largest number # of elements for group_id, _ in sorted(buffer_per_group.items(), key=lambda x: len(x[1]), reverse=True): remaining = self.batch_size - len(buffer_per_group[group_id]) samples_from_group_id = _repeat_to_at_least(samples_per_group[group_id], remaining) buffer_per_group[group_id].extend(samples_from_group_id[:remaining]) assert len(buffer_per_group[group_id]) == self.batch_size yield buffer_per_group[group_id] num_remaining -= 1 if num_remaining == 0: break assert num_remaining == 0 def __len__(self): return len(self.sampler) // self.batch_size def _compute_aspect_ratios_slow(dataset, indices=None): print("Your dataset doesn't support the fast path for " "computing the aspect ratios, so will iterate over " "the full dataset and load every image instead. 
def _compute_aspect_ratios_voc_dataset(dataset, indices=None):
    """Return width/height for the images of a VOC-style dataset.

    Args:
        dataset: object exposing ``images`` (indexable collection of image
            paths) and ``__len__``.
        indices: optional iterable of dataset indices; defaults to all of them.

    Returns:
        list[float]: one aspect ratio per requested index, in order.
    """
    if indices is None:
        indices = range(len(dataset))
    aspect_ratios = []
    for i in indices:
        # Image.open does not decode pixel data, but it does keep the file
        # open; the context manager releases the handle right away instead of
        # leaking one descriptor per image.
        with Image.open(dataset.images[i]) as img:
            width, height = img.size
        aspect_ratios.append(float(width) / float(height))
    return aspect_ratios
hasattr(dataset, "get_height_and_width"): return _compute_aspect_ratios_custom_dataset(dataset, indices) if isinstance(dataset, torchvision.datasets.CocoDetection): return _compute_aspect_ratios_coco_dataset(dataset, indices) if isinstance(dataset, torchvision.datasets.VOCDetection): return _compute_aspect_ratios_voc_dataset(dataset, indices) if isinstance(dataset, torch.utils.data.Subset): return _compute_aspect_ratios_subset_dataset(dataset, indices) # slow path return _compute_aspect_ratios_slow(dataset, indices) def _quantize(x, bins): bins = copy.deepcopy(bins) bins = sorted(bins) # bisect_right:寻找y元素按顺序应该排在bins中哪个元素的右边,返回的是索引 quantized = list(map(lambda y: bisect.bisect_right(bins, y), x)) return quantized def create_aspect_ratio_groups(dataset, k=0): # 计算所有数据集中的图片width/height比例 aspect_ratios = compute_aspect_ratios(dataset) # 将[0.5, 2]区间划分成2*k+1等份 bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist() if k > 0 else [1.0] # 统计所有图像比例在bins区间中的位置索引 groups = _quantize(aspect_ratios, bins) # count number of elements per group # 统计每个区间的频次 counts = np.unique(groups, return_counts=True)[1] fbins = [0] + bins + [np.inf] print("Using {} as bins for aspect ratio quantization".format(fbins)) print("Count of instances per bin: {}".format(counts)) return groups ================================================ FILE: pytorch_object_detection/yolov3_spp/train_utils/train_eval_utils.py ================================================ import sys from torch.cuda import amp import torch.nn.functional as F from build_utils.utils import * from .coco_eval import CocoEvaluator from .coco_utils import get_coco_api_from_dataset import train_utils.distributed_utils as utils def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq, accumulate, img_size, grid_min, grid_max, gs, multi_scale=False, warmup=False, scaler=None): model.train() metric_logger = utils.MetricLogger(delimiter=" ") metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) 
header = 'Epoch: [{}]'.format(epoch) lr_scheduler = None if epoch == 0 and warmup is True: # 当训练第一轮(epoch=0)时,启用warmup训练方式,可理解为热身训练 warmup_factor = 1.0 / 1000 warmup_iters = min(1000, len(data_loader) - 1) lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor) accumulate = 1 mloss = torch.zeros(4).to(device) # mean losses now_lr = 0. nb = len(data_loader) # number of batches # imgs: [batch_size, 3, img_size, img_size] # targets: [num_obj, 6] , that number 6 means -> (img_index, obj_index, x, y, w, h) # paths: list of img path for i, (imgs, targets, paths, _, _) in enumerate(metric_logger.log_every(data_loader, print_freq, header)): # ni 统计从epoch0开始的所有batch数 ni = i + nb * epoch # number integrated batches (since train start) imgs = imgs.to(device).float() / 255.0 # uint8 to float32, 0 - 255 to 0.0 - 1.0 targets = targets.to(device) # Multi-Scale if multi_scale: # 每训练64张图片,就随机修改一次输入图片大小, # 由于label已转为相对坐标,故缩放图片不影响label的值 if ni % accumulate == 0: # adjust img_size (67% - 150%) every 1 batch # 在给定最大最小输入尺寸范围内随机选取一个size(size为32的整数倍) img_size = random.randrange(grid_min, grid_max + 1) * gs sf = img_size / max(imgs.shape[2:]) # scale factor # 如果图片最大边长不等于img_size, 则缩放图片,并将长和宽调整到32的整数倍 if sf != 1: # gs: (pixels) grid size ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to 32-multiple) imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) # 混合精度训练上下文管理器,如果在CPU环境中不起任何作用 with amp.autocast(enabled=scaler is not None): pred = model(imgs) # loss loss_dict = compute_loss(pred, targets, model) losses = sum(loss for loss in loss_dict.values()) # reduce losses over all GPUs for logging purpose loss_dict_reduced = utils.reduce_dict(loss_dict) losses_reduced = sum(loss for loss in loss_dict_reduced.values()) loss_items = torch.cat((loss_dict_reduced["box_loss"], loss_dict_reduced["obj_loss"], loss_dict_reduced["class_loss"], losses_reduced)).detach() mloss = (mloss * i + loss_items) / (i + 1) # update mean 
losses if not torch.isfinite(losses_reduced): print('WARNING: non-finite loss, ending training ', loss_dict_reduced) print("training image path: {}".format(",".join(paths))) sys.exit(1) losses *= 1. / accumulate # scale loss # backward if scaler is not None: scaler.scale(losses).backward() else: losses.backward() # optimize # 每训练64张图片更新一次权重 if ni % accumulate == 0: if scaler is not None: scaler.step(optimizer) scaler.update() else: optimizer.step() optimizer.zero_grad() metric_logger.update(loss=losses_reduced, **loss_dict_reduced) now_lr = optimizer.param_groups[0]["lr"] metric_logger.update(lr=now_lr) if ni % accumulate == 0 and lr_scheduler is not None: # 第一轮使用warmup训练方式 lr_scheduler.step() return mloss, now_lr @torch.no_grad() def evaluate(model, data_loader, coco=None, device=None): cpu_device = torch.device("cpu") model.eval() metric_logger = utils.MetricLogger(delimiter=" ") header = "Test: " if coco is None: coco = get_coco_api_from_dataset(data_loader.dataset) iou_types = _get_iou_types(model) coco_evaluator = CocoEvaluator(coco, iou_types) for imgs, targets, paths, shapes, img_index in metric_logger.log_every(data_loader, 100, header): imgs = imgs.to(device).float() / 255.0 # uint8 to float32, 0 - 255 to 0.0 - 1.0 # targets = targets.to(device) # 当使用CPU时,跳过GPU相关指令 if device != torch.device("cpu"): torch.cuda.synchronize(device) model_time = time.time() pred = model(imgs)[0] # only get inference result pred = non_max_suppression(pred, conf_thres=0.01, iou_thres=0.6, multi_label=False) model_time = time.time() - model_time outputs = [] for index, p in enumerate(pred): if p is None: p = torch.empty((0, 6), device=cpu_device) boxes = torch.empty((0, 4), device=cpu_device) else: # xmin, ymin, xmax, ymax boxes = p[:, :4] # shapes: (h0, w0), ((h / h0, w / w0), pad) # 将boxes信息还原回原图尺度,这样计算的mAP才是准确的 boxes = scale_coords(imgs[index].shape[1:], boxes, shapes[index][0]).round() # 注意这里传入的boxes格式必须是xmin, ymin, xmax, ymax,且为绝对坐标 info = {"boxes": boxes.to(cpu_device), 
"labels": p[:, 5].to(device=cpu_device, dtype=torch.int64), "scores": p[:, 4].to(cpu_device)} outputs.append(info) res = {img_id: output for img_id, output in zip(img_index, outputs)} evaluator_time = time.time() coco_evaluator.update(res) evaluator_time = time.time() - evaluator_time metric_logger.update(model_time=model_time, evaluator_time=evaluator_time) # gather the stats from all processes metric_logger.synchronize_between_processes() print("Averaged stats:", metric_logger) coco_evaluator.synchronize_between_processes() # accumulate predictions from all images coco_evaluator.accumulate() coco_evaluator.summarize() result_info = coco_evaluator.coco_eval[iou_types[0]].stats.tolist() # numpy to list return result_info def _get_iou_types(model): model_without_ddp = model if isinstance(model, torch.nn.parallel.DistributedDataParallel): model_without_ddp = model.module iou_types = ["bbox"] return iou_types ================================================ FILE: pytorch_object_detection/yolov3_spp/trans_voc2yolo.py ================================================ """ 本脚本有两个功能: 1.将voc数据集标注信息(.xml)转为yolo标注格式(.txt),并将图像文件复制到相应文件夹 2.根据json标签文件,生成对应names标签(my_data_label.names) """ import os from tqdm import tqdm from lxml import etree import json import shutil # voc数据集根目录以及版本 voc_root = "/data/VOCdevkit" voc_version = "VOC2012" # 转换的训练集以及验证集对应txt文件 train_txt = "train.txt" val_txt = "val.txt" # 转换后的文件保存目录 save_file_root = "./my_yolo_dataset" # label标签对应json文件 label_json_path = './data/pascal_voc_classes.json' # 拼接出voc的images目录,xml目录,txt目录 voc_images_path = os.path.join(voc_root, voc_version, "JPEGImages") voc_xml_path = os.path.join(voc_root, voc_version, "Annotations") train_txt_path = os.path.join(voc_root, voc_version, "ImageSets", "Main", train_txt) val_txt_path = os.path.join(voc_root, voc_version, "ImageSets", "Main", val_txt) # 检查文件/文件夹都是否存在 assert os.path.exists(voc_images_path), "VOC images path not exist..." 
def parse_xml_to_dict(xml):
    """
    Recursively convert an XML element into nested dictionaries
    (modelled on TensorFlow's recursive_parse_xml_to_dict).

    Args:
        xml: xml tree obtained by parsing XML file contents using lxml.etree

    Returns:
        Python dictionary holding XML contents: ``{tag: text}`` for a leaf,
        ``{tag: {child_tag: ...}}`` otherwise. Children tagged ``object`` are
        collected into a list because an annotation may contain several.
    """
    # Leaf element: nothing to descend into, return its text directly.
    if len(xml) == 0:
        return {xml.tag: xml.text}

    children = {}
    for node in xml:
        tag = node.tag
        value = parse_xml_to_dict(node)[tag]
        if tag == 'object':
            children.setdefault(tag, []).append(value)
        else:
            children[tag] = value
    return {xml.tag: children}
def create_class_names(class_dict: dict, save_path: str = "./data/my_data_label.names"):
    """Write the class names (the keys of ``class_dict``) one per line.

    Args:
        class_dict: mapping whose keys are the class names; they are written
            in the mapping's iteration order.
        save_path: destination file. Defaults to the location that was
            previously hard-coded, so existing callers are unaffected.

    The file has no trailing newline after the last name.
    """
    with open(save_path, "w") as w:
        w.write("\n".join(class_dict))
def summarize(self, catId=None):
    """
    Compute summary metrics for accumulated evaluation results.
    Note this function can *only* be applied on the default parameter setting.

    Args:
        self: evaluator object exposing ``params`` (``iouThrs``,
            ``areaRngLbl``, ``maxDets``) and ``eval`` (with ``'precision'``
            and ``'recall'`` arrays).
        catId: when an ``int``, restrict the statistics to that category
            index; otherwise all categories are averaged.

    Returns:
        (stats, print_info): the 12 metric values and their formatted lines
        joined by newlines.

    Raises:
        Exception: if ``self.eval`` is empty, i.e. accumulate() has not run.
    """
    # Check up front: previously this ran only after self.eval had already
    # been indexed, so the helpful message was never reached.
    if not self.eval:
        raise Exception('Please run accumulate() first')

    def _summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):
        p = self.params
        iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
        titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
        typeStr = '(AP)' if ap == 1 else '(AR)'
        iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
            if iouThr is None else '{:0.2f}'.format(iouThr)

        aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
        mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]

        if ap == 1:
            # dimension of precision: [TxRxKxAxM]
            s = self.eval['precision']
            # keep only the requested IoU threshold
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]

            if isinstance(catId, int):
                s = s[:, :, catId, aind, mind]
            else:
                s = s[:, :, :, aind, mind]
        else:
            # dimension of recall: [TxKxAxM]
            s = self.eval['recall']
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]

            if isinstance(catId, int):
                s = s[:, catId, aind, mind]
            else:
                s = s[:, :, aind, mind]

        # Entries equal to -1 are excluded from the mean; -1 is also the
        # result when nothing is left.
        valid = s[s > -1]
        mean_s = -1 if len(valid) == 0 else np.mean(valid)

        print_string = iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)
        return mean_s, print_string

    max_dets = self.params.maxDets
    # Same 12 queries, in the same order, as the standard COCO summary.
    queries = [
        dict(ap=1),
        dict(ap=1, iouThr=.5, maxDets=max_dets[2]),
        dict(ap=1, iouThr=.75, maxDets=max_dets[2]),
        dict(ap=1, areaRng='small', maxDets=max_dets[2]),
        dict(ap=1, areaRng='medium', maxDets=max_dets[2]),
        dict(ap=1, areaRng='large', maxDets=max_dets[2]),
        dict(ap=0, maxDets=max_dets[0]),
        dict(ap=0, maxDets=max_dets[1]),
        dict(ap=0, maxDets=max_dets[2]),
        dict(ap=0, areaRng='small', maxDets=max_dets[2]),
        dict(ap=0, areaRng='medium', maxDets=max_dets[2]),
        dict(ap=0, areaRng='large', maxDets=max_dets[2]),
    ]

    stats, print_list = [], []
    for query in queries:
        value, line = _summarize(**query)
        stats.append(value)
        print_list.append(line)

    print_info = "\n".join(print_list)
    return stats, print_info
将每个batch的图像调整到合适大小,可减少运算量(并不是512x512标准尺寸) val_dataset_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=nw, pin_memory=True, collate_fn=val_dataset.collate_fn) # create model model = Darknet(parser_data.cfg, parser_data.img_size) weights_dict = torch.load(parser_data.weights, map_location='cpu') weights_dict = weights_dict["model"] if "model" in weights_dict else weights_dict model.load_state_dict(weights_dict) model.to(device) # evaluate on the test dataset coco = get_coco_api_from_dataset(val_dataset) iou_types = ["bbox"] coco_evaluator = CocoEvaluator(coco, iou_types) cpu_device = torch.device("cpu") model.eval() with torch.no_grad(): for imgs, targets, paths, shapes, img_index in tqdm(val_dataset_loader, desc="validation..."): imgs = imgs.to(device).float() / 255.0 # uint8 to float32, 0 - 255 to 0.0 - 1.0 pred = model(imgs)[0] # only get inference result pred = non_max_suppression(pred, conf_thres=0.01, iou_thres=0.6, multi_label=False) outputs = [] for index, p in enumerate(pred): if p is None: p = torch.empty((0, 6), device=cpu_device) boxes = torch.empty((0, 4), device=cpu_device) else: # xmin, ymin, xmax, ymax boxes = p[:, :4] # shapes: (h0, w0), ((h / h0, w / w0), pad) # 将boxes信息还原回原图尺度,这样计算的mAP才是准确的 boxes = scale_coords(imgs[index].shape[1:], boxes, shapes[index][0]).round() # 注意这里传入的boxes格式必须是xmin, ymin, xmax, ymax,且为绝对坐标 info = {"boxes": boxes.to(cpu_device), "labels": p[:, 5].to(device=cpu_device, dtype=torch.int64), "scores": p[:, 4].to(cpu_device)} outputs.append(info) res = {img_id: output for img_id, output in zip(img_index, outputs)} coco_evaluator.update(res) coco_evaluator.synchronize_between_processes() # accumulate predictions from all images coco_evaluator.accumulate() coco_evaluator.summarize() coco_eval = coco_evaluator.coco_eval["bbox"] # calculate COCO info for all classes coco_stats, print_coco = summarize(coco_eval) # calculate voc info for every classes(IoU=0.5) voc_map_info_list = [] 
for i in range(len(category_index)): stats, _ = summarize(coco_eval, catId=i) voc_map_info_list.append(" {:15}: {}".format(category_index[i + 1], stats[1])) print_voc = "\n".join(voc_map_info_list) print(print_voc) # 将验证结果保存至txt文件中 with open("record_mAP.txt", "w") as f: record_lines = ["COCO results:", print_coco, "", "mAP(IoU=0.5) for each category:", print_voc] f.write("\n".join(record_lines)) if __name__ == "__main__": import argparse parser = argparse.ArgumentParser( description=__doc__) # 使用设备类型 parser.add_argument('--device', default='cuda', help='device') # 检测目标类别数 parser.add_argument('--num-classes', type=int, default='20', help='number of classes') parser.add_argument('--cfg', type=str, default='cfg/my_yolov3.cfg', help="*.cfg path") parser.add_argument('--data', type=str, default='data/my_data.data', help='*.data path') parser.add_argument('--hyp', type=str, default='cfg/hyp.yaml', help='hyperparameters path') parser.add_argument('--img-size', type=int, default=512, help='test size') # 训练好的权重文件 parser.add_argument('--weights', default='./weights/yolov3spp-voc-512.pt', type=str, help='training weights') # batch size parser.add_argument('--batch_size', default=1, type=int, metavar='N', help='batch size when validation.') args = parser.parse_args() main(args) ================================================ FILE: pytorch_segmentation/deeplab_v3/README.md ================================================ # DeepLabV3(Rethinking Atrous Convolution for Semantic Image Segmentation) ## 该项目主要是来自pytorch官方torchvision模块中的源码 * https://github.com/pytorch/vision/tree/main/torchvision/models/segmentation ## 环境配置: * Python3.6/3.7/3.8 * Pytorch1.10 * Ubuntu或Centos(Windows暂不支持多GPU训练) * 最好使用GPU训练 * 详细环境配置见```requirements.txt``` ## 文件结构: ``` ├── src: 模型的backbone以及DeepLabv3的搭建 ├── train_utils: 训练、验证以及多GPU训练相关模块 ├── my_dataset.py: 自定义dataset用于读取VOC数据集 ├── train.py: 以deeplabv3_resnet50为例进行训练 ├── train_multi_GPU.py: 针对使用多GPU的用户使用 ├── predict.py: 简易的预测脚本,使用训练好的权重进行预测测试 ├── 
validation.py: 利用训练好的权重验证/测试数据的mIoU等指标,并生成record_mAP.txt文件 └── pascal_voc_classes.json: pascal_voc标签文件 ``` ## 预训练权重下载地址: * 注意:官方提供的预训练权重是在COCO上预训练得到的,训练时只针对和PASCAL VOC相同的类别进行了训练,所以类别数是21(包括背景) * deeplabv3_resnet50: https://download.pytorch.org/models/deeplabv3_resnet50_coco-cd0a2569.pth * deeplabv3_resnet101: https://download.pytorch.org/models/deeplabv3_resnet101_coco-586e9e4e.pth * deeplabv3_mobilenetv3_large_coco: https://download.pytorch.org/models/deeplabv3_mobilenet_v3_large-fc3c493d.pth * 注意,下载的预训练权重记得要重命名,比如在train.py中读取的是```deeplabv3_resnet50_coco.pth```文件, 不是```deeplabv3_resnet50_coco-cd0a2569.pth``` ## 数据集,本例程使用的是PASCAL VOC2012数据集 * Pascal VOC2012 train/val数据集下载地址:http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar * 如果不了解数据集或者想使用自己的数据集进行训练,请参考我的博文: https://blog.csdn.net/qq_37541097/article/details/115787033 ## 训练方法 * 确保提前准备好数据集 * 确保提前下载好对应预训练模型权重 * 若要使用单GPU或者CPU训练,直接使用train.py训练脚本 * 若要使用多GPU训练,使用```torchrun --nproc_per_node=8 train_multi_GPU.py```指令,```nproc_per_node```参数为使用GPU数量 * 如果想指定使用哪些GPU设备可在指令前加上```CUDA_VISIBLE_DEVICES=0,3```(例如我只要使用设备中的第1块和第4块GPU设备) * ```CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py``` ## 注意事项 * 在使用训练脚本时,注意要将'--data-path'(VOC_root)设置为自己存放'VOCdevkit'文件夹所在的**根目录** * 在使用预测脚本时,要将'weights_path'设置为你自己生成的权重路径。 * 使用validation文件时,注意确保你的验证集或者测试集中必须包含每个类别的目标,并且使用时只需要修改'--num-classes'、'--aux'、'--data-path'和'--weights'即可,其他代码尽量不要改动 ## 如果对DeepLabV3原理不是很理解可参考我的bilibili * https://www.bilibili.com/video/BV1Jb4y1q7j7 ## 进一步了解该项目,以及对DeepLabV3代码的分析可参考我的bilibili * https://www.bilibili.com/video/BV1TD4y1c7Wx ## Pytorch官方实现的DeeplabV3网络框架图 ![deeplabv3_resnet50_pytorch](./deeplabv3_resnet50.png) ================================================ FILE: pytorch_segmentation/deeplab_v3/get_palette.py ================================================ import json import numpy as np from PIL import Image # 读取mask标签 target = Image.open("./2007_001288.png") # 获取调色板 palette = target.getpalette() palette = 
class VOCSegmentation(data.Dataset):
    """Pascal VOC semantic-segmentation dataset backed by image/mask file lists.

    The split file ``ImageSets/Segmentation/<txt_name>`` under
    ``<voc_root>/VOCdevkit/VOC<year>`` lists the sample names; images are
    looked up in ``JPEGImages`` (.jpg) and masks in ``SegmentationClass`` (.png).
    """

    def __init__(self, voc_root, year="2012", transforms=None, txt_name: str = "train.txt"):
        super(VOCSegmentation, self).__init__()
        assert year in ["2007", "2012"], "year must be in ['2007', '2012']"

        root = os.path.join(voc_root, "VOCdevkit", f"VOC{year}")
        assert os.path.exists(root), "path '{}' does not exist.".format(root)

        txt_path = os.path.join(root, "ImageSets", "Segmentation", txt_name)
        assert os.path.exists(txt_path), "file '{}' does not exist.".format(txt_path)

        # One sample name per line; blank lines are ignored.
        with open(txt_path, "r") as f:
            stripped = (line.strip() for line in f)
            file_names = [name for name in stripped if name]

        image_dir = os.path.join(root, 'JPEGImages')
        mask_dir = os.path.join(root, 'SegmentationClass')
        self.images = [os.path.join(image_dir, name + ".jpg") for name in file_names]
        self.masks = [os.path.join(mask_dir, name + ".png") for name in file_names]
        assert (len(self.images) == len(self.masks))
        self.transforms = transforms

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where target is the image segmentation.
        """
        img = Image.open(self.images[index]).convert('RGB')
        target = Image.open(self.masks[index])

        if self.transforms is None:
            return img, target

        img, target = self.transforms(img, target)
        return img, target

    @staticmethod
    def collate_fn(batch):
        """Pad and stack a list of (image, target) pairs into two batch tensors.

        Images are padded with 0 and targets with 255.
        """
        images, targets = zip(*batch)
        return cat_list(images, fill_value=0), cat_list(targets, fill_value=255)


def cat_list(images, fill_value=0):
    """Stack tensors of differing sizes into one batch padded with ``fill_value``.

    Each tensor is copied into the leading corner of its slot (indexed by its
    last two dimensions); the batch takes the per-dimension maximum size and
    the dtype/device of the first tensor.
    """
    shapes = [img.shape for img in images]
    max_size = tuple(max(dims) for dims in zip(*shapes))
    batch_shape = (len(images),) + max_size
    batched_imgs = images[0].new_empty(batch_shape).fill_(fill_value)
    for src, slot in zip(images, batched_imgs):
        slot[..., :src.shape[-2], :src.shape[-1]].copy_(src)
    return batched_imgs
[192, 128, 192], "48": [0, 64, 64], "49": [128, 64, 64], "50": [0, 192, 64], "51": [128, 192, 64], "52": [0, 64, 192], "53": [128, 64, 192], "54": [0, 192, 192], "55": [128, 192, 192], "56": [64, 64, 64], "57": [192, 64, 64], "58": [64, 192, 64], "59": [192, 192, 64], "60": [64, 64, 192], "61": [192, 64, 192], "62": [64, 192, 192], "63": [192, 192, 192], "64": [32, 0, 0], "65": [160, 0, 0], "66": [32, 128, 0], "67": [160, 128, 0], "68": [32, 0, 128], "69": [160, 0, 128], "70": [32, 128, 128], "71": [160, 128, 128], "72": [96, 0, 0], "73": [224, 0, 0], "74": [96, 128, 0], "75": [224, 128, 0], "76": [96, 0, 128], "77": [224, 0, 128], "78": [96, 128, 128], "79": [224, 128, 128], "80": [32, 64, 0], "81": [160, 64, 0], "82": [32, 192, 0], "83": [160, 192, 0], "84": [32, 64, 128], "85": [160, 64, 128], "86": [32, 192, 128], "87": [160, 192, 128], "88": [96, 64, 0], "89": [224, 64, 0], "90": [96, 192, 0], "91": [224, 192, 0], "92": [96, 64, 128], "93": [224, 64, 128], "94": [96, 192, 128], "95": [224, 192, 128], "96": [32, 0, 64], "97": [160, 0, 64], "98": [32, 128, 64], "99": [160, 128, 64], "100": [32, 0, 192], "101": [160, 0, 192], "102": [32, 128, 192], "103": [160, 128, 192], "104": [96, 0, 64], "105": [224, 0, 64], "106": [96, 128, 64], "107": [224, 128, 64], "108": [96, 0, 192], "109": [224, 0, 192], "110": [96, 128, 192], "111": [224, 128, 192], "112": [32, 64, 64], "113": [160, 64, 64], "114": [32, 192, 64], "115": [160, 192, 64], "116": [32, 64, 192], "117": [160, 64, 192], "118": [32, 192, 192], "119": [160, 192, 192], "120": [96, 64, 64], "121": [224, 64, 64], "122": [96, 192, 64], "123": [224, 192, 64], "124": [96, 64, 192], "125": [224, 64, 192], "126": [96, 192, 192], "127": [224, 192, 192], "128": [0, 32, 0], "129": [128, 32, 0], "130": [0, 160, 0], "131": [128, 160, 0], "132": [0, 32, 128], "133": [128, 32, 128], "134": [0, 160, 128], "135": [128, 160, 128], "136": [64, 32, 0], "137": [192, 32, 0], "138": [64, 160, 0], "139": [192, 160, 0], "140": [64, 
32, 128], "141": [192, 32, 128], "142": [64, 160, 128], "143": [192, 160, 128], "144": [0, 96, 0], "145": [128, 96, 0], "146": [0, 224, 0], "147": [128, 224, 0], "148": [0, 96, 128], "149": [128, 96, 128], "150": [0, 224, 128], "151": [128, 224, 128], "152": [64, 96, 0], "153": [192, 96, 0], "154": [64, 224, 0], "155": [192, 224, 0], "156": [64, 96, 128], "157": [192, 96, 128], "158": [64, 224, 128], "159": [192, 224, 128], "160": [0, 32, 64], "161": [128, 32, 64], "162": [0, 160, 64], "163": [128, 160, 64], "164": [0, 32, 192], "165": [128, 32, 192], "166": [0, 160, 192], "167": [128, 160, 192], "168": [64, 32, 64], "169": [192, 32, 64], "170": [64, 160, 64], "171": [192, 160, 64], "172": [64, 32, 192], "173": [192, 32, 192], "174": [64, 160, 192], "175": [192, 160, 192], "176": [0, 96, 64], "177": [128, 96, 64], "178": [0, 224, 64], "179": [128, 224, 64], "180": [0, 96, 192], "181": [128, 96, 192], "182": [0, 224, 192], "183": [128, 224, 192], "184": [64, 96, 64], "185": [192, 96, 64], "186": [64, 224, 64], "187": [192, 224, 64], "188": [64, 96, 192], "189": [192, 96, 192], "190": [64, 224, 192], "191": [192, 224, 192], "192": [32, 32, 0], "193": [160, 32, 0], "194": [32, 160, 0], "195": [160, 160, 0], "196": [32, 32, 128], "197": [160, 32, 128], "198": [32, 160, 128], "199": [160, 160, 128], "200": [96, 32, 0], "201": [224, 32, 0], "202": [96, 160, 0], "203": [224, 160, 0], "204": [96, 32, 128], "205": [224, 32, 128], "206": [96, 160, 128], "207": [224, 160, 128], "208": [32, 96, 0], "209": [160, 96, 0], "210": [32, 224, 0], "211": [160, 224, 0], "212": [32, 96, 128], "213": [160, 96, 128], "214": [32, 224, 128], "215": [160, 224, 128], "216": [96, 96, 0], "217": [224, 96, 0], "218": [96, 224, 0], "219": [224, 224, 0], "220": [96, 96, 128], "221": [224, 96, 128], "222": [96, 224, 128], "223": [224, 224, 128], "224": [32, 32, 64], "225": [160, 32, 64], "226": [32, 160, 64], "227": [160, 160, 64], "228": [32, 32, 192], "229": [160, 32, 192], "230": [32, 160, 192], 
"231": [160, 160, 192], "232": [96, 32, 64], "233": [224, 32, 64], "234": [96, 160, 64], "235": [224, 160, 64], "236": [96, 32, 192], "237": [224, 32, 192], "238": [96, 160, 192], "239": [224, 160, 192], "240": [32, 96, 64], "241": [160, 96, 64], "242": [32, 224, 64], "243": [160, 224, 64], "244": [32, 96, 192], "245": [160, 96, 192], "246": [32, 224, 192], "247": [160, 224, 192], "248": [96, 96, 64], "249": [224, 96, 64], "250": [96, 224, 64], "251": [224, 224, 64], "252": [96, 96, 192], "253": [224, 96, 192], "254": [96, 224, 192], "255": [224, 224, 192]} ================================================ FILE: pytorch_segmentation/deeplab_v3/pascal_voc_classes.json ================================================ { "aeroplane": 1, "bicycle": 2, "bird": 3, "boat": 4, "bottle": 5, "bus": 6, "car": 7, "cat": 8, "chair": 9, "cow": 10, "diningtable": 11, "dog": 12, "horse": 13, "motorbike": 14, "person": 15, "pottedplant": 16, "sheep": 17, "sofa": 18, "train": 19, "tvmonitor": 20 } ================================================ FILE: pytorch_segmentation/deeplab_v3/predict.py ================================================ import os import time import json import torch from torchvision import transforms import numpy as np from PIL import Image from src import deeplabv3_resnet50 def time_synchronized(): torch.cuda.synchronize() if torch.cuda.is_available() else None return time.time() def main(): aux = False # inference time not need aux_classifier classes = 20 weights_path = "./save_weights/model_29.pth" img_path = "./test.jpg" palette_path = "./palette.json" assert os.path.exists(weights_path), f"weights {weights_path} not found." assert os.path.exists(img_path), f"image {img_path} not found." assert os.path.exists(palette_path), f"palette {palette_path} not found." 
def main():
    """Run DeepLabV3-ResNet50 on a single image and save a palettised mask.

    Reads the checkpoint, test image and palette from hard-coded relative
    paths, runs one timed forward pass, and writes the per-pixel argmax of
    the ``'out'`` head to ``test_result.png`` as a palette image.

    Raises:
        AssertionError: if the weights, image or palette file is missing.
    """
    aux = False  # the auxiliary classifier is not needed at inference time
    classes = 20
    weights_path = "./save_weights/model_29.pth"
    img_path = "./test.jpg"
    palette_path = "./palette.json"
    assert os.path.exists(weights_path), f"weights {weights_path} not found."
    assert os.path.exists(img_path), f"image {img_path} not found."
    assert os.path.exists(palette_path), f"palette {palette_path} not found."

    # palette.json maps a class index to an [R, G, B] triple; PIL's
    # putpalette() wants them flattened into one list.
    with open(palette_path, "rb") as f:
        pallette_dict = json.load(f)
        pallette = []
        for v in pallette_dict.values():
            pallette += v

    # get devices
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # create model (+1 output channel for the background class)
    model = deeplabv3_resnet50(aux=aux, num_classes=classes+1)

    # The model is built without an aux head, so drop the aux-classifier
    # weights from the checkpoint before the strict load below.
    weights_dict = torch.load(weights_path, map_location='cpu')['model']
    for k in list(weights_dict.keys()):
        if "aux" in k:
            del weights_dict[k]

    # load weights
    model.load_state_dict(weights_dict)
    model.to(device)

    # Load the image as 3-channel RGB. Grayscale, palette or RGBA files would
    # otherwise produce a tensor whose channel count does not match the
    # 3-channel Normalize below.
    original_img = Image.open(img_path).convert('RGB')

    # from pil image to tensor and normalize
    data_transform = transforms.Compose([transforms.Resize(520),
                                         transforms.ToTensor(),
                                         transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                                              std=(0.229, 0.224, 0.225))])
    img = data_transform(original_img)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)

    model.eval()  # switch to evaluation mode
    with torch.no_grad():
        # Warm-up pass on a zero image of the same size.
        # NOTE(review): presumably this keeps one-time start-up cost out of
        # the timing below - confirm.
        img_height, img_width = img.shape[-2:]
        init_img = torch.zeros((1, 3, img_height, img_width), device=device)
        model(init_img)

        t_start = time_synchronized()
        output = model(img.to(device))
        t_end = time_synchronized()
        print("inference time: {}".format(t_end - t_start))

        # Per-pixel class index, stored as uint8 so it can be saved as a
        # palette image.
        prediction = output['out'].argmax(1).squeeze(0)
        prediction = prediction.to("cpu").numpy().astype(np.uint8)
        mask = Image.fromarray(prediction)
        mask.putpalette(pallette)
        mask.save("test_result.png")
'95.7', '63.8', '92.2', '68.8', '88.8', '90.5', '93.0', '95.3', '75.3', '94.2', '83.6', '91.9', '75.0'] IoU: ['93.9', '91.5', '42.6', '88.9', '65.3', '60.4', '95.9', '76.3', '90.7', '50.9', '87.3', '54.1', '86.1', '83.0', '87.2', '89.3', '64.3', '91.0', '58.9', '84.5', '73.6'] mean IoU: 76.9 [epoch: 1] train_loss: 0.6005 lr: 0.000077 global correct: 94.7 average row correct: ['96.1', '96.3', '84.1', '95.1', '84.1', '81.4', '98.6', '85.8', '96.6', '68.7', '91.9', '71.3', '93.9', '91.4', '96.4', '95.5', '81.0', '94.1', '85.4', '94.9', '83.6'] IoU: ['93.9', '92.3', '42.2', '88.8', '69.1', '71.2', '96.1', '75.1', '91.9', '48.9', '87.8', '56.0', '87.9', '85.7', '89.2', '89.5', '63.6', '90.3', '56.2', '85.3', '79.8'] mean IoU: 78.1 [epoch: 2] train_loss: 0.5840 lr: 0.000054 global correct: 94.8 average row correct: ['96.2', '95.5', '85.8', '94.6', '85.5', '83.7', '98.8', '87.5', '96.3', '71.4', '92.5', '72.8', '93.1', '91.9', '96.7', '94.9', '81.5', '95.3', '82.8', '95.3', '84.1'] IoU: ['94.0', '91.2', '42.7', '88.3', '69.2', '72.7', '96.4', '74.8', '92.0', '49.8', '87.5', '58.3', '87.3', '85.0', '89.3', '89.2', '62.6', '89.6', '58.1', '84.8', '80.3'] mean IoU: 78.2 [epoch: 3] train_loss: 0.5637 lr: 0.000029 global correct: 94.8 average row correct: ['96.1', '95.9', '81.7', '94.8', '86.5', '79.4', '99.0', '89.1', '95.8', '71.4', '93.8', '71.0', '93.4', '92.4', '97.3', '94.9', '80.4', '96.9', '83.3', '94.7', '84.4'] IoU: ['94.0', '89.5', '41.8', '87.6', '69.0', '70.4', '96.0', '75.9', '92.1', '49.7', '87.3', '58.1', '86.2', '83.9', '88.7', '89.2', '63.7', '88.8', '57.7', '85.3', '79.9'] mean IoU: 77.8 [epoch: 4] train_loss: 0.5779 lr: 0.000000 global correct: 94.8 average row correct: ['96.3', '93.6', '85.9', '95.1', '82.6', '83.8', '98.5', '90.0', '95.9', '71.1', '93.2', '68.4', '92.6', '93.9', '95.9', '94.5', '82.8', '96.3', '82.8', '94.5', '86.4'] IoU: ['94.1', '91.8', '42.5', '88.5', '67.8', '72.1', '96.6', '78.3', '92.0', '49.8', '88.3', '58.8', '86.7', '84.9', 
class IntermediateLayerGetter(nn.ModuleDict):
    """
    Module wrapper that returns intermediate layers from a model.

    It relies on the model's direct children having been registered in the
    same order as they are used in the forward pass, so the same nn.Module
    must **not** be reused twice in the forward for this to work.

    Only submodules that are directly assigned to the model can be queried:
    if `model` is passed, `model.feature1` can be returned, but not
    `model.feature1.layer2`.

    Args:
        model (nn.Module): model on which we will extract the features
        return_layers (Dict[name, new_name]): maps the name of each child
            whose activation should be returned to the key under which that
            activation is stored in the output dict.

    Raises:
        ValueError: if a key of ``return_layers`` is not a direct child of
            ``model``.
    """
    _version = 2
    __annotations__ = {
        "return_layers": Dict[str, str],
    }

    def __init__(self, model: nn.Module, return_layers: Dict[str, str]) -> None:
        child_names = {child_name for child_name, _ in model.named_children()}
        if not set(return_layers) <= child_names:
            raise ValueError("return_layers are not present in model")

        # Rebuild the backbone: keep children up to and including the last
        # requested layer and drop every module after it.
        pending = {str(key) for key in return_layers}
        kept = OrderedDict()
        for child_name, child in model.named_children():
            kept[child_name] = child
            pending.discard(child_name)
            if not pending:
                break

        super(IntermediateLayerGetter, self).__init__(kept)
        self.return_layers = return_layers

    def forward(self, x: Tensor) -> Dict[str, Tensor]:
        """Run the kept children in order and collect the requested activations."""
        collected = OrderedDict()
        for child_name, child in self.items():
            x = child(x)
            if child_name not in self.return_layers:
                continue
            collected[self.return_layers[child_name]] = x
        return collected
class ASPPPooling(nn.Sequential):
    """Image-level pooling branch of ASPP.

    Global-average-pools the input to 1x1, applies a bias-free 1x1 conv,
    batch norm and ReLU, then bilinearly resizes the result back to the
    spatial size of the input.
    """

    def __init__(self, in_channels: int, out_channels: int) -> None:
        stages = [
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(in_channels, out_channels, 1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]
        super(ASPPPooling, self).__init__(*stages)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Remember the input's spatial size before pooling collapses it.
        target_hw = x.shape[-2:]
        pooled = x
        for stage in self.children():
            pooled = stage(pooled)
        return F.interpolate(pooled, size=target_hw, mode='bilinear', align_corners=False)
def _deeplabv3_resnet(backbone, aux, num_classes):
    """Attach DeepLabV3 heads to a ResNet backbone (shared by the ResNet factories).

    ``layer4`` feeds the main head as "out" (2048 channels). When ``aux`` is
    true, ``layer3`` (1024 channels) is also exposed as "aux" and given an
    FCN auxiliary head.
    """
    out_inplanes = 2048
    aux_inplanes = 1024

    return_layers = {'layer4': 'out'}
    if aux:
        return_layers['layer3'] = 'aux'
    backbone = IntermediateLayerGetter(backbone, return_layers=return_layers)

    # why using aux: https://github.com/pytorch/vision/issues/4292
    # The aux head is built before the main head, matching the original
    # construction order.
    aux_classifier = FCNHead(aux_inplanes, num_classes) if aux else None
    classifier = DeepLabHead(out_inplanes, num_classes)

    return DeepLabV3(backbone, classifier, aux_classifier)


def deeplabv3_resnet50(aux, num_classes=21, pretrain_backbone=False):
    """Build DeepLabV3 on a ResNet-50 backbone with dilated layer3/layer4.

    Args:
        aux: whether to add the auxiliary classifier on ``layer3``.
        num_classes: number of output channels of the heads.
        pretrain_backbone: if true, load backbone weights from
            ``resnet50.pth`` in the current working directory.

    Returns:
        The assembled ``DeepLabV3`` model.
    """
    # 'resnet50_imagenet': 'https://download.pytorch.org/models/resnet50-0676ba61.pth'
    # 'deeplabv3_resnet50_coco': 'https://download.pytorch.org/models/deeplabv3_resnet50_coco-cd0a2569.pth'
    backbone = resnet50(replace_stride_with_dilation=[False, True, True])

    if pretrain_backbone:
        # load the pretrained resnet50 backbone weights
        backbone.load_state_dict(torch.load("resnet50.pth", map_location='cpu'))

    return _deeplabv3_resnet(backbone, aux, num_classes)


def deeplabv3_resnet101(aux, num_classes=21, pretrain_backbone=False):
    """Build DeepLabV3 on a ResNet-101 backbone with dilated layer3/layer4.

    Args:
        aux: whether to add the auxiliary classifier on ``layer3``.
        num_classes: number of output channels of the heads.
        pretrain_backbone: if true, load backbone weights from
            ``resnet101.pth`` in the current working directory.

    Returns:
        The assembled ``DeepLabV3`` model.
    """
    # 'resnet101_imagenet': 'https://download.pytorch.org/models/resnet101-63fe2227.pth'
    # 'deeplabv3_resnet101_coco': 'https://download.pytorch.org/models/deeplabv3_resnet101_coco-586e9e4e.pth'
    backbone = resnet101(replace_stride_with_dilation=[False, True, True])

    if pretrain_backbone:
        # load the pretrained resnet101 backbone weights
        backbone.load_state_dict(torch.load("resnet101.pth", map_location='cpu'))

    return _deeplabv3_resnet(backbone, aux, num_classes)
stage_indices = [0] + [i for i, b in enumerate(backbone) if getattr(b, "is_strided", False)] + [len(backbone) - 1] out_pos = stage_indices[-1] # use C5 which has output_stride = 16 out_inplanes = backbone[out_pos].out_channels aux_pos = stage_indices[-4] # use C2 here which has output_stride = 8 aux_inplanes = backbone[aux_pos].out_channels return_layers = {str(out_pos): "out"} if aux: return_layers[str(aux_pos)] = "aux" backbone = IntermediateLayerGetter(backbone, return_layers=return_layers) aux_classifier = None # why using aux: https://github.com/pytorch/vision/issues/4292 if aux: aux_classifier = FCNHead(aux_inplanes, num_classes) classifier = DeepLabHead(out_inplanes, num_classes) model = DeepLabV3(backbone, classifier, aux_classifier) return model ================================================ FILE: pytorch_segmentation/deeplab_v3/src/mobilenet_backbone.py ================================================ from typing import Callable, List, Optional import torch from torch import nn, Tensor from torch.nn import functional as F from functools import partial def _make_divisible(ch, divisor=8, min_ch=None): """ This function is taken from the original tf repo. It ensures that all layers have a channel number that is divisible by 8 It can be seen here: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py """ if min_ch is None: min_ch = divisor new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor) # Make sure that round down does not go down by more than 10%. 
if new_ch < 0.9 * ch: new_ch += divisor return new_ch class ConvBNActivation(nn.Sequential): def __init__(self, in_planes: int, out_planes: int, kernel_size: int = 3, stride: int = 1, groups: int = 1, norm_layer: Optional[Callable[..., nn.Module]] = None, activation_layer: Optional[Callable[..., nn.Module]] = None, dilation: int = 1): padding = (kernel_size - 1) // 2 * dilation if norm_layer is None: norm_layer = nn.BatchNorm2d if activation_layer is None: activation_layer = nn.ReLU6 super(ConvBNActivation, self).__init__(nn.Conv2d(in_channels=in_planes, out_channels=out_planes, kernel_size=kernel_size, stride=stride, dilation=dilation, padding=padding, groups=groups, bias=False), norm_layer(out_planes), activation_layer(inplace=True)) self.out_channels = out_planes class SqueezeExcitation(nn.Module): def __init__(self, input_c: int, squeeze_factor: int = 4): super(SqueezeExcitation, self).__init__() squeeze_c = _make_divisible(input_c // squeeze_factor, 8) self.fc1 = nn.Conv2d(input_c, squeeze_c, 1) self.fc2 = nn.Conv2d(squeeze_c, input_c, 1) def forward(self, x: Tensor) -> Tensor: scale = F.adaptive_avg_pool2d(x, output_size=(1, 1)) scale = self.fc1(scale) scale = F.relu(scale, inplace=True) scale = self.fc2(scale) scale = F.hardsigmoid(scale, inplace=True) return scale * x class InvertedResidualConfig: def __init__(self, input_c: int, kernel: int, expanded_c: int, out_c: int, use_se: bool, activation: str, stride: int, dilation: int, width_multi: float): self.input_c = self.adjust_channels(input_c, width_multi) self.kernel = kernel self.expanded_c = self.adjust_channels(expanded_c, width_multi) self.out_c = self.adjust_channels(out_c, width_multi) self.use_se = use_se self.use_hs = activation == "HS" # whether using h-swish activation self.stride = stride self.dilation = dilation @staticmethod def adjust_channels(channels: int, width_multi: float): return _make_divisible(channels * width_multi, 8) class InvertedResidual(nn.Module): def __init__(self, cnf: 
class MobileNetV3(nn.Module):
    """MobileNetV3 classifier assembled from a list of inverted-residual configs.

    ``features`` is a stride-2 3x3 stem convolution, one block per config,
    and a closing 1x1 convolution that widens the last block's output six
    times. It is followed by global average pooling and a two-layer
    classifier.

    Args:
        inverted_residual_setting: non-empty list of block configurations.
        last_channel: width of the hidden classifier layer.
        num_classes: number of classifier outputs.
        block: block constructor called as ``block(cnf, norm_layer)``;
            defaults to ``InvertedResidual``.
        norm_layer: normalisation-layer constructor; defaults to
            ``BatchNorm2d`` with ``eps=0.001`` and ``momentum=0.01``.

    Raises:
        ValueError: if ``inverted_residual_setting`` is empty.
        TypeError: if it is not a list of ``InvertedResidualConfig``.
    """

    def __init__(self,
                 inverted_residual_setting: List[InvertedResidualConfig],
                 last_channel: int,
                 num_classes: int = 1000,
                 block: Optional[Callable[..., nn.Module]] = None,
                 norm_layer: Optional[Callable[..., nn.Module]] = None):
        super().__init__()

        if not inverted_residual_setting:
            raise ValueError("The inverted_residual_setting should not be empty.")
        is_list = isinstance(inverted_residual_setting, List)
        if not is_list or not all(isinstance(cfg, InvertedResidualConfig)
                                  for cfg in inverted_residual_setting):
            raise TypeError("The inverted_residual_setting should be List[InvertedResidualConfig]")

        block = InvertedResidual if block is None else block
        if norm_layer is None:
            norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.01)

        # Stem: stride-2 3x3 conv producing the first block's input width.
        stem = ConvBNActivation(3,
                                inverted_residual_setting[0].input_c,
                                kernel_size=3,
                                stride=2,
                                norm_layer=norm_layer,
                                activation_layer=nn.Hardswish)

        # One block per configuration entry.
        stages = [block(cfg, norm_layer) for cfg in inverted_residual_setting]

        # Closing 1x1 conv expands the last block's output six-fold.
        tail_in_c = inverted_residual_setting[-1].out_c
        tail_out_c = 6 * tail_in_c
        tail = ConvBNActivation(tail_in_c,
                                tail_out_c,
                                kernel_size=1,
                                norm_layer=norm_layer,
                                activation_layer=nn.Hardswish)

        self.features = nn.Sequential(stem, *stages, tail)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Sequential(
            nn.Linear(tail_out_c, last_channel),
            nn.Hardswish(inplace=True),
            nn.Dropout(p=0.2, inplace=True),
            nn.Linear(last_channel, num_classes),
        )

        self._init_weights()

    def _init_weights(self) -> None:
        """Initialise conv, normalisation and linear layers."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode="fan_out")
                if module.bias is not None:
                    nn.init.zeros_(module.bias)
            elif isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.zeros_(module.bias)

    def _forward_impl(self, x: Tensor) -> Tensor:
        pooled = self.avgpool(self.features(x))
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)

    def forward(self, x: Tensor) -> Tensor:
        return self._forward_impl(x)
dilated: whether using dilated conv """ width_multi = 1.0 bneck_conf = partial(InvertedResidualConfig, width_multi=width_multi) adjust_channels = partial(InvertedResidualConfig.adjust_channels, width_multi=width_multi) reduce_divider = 2 if reduced_tail else 1 dilation = 2 if dilated else 1 inverted_residual_setting = [ # input_c, kernel, expanded_c, out_c, use_se, activation, stride, dilation bneck_conf(16, 3, 16, 16, False, "RE", 1, 1), bneck_conf(16, 3, 64, 24, False, "RE", 2, 1), # C1 bneck_conf(24, 3, 72, 24, False, "RE", 1, 1), bneck_conf(24, 5, 72, 40, True, "RE", 2, 1), # C2 bneck_conf(40, 5, 120, 40, True, "RE", 1, 1), bneck_conf(40, 5, 120, 40, True, "RE", 1, 1), bneck_conf(40, 3, 240, 80, False, "HS", 2, 1), # C3 bneck_conf(80, 3, 200, 80, False, "HS", 1, 1), bneck_conf(80, 3, 184, 80, False, "HS", 1, 1), bneck_conf(80, 3, 184, 80, False, "HS", 1, 1), bneck_conf(80, 3, 480, 112, True, "HS", 1, 1), bneck_conf(112, 3, 672, 112, True, "HS", 1, 1), bneck_conf(112, 5, 672, 160 // reduce_divider, True, "HS", 2, dilation), # C4 bneck_conf(160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, "HS", 1, dilation), bneck_conf(160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, "HS", 1, dilation), ] last_channel = adjust_channels(1280 // reduce_divider) # C5 return MobileNetV3(inverted_residual_setting=inverted_residual_setting, last_channel=last_channel, num_classes=num_classes) def mobilenet_v3_small(num_classes: int = 1000, reduced_tail: bool = False, dilated: bool = False) -> MobileNetV3: """ Constructs a large MobileNetV3 architecture from "Searching for MobileNetV3" . weights_link: https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth Args: num_classes (int): number of classes reduced_tail (bool): If True, reduces the channel counts of all feature layers between C4 and C5 by 2. It is used to reduce the channel redundancy in the backbone for Detection and Segmentation. 
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding"""
    # Padding equals the dilation, which keeps the spatial size unchanged at
    # stride 1 for a 3x3 kernel.
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=dilation,
        groups=groups,
        bias=False,
        dilation=dilation,
    )


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=1,
        stride=stride,
        bias=False,
    )


class Bottleneck(nn.Module):
    """ResNet bottleneck block: 1x1 reduce, 3x3, 1x1 expand, plus a shortcut.

    Bottleneck in torchvision places the stride for downsampling at the 3x3
    convolution (self.conv2), while the original implementation places the
    stride at the first 1x1 convolution (self.conv1) according to
    "Deep residual learning for image recognition"
    https://arxiv.org/abs/1512.03385.
    This variant is also known as ResNet V1.5 and improves accuracy according to
    https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None,
                 groups=1, base_width=64, dilation=1, norm_layer=None):
        super().__init__()
        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        width = int(planes * (base_width / 64.)) * groups
        expanded = planes * self.expansion

        # Both self.conv2 and self.downsample layers downsample the input
        # when stride != 1.
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, expanded)
        self.bn3 = norm_layer(expanded)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # Shortcut: the input itself, or its projection when one was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)
{}".format(replace_stride_with_dilation)) self.groups = groups self.base_width = width_per_group self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False) self.bn1 = norm_layer(self.inplanes) self.relu = nn.ReLU(inplace=True) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) self.layer1 = self._make_layer(block, 64, layers[0]) self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0]) self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1]) self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2]) self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) self.fc = nn.Linear(512 * block.expansion, num_classes) for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) # Zero-initialize the last BN in each residual branch, # so that the residual branch starts with zeros, and each residual block behaves like an identity. 
# This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 if zero_init_residual: for m in self.modules(): if isinstance(m, Bottleneck): nn.init.constant_(m.bn3.weight, 0) def _make_layer(self, block, planes, blocks, stride=1, dilate=False): norm_layer = self._norm_layer downsample = None previous_dilation = self.dilation if dilate: self.dilation *= stride stride = 1 if stride != 1 or self.inplanes != planes * block.expansion: downsample = nn.Sequential( conv1x1(self.inplanes, planes * block.expansion, stride), norm_layer(planes * block.expansion), ) layers = [] layers.append(block(self.inplanes, planes, stride, downsample, self.groups, self.base_width, previous_dilation, norm_layer)) self.inplanes = planes * block.expansion for _ in range(1, blocks): layers.append(block(self.inplanes, planes, groups=self.groups, base_width=self.base_width, dilation=self.dilation, norm_layer=norm_layer)) return nn.Sequential(*layers) def _forward_impl(self, x): # See note [TorchScript super()] x = self.conv1(x) x = self.bn1(x) x = self.relu(x) x = self.maxpool(x) x = self.layer1(x) x = self.layer2(x) x = self.layer3(x) x = self.layer4(x) x = self.avgpool(x) x = torch.flatten(x, 1) x = self.fc(x) return x def forward(self, x): return self._forward_impl(x) def _resnet(block, layers, **kwargs): model = ResNet(block, layers, **kwargs) return model def resnet50(**kwargs): r"""ResNet-50 model from `"Deep Residual Learning for Image Recognition" `_ Args: pretrained (bool): If True, returns a model pre-trained on ImageNet progress (bool): If True, displays a progress bar of the download to stderr """ return _resnet(Bottleneck, [3, 4, 6, 3], **kwargs) def resnet101(**kwargs): r"""ResNet-101 model from `"Deep Residual Learning for Image Recognition" `_ Args: pretrained (bool): If True, returns a model pre-trained on ImageNet progress (bool): If True, displays a progress bar of the download to stderr """ return _resnet(Bottleneck, [3, 4, 23, 3], **kwargs) 
class SegmentationPresetTrain:
    """Training-time transform pipeline applied jointly to an image and its target.

    The pipeline is: random resize with a size drawn between ``0.5 * base_size``
    and ``2.0 * base_size``, an optional random horizontal flip, a random crop
    to ``crop_size``, tensor conversion and normalisation.

    Args:
        base_size: reference size from which the resize range is derived.
        crop_size: size passed to ``T.RandomCrop``.
        hflip_prob: flip probability; the flip is omitted when it is not > 0.
        mean: per-channel mean passed to ``T.Normalize``.
        std: per-channel std passed to ``T.Normalize``.
    """

    def __init__(self, base_size, crop_size, hflip_prob=0.5,
                 mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        shortest, longest = int(0.5 * base_size), int(2.0 * base_size)

        pipeline = [T.RandomResize(shortest, longest)]
        pipeline += [T.RandomHorizontalFlip(hflip_prob)] if hflip_prob > 0 else []
        pipeline += [
            T.RandomCrop(crop_size),
            T.ToTensor(),
            T.Normalize(mean=mean, std=std),
        ]
        self.transforms = T.Compose(pipeline)

    def __call__(self, img, target):
        """Apply the composed transforms and return their ``(img, target)`` result."""
        transformed = self.transforms(img, target)
        return transformed
torch.cuda.is_available() else "cpu") batch_size = args.batch_size # segmentation nun_classes + background num_classes = args.num_classes + 1 # 用来保存训练以及验证过程中信息 results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")) # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> train.txt train_dataset = VOCSegmentation(args.data_path, year="2012", transforms=get_transform(train=True), txt_name="train.txt") # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> val.txt val_dataset = VOCSegmentation(args.data_path, year="2012", transforms=get_transform(train=False), txt_name="val.txt") num_workers = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]) train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True, pin_memory=True, collate_fn=train_dataset.collate_fn) val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, num_workers=num_workers, pin_memory=True, collate_fn=val_dataset.collate_fn) model = create_model(aux=args.aux, num_classes=num_classes) model.to(device) params_to_optimize = [ {"params": [p for p in model.backbone.parameters() if p.requires_grad]}, {"params": [p for p in model.classifier.parameters() if p.requires_grad]} ] if args.aux: params = [p for p in model.aux_classifier.parameters() if p.requires_grad] params_to_optimize.append({"params": params, "lr": args.lr * 10}) optimizer = torch.optim.SGD( params_to_optimize, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay ) scaler = torch.cuda.amp.GradScaler() if args.amp else None # 创建学习率更新策略,这里是每个step更新一次(不是每个epoch) lr_scheduler = create_lr_scheduler(optimizer, len(train_loader), args.epochs, warmup=True) # import matplotlib.pyplot as plt # lr_list = [] # for _ in range(args.epochs): # for _ in range(len(train_loader)): # lr_scheduler.step() # lr = optimizer.param_groups[0]["lr"] # lr_list.append(lr) # plt.plot(range(len(lr_list)), lr_list) # plt.show() if args.resume: checkpoint 
def parse_args():
    """Parse the command-line options of the single-device training script.

    Returns:
        argparse.Namespace: parsed options. ``aux`` and ``amp`` are real
        booleans.

    Note:
        ``--aux`` and ``--amp`` used ``type=bool``, which calls ``bool()`` on
        the raw string, so any non-empty value (including ``"False"``) became
        True. They now go through an explicit boolean parser. Values that are
        not a recognised boolean spelling are rejected with a usage error.
    """
    import argparse

    def _str2bool(value):
        # Convert a command-line token into a bool.
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ("true", "t", "yes", "y", "1", "on"):
            return True
        # The empty string stays False, matching the old bool("") result.
        if text in ("false", "f", "no", "n", "0", "off", ""):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser(description="pytorch deeplabv3 training")

    parser.add_argument("--data-path", default="/data/", help="VOCdevkit root")
    parser.add_argument("--num-classes", default=20, type=int)
    parser.add_argument("--aux", default=True, type=_str2bool, help="auxilier loss")
    parser.add_argument("--device", default="cuda", help="training device")
    parser.add_argument("-b", "--batch-size", default=4, type=int)
    parser.add_argument("--epochs", default=30, type=int, metavar="N",
                        help="number of total epochs to train")

    parser.add_argument('--lr', default=0.0001, type=float, help='initial learning rate')
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    parser.add_argument('--print-freq', default=10, type=int, help='print frequency')
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                        help='start epoch')
    # Mixed precision training parameters
    parser.add_argument("--amp", default=False, type=_str2bool,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()

    return args
def create_model(aux, num_classes, pretrain=True):
    """Build ``deeplabv3_resnet50`` and optionally load the pretrained weights.

    Args:
        aux: forwarded to ``deeplabv3_resnet50`` as ``aux``.
        num_classes: number of output classes, background included.
        pretrain: when True (the default, and the previous unconditional
            behaviour) load ``./deeplabv3_resnet50_coco.pth``. When False the
            freshly constructed model is returned without touching the file.
            This mirrors the ``pretrain`` switch of ``create_model`` in
            train.py.

    Returns:
        The constructed model.
    """
    model = deeplabv3_resnet50(aux=aux, num_classes=num_classes)
    if not pretrain:
        return model

    weights_dict = torch.load("./deeplabv3_resnet50_coco.pth", map_location='cpu')

    if num_classes != 21:
        # The released pretrained weights have 21 classes (background included).
        # For a different class count, drop the class-dependent weights so that
        # load_state_dict does not fail on a shape mismatch.
        # The substring test also matches keys containing "aux_classifier.4".
        for k in list(weights_dict.keys()):
            if "classifier.4" in k:
                del weights_dict[k]

    # strict=False: keys removed above (or absent from the file) are reported
    # rather than raising.
    missing_keys, unexpected_keys = model.load_state_dict(weights_dict, strict=False)
    if len(missing_keys) != 0 or len(unexpected_keys) != 0:
        print("missing_keys: ", missing_keys)
        print("unexpected_keys: ", unexpected_keys)

    return model
batch_size=args.batch_size, sampler=train_sampler, num_workers=args.workers, collate_fn=train_dataset.collate_fn, drop_last=True) val_data_loader = torch.utils.data.DataLoader( val_dataset, batch_size=1, sampler=test_sampler, num_workers=args.workers, collate_fn=train_dataset.collate_fn) print("Creating model") # create model num_classes equal background + 20 classes model = create_model(aux=args.aux, num_classes=num_classes) model.to(device) if args.sync_bn: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) model_without_ddp = model if args.distributed: model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) model_without_ddp = model.module params_to_optimize = [ {"params": [p for p in model_without_ddp.backbone.parameters() if p.requires_grad]}, {"params": [p for p in model_without_ddp.classifier.parameters() if p.requires_grad]}, ] if args.aux: params = [p for p in model_without_ddp.aux_classifier.parameters() if p.requires_grad] params_to_optimize.append({"params": params, "lr": args.lr * 10}) optimizer = torch.optim.SGD( params_to_optimize, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) scaler = torch.cuda.amp.GradScaler() if args.amp else None # 创建学习率更新策略,这里是每个step更新一次(不是每个epoch) lr_scheduler = create_lr_scheduler(optimizer, len(train_data_loader), args.epochs, warmup=True) # 如果传入resume参数,即上次训练的权重地址,则接着上次的参数训练 if args.resume: # If map_location is missing, torch.load will first load the module to CPU # and then copy each parameter to where it was saved, # which would result in all processes on the same machine using the same set of devices. 
if __name__ == "__main__":
    import argparse

    def _str2bool(value):
        # Convert a command-line token into a bool. ``type=bool`` would turn
        # every non-empty string (including "False") into True.
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ("true", "t", "yes", "y", "1", "on"):
            return True
        # The empty string stays False, matching the old bool("") result.
        if text in ("false", "f", "no", "n", "0", "off", ""):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser(
        description=__doc__)

    # Root directory of the training data (the folder containing VOCdevkit)
    parser.add_argument('--data-path', default='/data/', help='dataset')
    # Training device type
    parser.add_argument('--device', default='cuda', help='device')
    # Number of target classes (background excluded)
    parser.add_argument('--num-classes', default=20, type=int, help='num_classes')
    # Batch size on each GPU
    parser.add_argument('-b', '--batch-size', default=4, type=int,
                        help='images per gpu, the total batch size is $NGPU x batch_size')
    parser.add_argument("--aux", default=True, type=_str2bool, help="auxilier loss")
    # Epoch index to continue training from
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # Total number of training epochs
    parser.add_argument('--epochs', default=20, type=int, metavar='N',
                        help='number of total epochs to run')
    # Whether to use synchronised BN across GPUs; off by default
    parser.add_argument('--sync_bn', type=_str2bool, default=False, help='whether using SyncBatchNorm')
    # Number of data loading / preprocessing workers
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    # Learning rate; defaults to 0.0001
    parser.add_argument('--lr', default=0.0001, type=float,
                        help='initial learning rate')
    # SGD momentum
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # SGD weight decay
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # How often training information is printed
    parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
    # Directory where files are saved
    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')
    # Continue training from a previous checkpoint
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    # Evaluate only, no training
    parser.add_argument(
        "--test-only",
        dest="test_only",
        help="Only test the model",
        action="store_true",
    )

    # Number of distributed processes
    parser.add_argument('--world-size', default=1, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
    # Mixed precision training parameters
    parser.add_argument("--amp", default=False, type=_str2bool,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()

    # If an output directory is given, create it when it does not exist yet.
    if args.output_dir:
        mkdir(args.output_dir)

    main(args)
class ConfusionMatrix(object):
    """Accumulates an ``n x n`` confusion matrix and derives metrics from it.

    Entry ``mat[i, j]`` counts elements whose label in ``a`` is ``i`` and whose
    label in ``b`` is ``j``. The matrix is created lazily on the first
    ``update`` call, on the device of its first argument.
    """

    def __init__(self, num_classes):
        self.num_classes = num_classes
        self.mat = None

    def update(self, a, b):
        """Add the pairs ``(a[i], b[i])`` to the matrix.

        Elements whose value in ``a`` lies outside ``[0, num_classes)`` are
        skipped.
        """
        num = self.num_classes
        if self.mat is None:
            # Allocate the matrix on first use.
            self.mat = torch.zeros((num, num), dtype=torch.int64, device=a.device)
        with torch.no_grad():
            # Keep only positions whose value in `a` is a valid class index.
            valid = (a >= 0) & (a < num)
            # Encode each (a, b) pair as one flat index a * n + b, so a single
            # bincount yields all n * n cells.
            flat_index = num * a[valid].to(torch.int64) + b[valid]
            counts = torch.bincount(flat_index, minlength=num ** 2)
            self.mat += counts.reshape(num, num)

    def reset(self):
        """Zero the matrix in place, if it has been created."""
        if self.mat is None:
            return
        self.mat.zero_()

    def compute(self):
        """Return ``(acc_global, acc, iu)`` computed from the matrix.

        ``acc_global`` is the diagonal sum over the total, ``acc`` is the
        diagonal over the row sums, and ``iu`` is the diagonal over
        ``row sum + column sum - diagonal``.
        """
        h = self.mat.float()
        correct = torch.diag(h)
        row_total = h.sum(1)
        # Overall accuracy: the diagonal holds the matching pairs.
        acc_global = correct.sum() / h.sum()
        # Per-class accuracy.
        acc = correct / row_total
        # Per-class intersection over union.
        iu = correct / (row_total + h.sum(0) - correct)
        return acc_global, acc, iu

    def reduce_from_all_processes(self):
        """All-reduce the matrix across processes when distributed mode is active."""
        if not (torch.distributed.is_available() and torch.distributed.is_initialized()):
            return
        torch.distributed.barrier()
        torch.distributed.all_reduce(self.mat)

    def __str__(self):
        acc_global, acc, iu = self.compute()
        template = (
            'global correct: {:.1f}\n'
            'average row correct: {}\n'
            'IoU: {}\n'
            'mean IoU: {:.1f}')
        per_class_acc = ['{:.1f}'.format(i) for i in (acc * 100).tolist()]
        per_class_iou = ['{:.1f}'.format(i) for i in (iu * 100).tolist()]
        return template.format(
            acc_global.item() * 100,
            per_class_acc,
            per_class_iou,
            iu.mean().item() * 100)
class MetricLogger(object):
    """Collects named meters and prints progress lines while iterating.

    Meters live in ``self.meters``, a ``defaultdict`` that creates a
    ``SmoothedValue`` for any name not seen before. A meter can also be read
    as an attribute of the logger.
    """

    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Feed one value per keyword into the meter of the same name.

        Tensors are converted with ``.item()``; every value must then be a
        float or an int.
        """
        for name, value in kwargs.items():
            scalar = value.item() if isinstance(value, torch.Tensor) else value
            assert isinstance(scalar, (float, int))
            self.meters[name].update(scalar)

    def __getattr__(self, attr):
        # Only reached when normal attribute lookup fails: try the meters
        # first, then the instance dictionary.
        for table in (self.meters, self.__dict__):
            if attr in table:
                return table[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        return self.delimiter.join(
            "{}: {}".format(name, str(meter)) for name, meter in self.meters.items()
        )

    def synchronize_between_processes(self):
        """Call ``synchronize_between_processes`` on every meter."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register ``meter`` under ``name``, replacing any existing one."""
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield the items of ``iterable``, printing a status line every ``print_freq`` items.

        Each status line carries the item index, an ETA, all meters, the
        iteration and data-loading timings and, when CUDA is available, the
        peak allocated memory in MB. A total-time line is printed at the end.
        ``iterable`` must support ``len()``.
        """
        header = header if header else ''
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')

        # Pad the running index to the width of the total count.
        space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
        columns = [
            header,
            '[{0' + space_fmt + '}/{1}]',
            'eta: {eta}',
            '{meters}',
            'time: {time}',
            'data: {data}',
        ]
        if torch.cuda.is_available():
            columns.append('max mem: {memory:.0f}')
        log_msg = self.delimiter.join(columns)

        MB = 1024.0 * 1024.0
        for i, obj in enumerate(iterable):
            # Time spent waiting for this item.
            data_time.update(time.time() - end)
            yield obj
            # Waiting time plus the consumer's work on the item.
            iter_time.update(time.time() - end)
            if i % print_freq == 0:
                eta_seconds = iter_time.global_avg * (len(iterable) - i)
                fields = {
                    'eta': str(datetime.timedelta(seconds=int(eta_seconds))),
                    'meters': str(self),
                    'time': str(iter_time),
                    'data': str(data_time),
                }
                if torch.cuda.is_available():
                    fields['memory'] = torch.cuda.max_memory_allocated() / MB
                print(log_msg.format(i, len(iterable), **fields))
            end = time.time()

        elapsed = time.time() - start_time
        elapsed_str = str(datetime.timedelta(seconds=int(elapsed)))
        print('{} Total time: {}'.format(header, elapsed_str))
def create_lr_scheduler(optimizer,
                        num_step: int,
                        epochs: int,
                        warmup=True,
                        warmup_epochs=1,
                        warmup_factor=1e-3):
    """Create a per-step ``LambdaLR`` with linear warmup followed by poly decay.

    The scheduler is meant to be stepped once per optimisation step, not once
    per epoch.

    Args:
        optimizer: optimizer whose learning rates are scaled.
        num_step: number of steps per epoch; must be > 0.
        epochs: total number of epochs; must be > 0.
        warmup: whether to run the warmup phase.
        warmup_epochs: length of the warmup phase in epochs.
        warmup_factor: multiplier at step 0 of the warmup phase.

    Returns:
        torch.optim.lr_scheduler.LambdaLR: during warmup the multiplier rises
        linearly from ``warmup_factor`` to 1. Afterwards it follows
        ``(1 - progress) ** 0.9`` down to 0 at ``epochs * num_step`` steps.

    Changes from the previous version (results for in-range steps are
    unchanged):
        * Steps beyond ``epochs * num_step`` return 0.0. Previously the
          negative base raised to 0.9 produced a complex number.
        * ``warmup=True`` with ``warmup_epochs=0`` behaves like no warmup
          instead of dividing by zero.
        * ``warmup`` is tested for truthiness instead of identity with
          ``True``/``False``.
    """
    assert num_step > 0 and epochs > 0
    effective_warmup_epochs = warmup_epochs if warmup else 0
    warmup_steps = effective_warmup_epochs * num_step
    decay_steps = (epochs - effective_warmup_epochs) * num_step

    def f(x):
        """
        Return the learning-rate multiplier for step ``x``.
        Note that PyTorch calls lr_scheduler.step() once before training starts.
        """
        if warmup_steps > 0 and x <= warmup_steps:
            alpha = float(x) / warmup_steps
            # During warmup the multiplier goes from warmup_factor -> 1
            return warmup_factor * (1 - alpha) + alpha
        if decay_steps <= 0:
            # No decay phase is configured; past the warmup nothing is left.
            return 0.0
        # After warmup the multiplier goes from 1 -> 0
        # (see deeplab_v2: Learning rate policy).
        remaining = 1 - (x - warmup_steps) / decay_steps
        # Clamp so that over-stepping cannot yield a negative base.
        return max(remaining, 0.0) ** 0.9

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)
class Normalize(object):
    """Joint transform that normalises the image and passes the target through.

    Args:
        mean: value forwarded to ``F.normalize`` as ``mean``.
        std: value forwarded to ``F.normalize`` as ``std``.
    """

    def __init__(self, mean, std):
        self.mean, self.std = mean, std

    def __call__(self, image, target):
        """Return ``(normalised image, target)``; ``target`` is not modified."""
        normalized = F.normalize(image, mean=self.mean, std=self.std)
        return normalized, target
def parse_args():
    """Parse the command-line options of the validation script.

    Returns:
        argparse.Namespace: parsed options. ``aux`` is a real boolean.

    Note:
        ``--aux`` used ``type=bool``, which calls ``bool()`` on the raw
        string, so any non-empty value (including ``"False"``) became True.
        It now goes through an explicit boolean parser. Values that are not a
        recognised boolean spelling are rejected with a usage error.
    """
    import argparse

    def _str2bool(value):
        # Convert a command-line token into a bool.
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ("true", "t", "yes", "y", "1", "on"):
            return True
        # The empty string stays False, matching the old bool("") result.
        if text in ("false", "f", "no", "n", "0", "off", ""):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser(description="pytorch deeplabv3 validation")

    parser.add_argument("--data-path", default="/data/", help="VOCdevkit root")
    parser.add_argument("--weights", default="./save_weights/model_29.pth")
    parser.add_argument("--num-classes", default=20, type=int)
    parser.add_argument("--aux", default=True, type=_str2bool, help="auxilier loss")
    parser.add_argument("--device", default="cuda", help="training device")
    parser.add_argument('--print-freq', default=10, type=int, help='print frequency')

    args = parser.parse_args()

    return args
简易的预测脚本,使用训练好的权重进行预测测试 ├── validation.py: 利用训练好的权重验证/测试数据的mIoU等指标,并生成record_mAP.txt文件 └── pascal_voc_classes.json: pascal_voc标签文件 ``` ## 预训练权重下载地址: * 注意:官方提供的预训练权重是在COCO上预训练得到的,训练时只针对和PASCAL VOC相同的类别进行了训练,所以类别数是21(包括背景) * fcn_resnet50: https://download.pytorch.org/models/fcn_resnet50_coco-1167a1af.pth * fcn_resnet101: https://download.pytorch.org/models/fcn_resnet101_coco-7ecb50ca.pth * 注意,下载的预训练权重记得要重命名,比如在train.py中读取的是```fcn_resnet50_coco.pth```文件, 不是```fcn_resnet50_coco-1167a1af.pth``` ## 数据集,本例程使用的是PASCAL VOC2012数据集 * Pascal VOC2012 train/val数据集下载地址:http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar * 如果不了解数据集或者想使用自己的数据集进行训练,请参考我的博文: https://blog.csdn.net/qq_37541097/article/details/115787033 ## 训练方法 * 确保提前准备好数据集 * 确保提前下载好对应预训练模型权重 * 若要使用单GPU或者CPU训练,直接使用train.py训练脚本 * 若要使用多GPU训练,使用```torchrun --nproc_per_node=8 train_multi_GPU.py```指令,```nproc_per_node```参数为使用GPU数量 * 如果想指定使用哪些GPU设备可在指令前加上```CUDA_VISIBLE_DEVICES=0,3```(例如我只要使用设备中的第1块和第4块GPU设备) * ```CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py``` ## 注意事项 * 在使用训练脚本时,注意要将'--data-path'(VOC_root)设置为自己存放'VOCdevkit'文件夹所在的**根目录** * 在使用预测脚本时,要将'weights_path'设置为你自己生成的权重路径。 * 使用validation文件时,注意确保你的验证集或者测试集中必须包含每个类别的目标,并且使用时只需要修改'--num-classes'、'--aux'、'--data-path'和'--weights'即可,其他代码尽量不要改动 ## 如果对FCN原理不是很理解可参考我的bilibili * https://www.bilibili.com/video/BV1J3411C7zd * https://www.bilibili.com/video/BV1ev411u7TX ## 进一步了解该项目,以及对FCN代码的分析可参考我的bilibili * https://www.bilibili.com/video/BV19q4y1971Q ## Pytorch官方实现的FCN网络框架图 ![torch_fcn](torch_fcn.png) ================================================ FILE: pytorch_segmentation/fcn/get_palette.py ================================================ import json import numpy as np from PIL import Image # 读取mask标签 target = Image.open("./2007_001288.png") # 获取调色板 palette = target.getpalette() palette = np.reshape(palette, (-1, 3)).tolist() # 转换成字典子形式 pd = dict((i, color) for i, color in enumerate(palette)) json_str = json.dumps(pd) with 
class VOCSegmentation(data.Dataset):
    """Pascal VOC segmentation dataset yielding (image, mask) pairs.

    Sample names are read from
    ``<voc_root>/VOCdevkit/VOC<year>/ImageSets/Segmentation/<txt_name>``;
    images come from ``JPEGImages`` (``.jpg``) and masks from
    ``SegmentationClass`` (``.png``).

    Args:
        voc_root: directory that contains the ``VOCdevkit`` folder.
        year: ``"2007"`` or ``"2012"``.
        transforms: optional callable ``(img, target) -> (img, target)``.
        txt_name: name of the split file listing the sample names.

    Raises:
        AssertionError: if ``year`` is unsupported or the dataset root /
            split file does not exist.
    """

    def __init__(self, voc_root, year="2012", transforms=None, txt_name: str = "train.txt"):
        super(VOCSegmentation, self).__init__()
        assert year in ["2007", "2012"], "year must be in ['2007', '2012']"
        root = os.path.join(voc_root, "VOCdevkit", f"VOC{year}")
        assert os.path.exists(root), "path '{}' does not exist.".format(root)
        image_dir = os.path.join(root, 'JPEGImages')
        mask_dir = os.path.join(root, 'SegmentationClass')

        txt_path = os.path.join(root, "ImageSets", "Segmentation", txt_name)
        assert os.path.exists(txt_path), "file '{}' does not exist.".format(txt_path)
        with open(txt_path, "r") as f:
            # Skip blank lines so they do not become bogus sample names.
            file_names = [line.strip() for line in f if line.strip()]

        # Both lists are derived from the same names, so they always have
        # equal length; the former length assert could never fail.
        self.images = [os.path.join(image_dir, name + ".jpg") for name in file_names]
        self.masks = [os.path.join(mask_dir, name + ".png") for name in file_names]
        self.transforms = transforms

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where target is the image segmentation.
        """
        img = Image.open(self.images[index]).convert('RGB')
        # The mask is opened as stored (no mode conversion).
        target = Image.open(self.masks[index])

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.images)

    @staticmethod
    def collate_fn(batch):
        """Batch (image, target) pairs, padding images with 0 and targets with 255."""
        images, targets = list(zip(*batch))
        batched_imgs = cat_list(images, fill_value=0)
        batched_targets = cat_list(targets, fill_value=255)
        return batched_imgs, batched_targets


def cat_list(images, fill_value=0):
    """Stack tensors of differing sizes into one padded batch tensor.

    Each tensor is copied into the top-left corner of its slot (last two
    dimensions); the remainder of the slot keeps ``fill_value``.

    Args:
        images: non-empty sequence of tensors with the same number of dims.
        fill_value: value used for the padded area.

    Returns:
        Tensor of shape ``(len(images), *per-dimension maximum)`` with the
        dtype and device of ``images[0]``.
    """
    # Per-dimension maximum over the batch (e.g. channel, h, w).
    max_size = tuple(max(s) for s in zip(*[img.shape for img in images]))
    batch_shape = (len(images),) + max_size
    # new_full is the documented replacement for the legacy
    # ``Tensor.new(*sizes).fill_(...)`` constructor.
    batched_imgs = images[0].new_full(batch_shape, fill_value)
    for img, pad_img in zip(images, batched_imgs):
        pad_img[..., :img.shape[-2], :img.shape[-1]].copy_(img)
    return batched_imgs
[64, 128, 192], "47": [192, 128, 192], "48": [0, 64, 64], "49": [128, 64, 64], "50": [0, 192, 64], "51": [128, 192, 64], "52": [0, 64, 192], "53": [128, 64, 192], "54": [0, 192, 192], "55": [128, 192, 192], "56": [64, 64, 64], "57": [192, 64, 64], "58": [64, 192, 64], "59": [192, 192, 64], "60": [64, 64, 192], "61": [192, 64, 192], "62": [64, 192, 192], "63": [192, 192, 192], "64": [32, 0, 0], "65": [160, 0, 0], "66": [32, 128, 0], "67": [160, 128, 0], "68": [32, 0, 128], "69": [160, 0, 128], "70": [32, 128, 128], "71": [160, 128, 128], "72": [96, 0, 0], "73": [224, 0, 0], "74": [96, 128, 0], "75": [224, 128, 0], "76": [96, 0, 128], "77": [224, 0, 128], "78": [96, 128, 128], "79": [224, 128, 128], "80": [32, 64, 0], "81": [160, 64, 0], "82": [32, 192, 0], "83": [160, 192, 0], "84": [32, 64, 128], "85": [160, 64, 128], "86": [32, 192, 128], "87": [160, 192, 128], "88": [96, 64, 0], "89": [224, 64, 0], "90": [96, 192, 0], "91": [224, 192, 0], "92": [96, 64, 128], "93": [224, 64, 128], "94": [96, 192, 128], "95": [224, 192, 128], "96": [32, 0, 64], "97": [160, 0, 64], "98": [32, 128, 64], "99": [160, 128, 64], "100": [32, 0, 192], "101": [160, 0, 192], "102": [32, 128, 192], "103": [160, 128, 192], "104": [96, 0, 64], "105": [224, 0, 64], "106": [96, 128, 64], "107": [224, 128, 64], "108": [96, 0, 192], "109": [224, 0, 192], "110": [96, 128, 192], "111": [224, 128, 192], "112": [32, 64, 64], "113": [160, 64, 64], "114": [32, 192, 64], "115": [160, 192, 64], "116": [32, 64, 192], "117": [160, 64, 192], "118": [32, 192, 192], "119": [160, 192, 192], "120": [96, 64, 64], "121": [224, 64, 64], "122": [96, 192, 64], "123": [224, 192, 64], "124": [96, 64, 192], "125": [224, 64, 192], "126": [96, 192, 192], "127": [224, 192, 192], "128": [0, 32, 0], "129": [128, 32, 0], "130": [0, 160, 0], "131": [128, 160, 0], "132": [0, 32, 128], "133": [128, 32, 128], "134": [0, 160, 128], "135": [128, 160, 128], "136": [64, 32, 0], "137": [192, 32, 0], "138": [64, 160, 0], "139": [192, 
160, 0], "140": [64, 32, 128], "141": [192, 32, 128], "142": [64, 160, 128], "143": [192, 160, 128], "144": [0, 96, 0], "145": [128, 96, 0], "146": [0, 224, 0], "147": [128, 224, 0], "148": [0, 96, 128], "149": [128, 96, 128], "150": [0, 224, 128], "151": [128, 224, 128], "152": [64, 96, 0], "153": [192, 96, 0], "154": [64, 224, 0], "155": [192, 224, 0], "156": [64, 96, 128], "157": [192, 96, 128], "158": [64, 224, 128], "159": [192, 224, 128], "160": [0, 32, 64], "161": [128, 32, 64], "162": [0, 160, 64], "163": [128, 160, 64], "164": [0, 32, 192], "165": [128, 32, 192], "166": [0, 160, 192], "167": [128, 160, 192], "168": [64, 32, 64], "169": [192, 32, 64], "170": [64, 160, 64], "171": [192, 160, 64], "172": [64, 32, 192], "173": [192, 32, 192], "174": [64, 160, 192], "175": [192, 160, 192], "176": [0, 96, 64], "177": [128, 96, 64], "178": [0, 224, 64], "179": [128, 224, 64], "180": [0, 96, 192], "181": [128, 96, 192], "182": [0, 224, 192], "183": [128, 224, 192], "184": [64, 96, 64], "185": [192, 96, 64], "186": [64, 224, 64], "187": [192, 224, 64], "188": [64, 96, 192], "189": [192, 96, 192], "190": [64, 224, 192], "191": [192, 224, 192], "192": [32, 32, 0], "193": [160, 32, 0], "194": [32, 160, 0], "195": [160, 160, 0], "196": [32, 32, 128], "197": [160, 32, 128], "198": [32, 160, 128], "199": [160, 160, 128], "200": [96, 32, 0], "201": [224, 32, 0], "202": [96, 160, 0], "203": [224, 160, 0], "204": [96, 32, 128], "205": [224, 32, 128], "206": [96, 160, 128], "207": [224, 160, 128], "208": [32, 96, 0], "209": [160, 96, 0], "210": [32, 224, 0], "211": [160, 224, 0], "212": [32, 96, 128], "213": [160, 96, 128], "214": [32, 224, 128], "215": [160, 224, 128], "216": [96, 96, 0], "217": [224, 96, 0], "218": [96, 224, 0], "219": [224, 224, 0], "220": [96, 96, 128], "221": [224, 96, 128], "222": [96, 224, 128], "223": [224, 224, 128], "224": [32, 32, 64], "225": [160, 32, 64], "226": [32, 160, 64], "227": [160, 160, 64], "228": [32, 32, 192], "229": [160, 32, 192], 
def time_synchronized():
    """Return ``time.time()`` after synchronizing CUDA when it is available."""
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()


def main():
    """Run FCN-ResNet50 on ``./test.jpg`` and save the mask as ``test_result.png``.

    Loads the palette from ``./palette.json`` and the checkpoint from
    ``./save_weights/model_29.pth``, drops the auxiliary-classifier weights,
    runs one timed forward pass and writes the arg-max class map as a
    palettised PNG.

    Raises:
        AssertionError: if the weights, image or palette file is missing.
    """
    aux = False  # the auxiliary classifier is not needed at inference time
    classes = 20
    weights_path = "./save_weights/model_29.pth"
    img_path = "./test.jpg"
    palette_path = "./palette.json"
    assert os.path.exists(weights_path), f"weights {weights_path} not found."
    assert os.path.exists(img_path), f"image {img_path} not found."
    assert os.path.exists(palette_path), f"palette {palette_path} not found."
    with open(palette_path, "rb") as f:
        palette_dict = json.load(f)
        # Flatten {index: [r, g, b]} into the flat list putpalette() receives.
        palette = [channel for color in palette_dict.values() for channel in color]

    # get devices
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # create model
    model = fcn_resnet50(aux=aux, num_classes=classes+1)

    # delete weights about aux_classifier
    weights_dict = torch.load(weights_path, map_location='cpu')['model']
    for k in list(weights_dict.keys()):
        if "aux" in k:
            del weights_dict[k]

    # load weights
    model.load_state_dict(weights_dict)
    model.to(device)

    # load image; force 3 channels because Normalize below uses 3-channel
    # statistics, so grayscale/RGBA/palette inputs would otherwise fail.
    original_img = Image.open(img_path).convert("RGB")

    # from pil image to tensor and normalize
    data_transform = transforms.Compose([transforms.Resize(520),
                                         transforms.ToTensor(),
                                         transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                                              std=(0.229, 0.224, 0.225))])
    img = data_transform(original_img)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)

    model.eval()  # switch to evaluation mode
    with torch.no_grad():
        # warm-up pass so the timed run below excludes one-off start-up cost
        img_height, img_width = img.shape[-2:]
        init_img = torch.zeros((1, 3, img_height, img_width), device=device)
        model(init_img)

        t_start = time_synchronized()
        output = model(img.to(device))
        t_end = time_synchronized()
        print("inference time: {}".format(t_end - t_start))

        prediction = output['out'].argmax(1).squeeze(0)
        prediction = prediction.to("cpu").numpy().astype(np.uint8)
        mask = Image.fromarray(prediction)
        mask.putpalette(palette)
        mask.save("test_result.png")
'66.5', '82.8', '78.7', '88.9', '95.1', '62.9', '86.6', '67.1', '89.4', '81.4'] IoU: ['93.2', '85.9', '39.2', '79.5', '68.9', '55.6', '88.1', '72.8', '81.7', '41.1', '74.9', '54.0', '72.9', '74.6', '77.7', '87.1', '54.4', '75.1', '50.7', '82.9', '72.6'] mean IoU: 70.6 [epoch: 1] train_loss: 0.6589 lr: 0.000090 global correct: 93.4 average row correct: ['96.5', '89.4', '74.6', '84.5', '82.9', '68.8', '93.8', '84.7', '93.5', '56.9', '87.5', '68.0', '81.8', '78.2', '90.7', '94.8', '65.6', '87.1', '70.9', '89.1', '85.1'] IoU: ['93.3', '85.5', '38.2', '79.9', '69.8', '62.7', '87.5', '75.7', '80.3', '40.7', '74.8', '54.6', '72.2', '74.1', '76.6', '87.6', '54.7', '72.9', '51.0', '82.6', '70.9'] mean IoU: 70.7 [epoch: 2] train_loss: 0.6238 lr: 0.000080 global correct: 93.5 average row correct: ['96.5', '93.2', '75.8', '85.3', '84.2', '70.6', '91.7', '85.7', '93.2', '58.8', '76.7', '68.3', '81.4', '83.2', '88.7', '95.1', '69.9', '88.6', '70.5', '91.8', '86.7'] IoU: ['93.5', '86.1', '39.0', '81.2', '69.9', '63.9', '87.7', '76.4', '80.2', '41.5', '71.8', '56.2', '71.3', '74.4', '78.0', '87.3', '57.3', '70.9', '50.3', '82.8', '71.9'] mean IoU: 71.0 [epoch: 3] train_loss: 0.5854 lr: 0.000069 global correct: 93.5 average row correct: ['96.7', '91.5', '77.3', '83.9', '80.8', '74.0', '92.6', '86.7', '94.3', '65.0', '68.7', '67.8', '76.9', '88.2', '85.5', '94.5', '71.8', '87.9', '66.7', '89.4', '86.3'] IoU: ['93.6', '87.0', '39.4', '80.6', '69.4', '66.5', '87.7', '76.9', '78.5', '41.5', '66.3', '55.8', '68.9', '70.8', '78.4', '88.2', '58.7', '71.0', '49.4', '83.3', '74.1'] mean IoU: 70.8 [epoch: 4] train_loss: 0.6140 lr: 0.000059 global correct: 93.6 average row correct: ['96.5', '92.4', '77.4', '85.1', '80.2', '80.6', '94.1', '87.0', '94.8', '62.8', '87.2', '70.0', '78.8', '77.5', '85.8', '94.7', '73.4', '83.9', '68.6', '88.0', '86.6'] IoU: ['93.7', '87.1', '39.4', '80.7', '70.1', '70.2', '87.0', '77.3', '78.9', '41.3', '72.3', '56.7', '69.9', '72.2', '77.9', '87.8', '57.8', 
'72.6', '50.5', '82.1', '74.3'] mean IoU: 71.4 [epoch: 5] train_loss: 0.5653 lr: 0.000048 global correct: 93.7 average row correct: ['96.6', '87.7', '76.9', '84.3', '79.3', '81.6', '92.6', '88.4', '94.0', '61.9', '76.7', '71.0', '81.5', '88.2', '87.3', '94.5', '73.5', '84.9', '69.1', '91.4', '86.6'] IoU: ['93.7', '85.6', '40.1', '80.7', '70.2', '70.5', '87.6', '77.4', '80.4', '42.0', '72.4', '57.0', '72.2', '73.4', '78.4', '88.1', '58.6', '74.3', '50.4', '82.7', '73.6'] mean IoU: 71.9 [epoch: 6] train_loss: 0.5500 lr: 0.000037 global correct: 93.1 average row correct: ['96.4', '91.6', '74.8', '78.4', '83.2', '81.9', '89.4', '88.7', '95.8', '59.4', '57.2', '70.0', '77.4', '75.2', '87.6', '95.2', '74.1', '82.4', '72.4', '91.9', '87.2'] IoU: ['93.6', '87.5', '39.5', '76.1', '68.5', '71.0', '86.1', '78.6', '74.6', '41.4', '54.6', '57.2', '61.0', '67.0', '78.2', '87.9', '58.1', '67.5', '50.4', '82.0', '74.2'] mean IoU: 69.3 [epoch: 7] train_loss: 0.5553 lr: 0.000026 global correct: 93.3 average row correct: ['96.7', '88.4', '72.3', '82.2', '80.7', '81.6', '82.5', '89.7', '93.4', '59.0', '69.5', '70.1', '78.8', '86.4', '87.3', '94.9', '70.8', '89.6', '72.2', '85.4', '86.3'] IoU: ['93.6', '85.5', '39.0', '79.2', '69.8', '70.8', '79.7', '76.8', '79.0', '41.8', '65.9', '57.1', '68.9', '71.0', '78.0', '87.9', '58.6', '66.9', '50.7', '78.2', '74.4'] mean IoU: 70.1 [epoch: 8] train_loss: 0.5601 lr: 0.000014 global correct: 93.4 average row correct: ['96.5', '91.0', '73.8', '81.4', '83.7', '83.4', '89.9', '88.8', '95.4', '61.3', '80.6', '70.0', '75.4', '84.3', '88.2', '94.9', '72.0', '83.7', '69.7', '83.3', '88.5'] IoU: ['93.6', '87.2', '40.1', '78.6', '69.8', '71.3', '84.5', '77.6', '76.3', '41.0', '72.4', '56.8', '66.9', '73.2', '77.6', '87.8', '59.2', '72.5', '50.2', '78.7', '69.9'] mean IoU: 70.7 [epoch: 9] train_loss: 0.5550 lr: 0.000000 global correct: 93.1 average row correct: ['96.7', '93.8', '72.7', '73.0', '82.1', '80.4', '95.6', '86.7', '95.6', '61.8', '63.6', 
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """Create a bias-free 3x3 convolution whose padding equals its dilation."""
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=dilation,
        groups=groups,
        bias=False,
        dilation=dilation,
    )


def conv1x1(in_planes, out_planes, stride=1):
    """Create a bias-free 1x1 convolution."""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


class Bottleneck(nn.Module):
    """Residual bottleneck: 1x1 conv -> 3x3 conv -> 1x1 conv plus a shortcut.

    The stride is applied in the 3x3 convolution (``conv2``), not in the
    first 1x1 convolution as in the original paper
    "Deep residual learning for image recognition"
    (https://arxiv.org/abs/1512.03385). This variant is also known as
    ResNet V1.5 and improves accuracy according to
    https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.

    The block outputs ``planes * expansion`` channels. ``downsample``, when
    given, is applied to the input to produce the shortcut branch.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None,
                 groups=1, base_width=64, dilation=1, norm_layer=None):
        super().__init__()
        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        width = int(planes * (base_width / 64.)) * groups
        out_planes = planes * self.expansion

        # conv2 (and ``downsample``, if any) reduce resolution when stride != 1.
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, out_planes)
        self.bn3 = norm_layer(out_planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)
self.layer1 = self._make_layer(block, 64, layers[0]) self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0]) self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1]) self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2]) self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) self.fc = nn.Linear(512 * block.expansion, num_classes) for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) # Zero-initialize the last BN in each residual branch, # so that the residual branch starts with zeros, and each residual block behaves like an identity. # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 if zero_init_residual: for m in self.modules(): if isinstance(m, Bottleneck): nn.init.constant_(m.bn3.weight, 0) def _make_layer(self, block, planes, blocks, stride=1, dilate=False): norm_layer = self._norm_layer downsample = None previous_dilation = self.dilation if dilate: self.dilation *= stride stride = 1 if stride != 1 or self.inplanes != planes * block.expansion: downsample = nn.Sequential( conv1x1(self.inplanes, planes * block.expansion, stride), norm_layer(planes * block.expansion), ) layers = [] layers.append(block(self.inplanes, planes, stride, downsample, self.groups, self.base_width, previous_dilation, norm_layer)) self.inplanes = planes * block.expansion for _ in range(1, blocks): layers.append(block(self.inplanes, planes, groups=self.groups, base_width=self.base_width, dilation=self.dilation, norm_layer=norm_layer)) return nn.Sequential(*layers) def _forward_impl(self, x): # See note [TorchScript super()] x = self.conv1(x) x = self.bn1(x) x = self.relu(x) x = self.maxpool(x) x = self.layer1(x) x = self.layer2(x) x = self.layer3(x) x = 
class IntermediateLayerGetter(nn.ModuleDict):
    """
    Wrap a model so that a forward pass returns selected intermediate outputs.

    The direct children of ``model`` are copied in registration order, up to
    and including the last child named in ``return_layers``; later children
    are dropped. ``forward`` feeds the input through the kept children one
    after another, so it only reproduces the model when the children were
    registered in the order they are used and none is used twice. Only direct
    children can be selected: ``model.feature1`` works, but not
    ``model.feature1.layer2``.

    Args:
        model (nn.Module): model whose children are run sequentially.
        return_layers (Dict[name, new_name]): maps the name of a child to the
            key under which its output is returned.

    Raises:
        ValueError: if a key of ``return_layers`` is not a direct child of ``model``.
    """
    _version = 2
    __annotations__ = {
        "return_layers": Dict[str, str],
    }

    def __init__(self, model: nn.Module, return_layers: Dict[str, str]) -> None:
        child_names = [child_name for child_name, _ in model.named_children()]
        if not set(return_layers).issubset(child_names):
            raise ValueError("return_layers are not present in model")

        # Names still to be reached; the caller's mapping is stored untouched.
        pending = {str(key) for key in return_layers}

        # Rebuild the backbone, leaving out the modules that are never used.
        kept = OrderedDict()
        for child_name, child in model.named_children():
            kept[child_name] = child
            pending.discard(child_name)
            if not pending:
                break

        super().__init__(kept)
        self.return_layers = return_layers

    def forward(self, x: Tensor) -> Dict[str, Tensor]:
        collected = OrderedDict()
        feature = x
        for layer_name, layer in self.items():
            feature = layer(feature)
            if layer_name not in self.return_layers:
                continue
            collected[self.return_layers[layer_name]] = feature
        return collected
class FCNHead(nn.Sequential):
    """Segmentation head: 3x3 conv -> BatchNorm -> ReLU -> Dropout(0.1) -> 1x1 conv.

    Args:
        in_channels: channels of the incoming feature map.
        channels: number of output channels.

    The hidden width is ``in_channels // 4``.
    """

    def __init__(self, in_channels, channels):
        hidden = in_channels // 4
        super().__init__(
            nn.Conv2d(in_channels, hidden, 3, padding=1, bias=False),
            nn.BatchNorm2d(hidden),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Conv2d(hidden, channels, 1),
        )
def fcn_resnet101(aux, num_classes=21, pretrain_backbone=False):
    """Assemble an FCN model on a dilated ResNet-101 backbone.

    Args:
        aux: when true, also return ``layer3`` features and attach an
            auxiliary ``FCNHead`` to them.
        num_classes: number of output channels of each head.
        pretrain_backbone: when true, load backbone weights from the local
            file ``resnet101.pth`` before wrapping the backbone.

    Returns:
        The assembled ``FCN`` module.
    """
    # 'resnet101_imagenet': 'https://download.pytorch.org/models/resnet101-63fe2227.pth'
    # 'fcn_resnet101_coco': 'https://download.pytorch.org/models/fcn_resnet101_coco-7ecb50ca.pth'
    resnet = resnet101(replace_stride_with_dilation=[False, True, True])

    if pretrain_backbone:
        # Load the pretrained ResNet-101 backbone weights.
        state = torch.load("resnet101.pth", map_location='cpu')
        resnet.load_state_dict(state)

    out_inplanes, aux_inplanes = 2048, 1024

    wanted = {'layer4': 'out', 'layer3': 'aux'} if aux else {'layer4': 'out'}
    backbone = IntermediateLayerGetter(resnet, return_layers=wanted)

    # why using aux: https://github.com/pytorch/vision/issues/4292
    aux_classifier = FCNHead(aux_inplanes, num_classes) if aux else None
    classifier = FCNHead(out_inplanes, num_classes)

    return FCN(backbone, classifier, aux_classifier)
def create_model(aux, num_classes, pretrain=True, weights_path="./fcn_resnet50_coco.pth"):
    """Build an FCN-ResNet50 and optionally initialise it from a checkpoint.

    Args:
        aux: whether the model carries an auxiliary classifier.
        num_classes: number of output classes (background included).
        pretrain: when true, load weights from ``weights_path``.
        weights_path: checkpoint file to load; defaults to the file name
            the script previously hard-coded.

    Returns:
        The constructed model.
    """
    model = fcn_resnet50(aux=aux, num_classes=num_classes)

    if pretrain:
        weights_dict = torch.load(weights_path, map_location='cpu')

        if num_classes != 21:
            # The official pretrained weights have 21 classes (background
            # included). For another class count, drop the class-dependent
            # weights so mismatched shapes do not raise on load.
            for k in list(weights_dict.keys()):
                if "classifier.4" in k:
                    del weights_dict[k]

        # strict=False tolerates the keys removed above; report differences.
        missing_keys, unexpected_keys = model.load_state_dict(weights_dict, strict=False)
        if len(missing_keys) != 0 or len(unexpected_keys) != 0:
            print("missing_keys: ", missing_keys)
            print("unexpected_keys: ", unexpected_keys)

    return model
create_model(aux=args.aux, num_classes=num_classes) model.to(device) params_to_optimize = [ {"params": [p for p in model.backbone.parameters() if p.requires_grad]}, {"params": [p for p in model.classifier.parameters() if p.requires_grad]} ] if args.aux: params = [p for p in model.aux_classifier.parameters() if p.requires_grad] params_to_optimize.append({"params": params, "lr": args.lr * 10}) optimizer = torch.optim.SGD( params_to_optimize, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay ) scaler = torch.cuda.amp.GradScaler() if args.amp else None # 创建学习率更新策略,这里是每个step更新一次(不是每个epoch) lr_scheduler = create_lr_scheduler(optimizer, len(train_loader), args.epochs, warmup=True) if args.resume: checkpoint = torch.load(args.resume, map_location='cpu') model.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) args.start_epoch = checkpoint['epoch'] + 1 if args.amp: scaler.load_state_dict(checkpoint["scaler"]) start_time = time.time() for epoch in range(args.start_epoch, args.epochs): mean_loss, lr = train_one_epoch(model, optimizer, train_loader, device, epoch, lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler) confmat = evaluate(model, val_loader, device=device, num_classes=num_classes) val_info = str(confmat) print(val_info) # write into txt with open(results_file, "a") as f: # 记录每个epoch对应的train_loss、lr以及验证集各指标 train_info = f"[epoch: {epoch}]\n" \ f"train_loss: {mean_loss:.4f}\n" \ f"lr: {lr:.6f}\n" f.write(train_info + val_info + "\n\n") save_file = {"model": model.state_dict(), "optimizer": optimizer.state_dict(), "lr_scheduler": lr_scheduler.state_dict(), "epoch": epoch, "args": args} if args.amp: save_file["scaler"] = scaler.state_dict() torch.save(save_file, "save_weights/model_{}.pth".format(epoch)) total_time = time.time() - start_time total_time_str = str(datetime.timedelta(seconds=int(total_time))) print("training time 
def parse_args():
    """Parse the command-line options of the single-GPU FCN training script.

    Boolean options (``--aux``, ``--amp``) are parsed from their textual value.
    With ``type=bool`` argparse calls ``bool()`` on the raw string, so any
    non-empty value - including ``"False"`` - was interpreted as ``True``.

    Returns:
        argparse.Namespace: the parsed arguments.
    """
    import argparse

    def str2bool(value):
        """Convert a command-line token such as ``true``/``0``/``no`` to a bool."""
        if isinstance(value, bool):
            return value
        token = value.strip().lower()
        if token in ("true", "t", "yes", "y", "on", "1"):
            return True
        # The empty string stays falsy, as it was under ``type=bool``.
        if token in ("false", "f", "no", "n", "off", "0", ""):
            return False
        raise argparse.ArgumentTypeError(
            "expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser(description="pytorch fcn training")

    parser.add_argument("--data-path", default="/data/", help="VOCdevkit root")
    parser.add_argument("--num-classes", default=20, type=int)
    parser.add_argument("--aux", default=True, type=str2bool, help="auxilier loss")
    parser.add_argument("--device", default="cuda", help="training device")
    parser.add_argument("-b", "--batch-size", default=4, type=int)
    parser.add_argument("--epochs", default=30, type=int, metavar="N",
                        help="number of total epochs to train")

    parser.add_argument('--lr', default=0.0001, type=float, help='initial learning rate')
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    parser.add_argument('--print-freq', default=10, type=int, help='print frequency')
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                        help='start epoch')
    # Mixed precision training parameters
    parser.add_argument("--amp", default=False, type=str2bool,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()

    return args
def create_model(aux, num_classes, pretrain=True):
    """Build an FCN-ResNet50 and optionally load the local pre-trained checkpoint.

    The ``pretrain`` flag mirrors the single-GPU training script, so the model
    can also be created when ``./fcn_resnet50_coco.pth`` is not needed (for
    example when the weights come from ``--resume``). The default keeps the
    previous behaviour of always loading the checkpoint.

    Args:
        aux: Forwarded to ``fcn_resnet50`` as its ``aux`` argument.
        num_classes: Forwarded to ``fcn_resnet50`` as its ``num_classes`` argument.
        pretrain: Load ``./fcn_resnet50_coco.pth`` when true (default).

    Returns:
        The constructed model.
    """
    model = fcn_resnet50(aux=aux, num_classes=num_classes)

    if not pretrain:
        return model

    weights_dict = torch.load("./fcn_resnet50_coco.pth", map_location='cpu')

    if num_classes != 21:
        # The official pre-trained weights cover 21 classes (background included).
        # When training on a different number of classes, remove the
        # class-dependent weights to avoid shape-mismatch errors on load.
        for k in list(weights_dict.keys()):
            if "classifier.4" in k:
                del weights_dict[k]

    # strict=False: keys removed above are reported instead of raising.
    missing_keys, unexpected_keys = model.load_state_dict(weights_dict, strict=False)
    if len(missing_keys) != 0 or len(unexpected_keys) != 0:
        print("missing_keys: ", missing_keys)
        print("unexpected_keys: ", unexpected_keys)

    return model
VOCSegmentation(args.data_path, year="2012", transforms=get_transform(train=True), txt_name="train.txt") # load validation data set # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> val.txt val_dataset = VOCSegmentation(args.data_path, year="2012", transforms=get_transform(train=False), txt_name="val.txt") print("Creating data loaders") if args.distributed: train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) test_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset) else: train_sampler = torch.utils.data.RandomSampler(train_dataset) test_sampler = torch.utils.data.SequentialSampler(val_dataset) train_data_loader = torch.utils.data.DataLoader( train_dataset, batch_size=args.batch_size, sampler=train_sampler, num_workers=args.workers, collate_fn=train_dataset.collate_fn, drop_last=True) val_data_loader = torch.utils.data.DataLoader( val_dataset, batch_size=1, sampler=test_sampler, num_workers=args.workers, collate_fn=train_dataset.collate_fn) print("Creating model") # create model num_classes equal background + 20 classes model = create_model(aux=args.aux, num_classes=num_classes) model.to(device) if args.sync_bn: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) model_without_ddp = model if args.distributed: model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) model_without_ddp = model.module params_to_optimize = [ {"params": [p for p in model_without_ddp.backbone.parameters() if p.requires_grad]}, {"params": [p for p in model_without_ddp.classifier.parameters() if p.requires_grad]}, ] if args.aux: params = [p for p in model_without_ddp.aux_classifier.parameters() if p.requires_grad] params_to_optimize.append({"params": params, "lr": args.lr * 10}) optimizer = torch.optim.SGD( params_to_optimize, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) scaler = torch.cuda.amp.GradScaler() if args.amp else None # 创建学习率更新策略,这里是每个step更新一次(不是每个epoch) lr_scheduler = 
create_lr_scheduler(optimizer, len(train_data_loader), args.epochs, warmup=True) # 如果传入resume参数,即上次训练的权重地址,则接着上次的参数训练 if args.resume: # If map_location is missing, torch.load will first load the module to CPU # and then copy each parameter to where it was saved, # which would result in all processes on the same machine using the same set of devices. checkpoint = torch.load(args.resume, map_location='cpu') # 读取之前保存的权重文件(包括优化器以及学习率策略) model_without_ddp.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) args.start_epoch = checkpoint['epoch'] + 1 if args.amp: scaler.load_state_dict(checkpoint["scaler"]) if args.test_only: confmat = evaluate(model, val_data_loader, device=device, num_classes=num_classes) val_info = str(confmat) print(val_info) return print("Start training") start_time = time.time() for epoch in range(args.start_epoch, args.epochs): if args.distributed: train_sampler.set_epoch(epoch) mean_loss, lr = train_one_epoch(model, optimizer, train_data_loader, device, epoch, lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler) confmat = evaluate(model, val_data_loader, device=device, num_classes=num_classes) val_info = str(confmat) print(val_info) # 只在主进程上进行写操作 if args.rank in [-1, 0]: # write into txt with open(results_file, "a") as f: # 记录每个epoch对应的train_loss、lr以及验证集各指标 train_info = f"[epoch: {epoch}]\n" \ f"train_loss: {mean_loss:.4f}\n" \ f"lr: {lr:.6f}\n" f.write(train_info + val_info + "\n\n") if args.output_dir: # 只在主节点上执行保存权重操作 save_file = {'model': model_without_ddp.state_dict(), 'optimizer': optimizer.state_dict(), 'lr_scheduler': lr_scheduler.state_dict(), 'args': args, 'epoch': epoch} if args.amp: save_file["scaler"] = scaler.state_dict() save_on_master(save_file, os.path.join(args.output_dir, 'model_{}.pth'.format(epoch))) total_time = time.time() - start_time total_time_str = str(datetime.timedelta(seconds=int(total_time))) print('Training 
time {}'.format(total_time_str)) if __name__ == "__main__": import argparse parser = argparse.ArgumentParser( description=__doc__) # 训练文件的根目录(VOCdevkit) parser.add_argument('--data-path', default='/data/', help='dataset') # 训练设备类型 parser.add_argument('--device', default='cuda', help='device') # 检测目标类别数(不包含背景) parser.add_argument('--num-classes', default=20, type=int, help='num_classes') # 每块GPU上的batch_size parser.add_argument('-b', '--batch-size', default=4, type=int, help='images per gpu, the total batch size is $NGPU x batch_size') parser.add_argument("--aux", default=True, type=bool, help="auxilier loss") # 指定接着从哪个epoch数开始训练 parser.add_argument('--start_epoch', default=0, type=int, help='start epoch') # 训练的总epoch数 parser.add_argument('--epochs', default=20, type=int, metavar='N', help='number of total epochs to run') # 是否使用同步BN(在多个GPU之间同步),默认不开启,开启后训练速度会变慢 parser.add_argument('--sync_bn', type=bool, default=False, help='whether using SyncBatchNorm') # 数据加载以及预处理的线程数 parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', help='number of data loading workers (default: 4)') # 训练学习率,这里默认设置成0.0001,如果效果不好可以尝试加大学习率 parser.add_argument('--lr', default=0.0001, type=float, help='initial learning rate') # SGD的momentum参数 parser.add_argument('--momentum', default=0.9, type=float, metavar='M', help='momentum') # SGD的weight_decay参数 parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, metavar='W', help='weight decay (default: 1e-4)', dest='weight_decay') # 训练过程打印信息的频率 parser.add_argument('--print-freq', default=20, type=int, help='print frequency') # 文件保存地址 parser.add_argument('--output-dir', default='./multi_train', help='path where to save') # 基于上次的训练结果接着训练 parser.add_argument('--resume', default='', help='resume from checkpoint') # 不训练,仅测试 parser.add_argument( "--test-only", dest="test_only", help="Only test the model", action="store_true", ) # 分布式进程数 parser.add_argument('--world-size', default=1, type=int, help='number of distributed 
class SmoothedValue(object):
    """Keep a sliding window of recent values together with running totals.

    The window (a bounded deque) backs the ``median``, ``avg``, ``max`` and
    ``value`` statistics; ``total`` and ``count`` back ``global_avg``.
    """

    def __init__(self, window_size=20, fmt=None):
        # Default rendering: latest value followed by the global average.
        self.fmt = fmt if fmt is not None else "{value:.4f} ({global_avg:.4f})"
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0

    def update(self, value, n=1):
        """Record ``value`` once in the window and with weight ``n`` in the totals."""
        self.deque.append(value)
        self.total += value * n
        self.count += n

    def synchronize_between_processes(self):
        """
        Sum ``count`` and ``total`` across processes.

        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        packed = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
        dist.barrier()
        dist.all_reduce(packed)
        reduced_count, reduced_total = packed.tolist()
        self.count = int(reduced_count)
        self.total = reduced_total

    @property
    def median(self):
        """Median of the values currently in the window."""
        window = torch.tensor(list(self.deque))
        return window.median().item()

    @property
    def avg(self):
        """Mean of the values currently in the window, computed in float32."""
        window = torch.tensor(list(self.deque), dtype=torch.float32)
        return window.mean().item()

    @property
    def global_avg(self):
        """Weighted mean over every update seen so far."""
        return self.total / self.count

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    @property
    def value(self):
        """Most recently recorded value."""
        return self.deque[-1]

    def __str__(self):
        stats = {
            "median": self.median,
            "avg": self.avg,
            "global_avg": self.global_avg,
            "max": self.max,
            "value": self.value,
        }
        return self.fmt.format(**stats)
class MetricLogger(object):
    """Collect named meters and print periodic progress lines while iterating."""

    def __init__(self, delimiter="\t"):
        # Unknown meter names get a fresh SmoothedValue on first access.
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Feed one scalar per keyword into the meter of the same name."""
        for name, raw in kwargs.items():
            scalar = raw.item() if isinstance(raw, torch.Tensor) else raw
            assert isinstance(scalar, (float, int))
            self.meters[name].update(scalar)

    def __getattr__(self, attr):
        # Only reached when normal lookup fails: try the meters first,
        # then the instance dictionary.
        for store in (self.meters, self.__dict__):
            if attr in store:
                return store[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        rendered = ["{}: {}".format(name, str(meter))
                    for name, meter in self.meters.items()]
        return self.delimiter.join(rendered)

    def synchronize_between_processes(self):
        """Ask every meter to synchronize itself across processes."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register ``meter`` under ``name``, replacing any existing one."""
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield the items of ``iterable``, printing a status line every ``print_freq`` items.

        Each status line holds the index, an ETA, all meters, the iteration
        and data-loading times and, when CUDA is available, the peak
        allocated memory in MB. A total-time line is printed at the end.
        """
        header = header or ''
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')

        # Pad the running index to the width of the total item count.
        index_width = len(str(len(iterable)))
        fields = [
            header,
            '[{0:' + str(index_width) + 'd}/{1}]',
            'eta: {eta}',
            '{meters}',
            'time: {time}',
            'data: {data}',
        ]
        if torch.cuda.is_available():
            fields.append('max mem: {memory:.0f}')
        log_msg = self.delimiter.join(fields)

        MB = 1024.0 * 1024.0
        for i, obj in enumerate(iterable):
            # Time spent waiting for the item to be produced.
            data_time.update(time.time() - end)
            yield obj
            # Full iteration time, including the consumer's work on the item.
            iter_time.update(time.time() - end)
            if i % print_freq == 0:
                eta_seconds = iter_time.global_avg * (len(iterable) - i)
                fmt_kwargs = {
                    'eta': str(datetime.timedelta(seconds=int(eta_seconds))),
                    'meters': str(self),
                    'time': str(iter_time),
                    'data': str(data_time),
                }
                if torch.cuda.is_available():
                    fmt_kwargs['memory'] = torch.cuda.max_memory_allocated() / MB
                print(log_msg.format(i, len(iterable), **fmt_kwargs))
            end = time.time()

        elapsed = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(elapsed)))
        print('{} Total time: {}'.format(header, total_time_str))
def create_lr_scheduler(optimizer,
                        num_step: int,
                        epochs: int,
                        warmup=True,
                        warmup_epochs=1,
                        warmup_factor=1e-3):
    """Create a per-step ``LambdaLR`` with linear warm-up and polynomial decay.

    During warm-up the multiplier rises linearly from ``warmup_factor`` to 1;
    afterwards it decays from 1 to 0 with power 0.9 (the DeepLab v2 learning
    rate policy). The scheduler is meant to be stepped once per iteration,
    not once per epoch.

    Compared to the naive formula the multiplier is clamped at 0 once the step
    counter passes ``epochs * num_step`` (a negative base raised to 0.9 would
    otherwise produce a complex number), and zero-length warm-up or decay
    phases no longer divide by zero.

    Args:
        optimizer: Optimizer whose learning rates are scheduled.
        num_step: Number of optimisation steps per epoch (must be > 0).
        epochs: Total number of training epochs (must be > 0).
        warmup: ``True`` to enable the warm-up phase.
        warmup_epochs: Length of the warm-up phase in epochs.
        warmup_factor: Multiplier used at step 0 of the warm-up.

    Returns:
        torch.optim.lr_scheduler.LambdaLR: the configured scheduler.
    """
    assert num_step > 0 and epochs > 0
    if warmup is False:
        warmup_epochs = 0

    warmup_steps = warmup_epochs * num_step
    decay_steps = (epochs - warmup_epochs) * num_step

    def f(x):
        """
        Return the learning-rate multiplier for step ``x``.

        Note that PyTorch calls lr_scheduler.step() once before training starts.
        """
        if warmup is True and warmup_steps > 0 and x <= warmup_steps:
            alpha = float(x) / warmup_steps
            # During warm-up the multiplier goes from warmup_factor -> 1.
            return warmup_factor * (1 - alpha) + alpha

        if decay_steps <= 0:
            # No decay phase is scheduled; anything past warm-up is past the end.
            return 0.0

        # After warm-up the multiplier goes from 1 -> 0.
        # Reference: deeplab_v2 "Learning rate policy".
        remaining = 1 - (x - warmup_steps) / decay_steps
        # Clamp so that steps beyond the schedule give 0 instead of complex values.
        return max(0.0, remaining) ** 0.9

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)
class ToTensor(object):
    """Paired transform that turns an image and its target into tensors."""

    def __call__(self, image, target):
        # The target goes through numpy and is stored as int64 values;
        # only the image is passed to ``F.to_tensor``.
        target_array = np.array(target)
        image_tensor = F.to_tensor(image)
        target_tensor = torch.as_tensor(target_array, dtype=torch.int64)
        return image_tensor, target_tensor
def parse_args():
    """Parse the command-line options of the validation script.

    ``--aux`` is parsed from its textual value. With ``type=bool`` argparse
    calls ``bool()`` on the raw string, so any non-empty value - including
    ``"False"`` - was interpreted as ``True``.

    Returns:
        argparse.Namespace: the parsed arguments.
    """
    import argparse

    def str2bool(value):
        """Convert a command-line token such as ``true``/``0``/``no`` to a bool."""
        if isinstance(value, bool):
            return value
        token = value.strip().lower()
        if token in ("true", "t", "yes", "y", "on", "1"):
            return True
        # The empty string stays falsy, as it was under ``type=bool``.
        if token in ("false", "f", "no", "n", "off", "0", ""):
            return False
        raise argparse.ArgumentTypeError(
            "expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser(description="pytorch fcn training")

    parser.add_argument("--data-path", default="/data/", help="VOCdevkit root")
    parser.add_argument("--weights", default="./save_weights/model_29.pth")
    parser.add_argument("--num-classes", default=20, type=int)
    parser.add_argument("--aux", default=True, type=str2bool, help="auxilier loss")
    parser.add_argument("--device", default="cuda", help="training device")
    parser.add_argument('--print-freq', default=10, type=int, help='print frequency')

    args = parser.parse_args()

    return args
简易的预测脚本,使用训练好的权重进行预测测试 ├── validation.py: 利用训练好的权重验证/测试数据的mIoU等指标,并生成record_mAP.txt文件 └── pascal_voc_classes.json: pascal_voc标签文件 ``` ## 预训练权重下载地址: * 注意:官方提供的预训练权重是在COCO上预训练得到的,训练时只针对和PASCAL VOC相同的类别进行了训练,所以类别数是21(包括背景) * lraspp_mobilenet_v3_large: https://download.pytorch.org/models/lraspp_mobilenet_v3_large-d234d4ea.pth * 注意,下载的预训练权重记得要重命名,比如在train.py中读取的是```lraspp_mobilenet_v3_large.pth```文件, 不是```lraspp_mobilenet_v3_large-d234d4ea.pth``` ## 数据集,本例程使用的是PASCAL VOC2012数据集 * Pascal VOC2012 train/val数据集下载地址:http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar * 如果不了解数据集或者想使用自己的数据集进行训练,请参考我的博文: https://blog.csdn.net/qq_37541097/article/details/115787033 ## 训练方法 * 确保提前准备好数据集 * 确保提前下载好对应预训练模型权重 * 若要使用单GPU或者CPU训练,直接使用train.py训练脚本 * 若要使用多GPU训练,使用```torchrun --nproc_per_node=8 train_multi_GPU.py```指令,```nproc_per_node```参数为使用GPU数量 * 如果想指定使用哪些GPU设备可在指令前加上```CUDA_VISIBLE_DEVICES=0,3```(例如我只要使用设备中的第1块和第4块GPU设备) * ```CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py``` ## 注意事项 * 在使用训练脚本时,注意要将'--data-path'(VOC_root)设置为自己存放'VOCdevkit'文件夹所在的**根目录** * 在使用预测脚本时,要将'weights_path'设置为你自己生成的权重路径。 * 使用validation文件时,注意确保你的验证集或者测试集中必须包含每个类别的目标,并且使用时只需要修改'--num-classes'、'--data-path'和'--weights'即可,其他代码尽量不要改动 ## 如果对LRASPP原理不是很理解可参考我的bilibili LR-ASPP网络讲解: [https://www.bilibili.com/video/BV1LS4y1M76E](https://www.bilibili.com/video/BV1LS4y1M76E) ## 进一步了解该项目,以及对LRASPP代码的分析可参考我的bilibili LR-ASPP源码解析(Pytorch版): [https://www.bilibili.com/video/bv13D4y1F7ML](https://www.bilibili.com/video/bv13D4y1F7ML) ## Pytorch官方实现的LRASPP网络框架图 ![lraspp](lraspp.png) ================================================ FILE: pytorch_segmentation/lraspp/get_palette.py ================================================ import json import numpy as np from PIL import Image # 读取mask标签 target = Image.open("./2007_001288.png") # 获取调色板 palette = target.getpalette() palette = np.reshape(palette, (-1, 3)).tolist() # 转换成字典子形式 pd = dict((i, color) for i, color in enumerate(palette)) 
class VOCSegmentation(data.Dataset):
    """Pascal VOC segmentation dataset read from a ``VOCdevkit`` directory tree.

    Image and mask paths are derived from the sample names listed in
    ``ImageSets/Segmentation/<txt_name>`` under ``VOCdevkit/VOC<year>``.
    """

    def __init__(self, voc_root, year="2012", transforms=None, txt_name: str = "train.txt"):
        super().__init__()
        assert year in ["2007", "2012"], "year must be in ['2007', '2012']"

        root = os.path.join(voc_root, "VOCdevkit", f"VOC{year}")
        assert os.path.exists(root), "path '{}' does not exist.".format(root)

        txt_path = os.path.join(root, "ImageSets", "Segmentation", txt_name)
        assert os.path.exists(txt_path), "file '{}' does not exist.".format(txt_path)

        image_dir = os.path.join(root, 'JPEGImages')
        mask_dir = os.path.join(root, 'SegmentationClass')

        # One sample name per line; blank lines are ignored.
        with open(txt_path, "r") as f:
            stripped = (line.strip() for line in f)
            file_names = [name for name in stripped if name]

        self.images = []
        self.masks = []
        for name in file_names:
            self.images.append(os.path.join(image_dir, name + ".jpg"))
            self.masks.append(os.path.join(mask_dir, name + ".png"))
        assert (len(self.images) == len(self.masks))

        self.transforms = transforms

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where target is the image segmentation.
        """
        image_path = self.images[index]
        mask_path = self.masks[index]

        img = Image.open(image_path).convert('RGB')
        target = Image.open(mask_path)

        if self.transforms is None:
            return img, target

        img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        return len(self.images)

    @staticmethod
    def collate_fn(batch):
        """Batch (image, target) pairs, padding images with 0 and targets with 255."""
        images, targets = zip(*batch)
        batched_imgs = cat_list(images, fill_value=0)
        batched_targets = cat_list(targets, fill_value=255)
        return batched_imgs, batched_targets
128, 192], "48": [0, 64, 64], "49": [128, 64, 64], "50": [0, 192, 64], "51": [128, 192, 64], "52": [0, 64, 192], "53": [128, 64, 192], "54": [0, 192, 192], "55": [128, 192, 192], "56": [64, 64, 64], "57": [192, 64, 64], "58": [64, 192, 64], "59": [192, 192, 64], "60": [64, 64, 192], "61": [192, 64, 192], "62": [64, 192, 192], "63": [192, 192, 192], "64": [32, 0, 0], "65": [160, 0, 0], "66": [32, 128, 0], "67": [160, 128, 0], "68": [32, 0, 128], "69": [160, 0, 128], "70": [32, 128, 128], "71": [160, 128, 128], "72": [96, 0, 0], "73": [224, 0, 0], "74": [96, 128, 0], "75": [224, 128, 0], "76": [96, 0, 128], "77": [224, 0, 128], "78": [96, 128, 128], "79": [224, 128, 128], "80": [32, 64, 0], "81": [160, 64, 0], "82": [32, 192, 0], "83": [160, 192, 0], "84": [32, 64, 128], "85": [160, 64, 128], "86": [32, 192, 128], "87": [160, 192, 128], "88": [96, 64, 0], "89": [224, 64, 0], "90": [96, 192, 0], "91": [224, 192, 0], "92": [96, 64, 128], "93": [224, 64, 128], "94": [96, 192, 128], "95": [224, 192, 128], "96": [32, 0, 64], "97": [160, 0, 64], "98": [32, 128, 64], "99": [160, 128, 64], "100": [32, 0, 192], "101": [160, 0, 192], "102": [32, 128, 192], "103": [160, 128, 192], "104": [96, 0, 64], "105": [224, 0, 64], "106": [96, 128, 64], "107": [224, 128, 64], "108": [96, 0, 192], "109": [224, 0, 192], "110": [96, 128, 192], "111": [224, 128, 192], "112": [32, 64, 64], "113": [160, 64, 64], "114": [32, 192, 64], "115": [160, 192, 64], "116": [32, 64, 192], "117": [160, 64, 192], "118": [32, 192, 192], "119": [160, 192, 192], "120": [96, 64, 64], "121": [224, 64, 64], "122": [96, 192, 64], "123": [224, 192, 64], "124": [96, 64, 192], "125": [224, 64, 192], "126": [96, 192, 192], "127": [224, 192, 192], "128": [0, 32, 0], "129": [128, 32, 0], "130": [0, 160, 0], "131": [128, 160, 0], "132": [0, 32, 128], "133": [128, 32, 128], "134": [0, 160, 128], "135": [128, 160, 128], "136": [64, 32, 0], "137": [192, 32, 0], "138": [64, 160, 0], "139": [192, 160, 0], "140": [64, 32, 
128], "141": [192, 32, 128], "142": [64, 160, 128], "143": [192, 160, 128], "144": [0, 96, 0], "145": [128, 96, 0], "146": [0, 224, 0], "147": [128, 224, 0], "148": [0, 96, 128], "149": [128, 96, 128], "150": [0, 224, 128], "151": [128, 224, 128], "152": [64, 96, 0], "153": [192, 96, 0], "154": [64, 224, 0], "155": [192, 224, 0], "156": [64, 96, 128], "157": [192, 96, 128], "158": [64, 224, 128], "159": [192, 224, 128], "160": [0, 32, 64], "161": [128, 32, 64], "162": [0, 160, 64], "163": [128, 160, 64], "164": [0, 32, 192], "165": [128, 32, 192], "166": [0, 160, 192], "167": [128, 160, 192], "168": [64, 32, 64], "169": [192, 32, 64], "170": [64, 160, 64], "171": [192, 160, 64], "172": [64, 32, 192], "173": [192, 32, 192], "174": [64, 160, 192], "175": [192, 160, 192], "176": [0, 96, 64], "177": [128, 96, 64], "178": [0, 224, 64], "179": [128, 224, 64], "180": [0, 96, 192], "181": [128, 96, 192], "182": [0, 224, 192], "183": [128, 224, 192], "184": [64, 96, 64], "185": [192, 96, 64], "186": [64, 224, 64], "187": [192, 224, 64], "188": [64, 96, 192], "189": [192, 96, 192], "190": [64, 224, 192], "191": [192, 224, 192], "192": [32, 32, 0], "193": [160, 32, 0], "194": [32, 160, 0], "195": [160, 160, 0], "196": [32, 32, 128], "197": [160, 32, 128], "198": [32, 160, 128], "199": [160, 160, 128], "200": [96, 32, 0], "201": [224, 32, 0], "202": [96, 160, 0], "203": [224, 160, 0], "204": [96, 32, 128], "205": [224, 32, 128], "206": [96, 160, 128], "207": [224, 160, 128], "208": [32, 96, 0], "209": [160, 96, 0], "210": [32, 224, 0], "211": [160, 224, 0], "212": [32, 96, 128], "213": [160, 96, 128], "214": [32, 224, 128], "215": [160, 224, 128], "216": [96, 96, 0], "217": [224, 96, 0], "218": [96, 224, 0], "219": [224, 224, 0], "220": [96, 96, 128], "221": [224, 96, 128], "222": [96, 224, 128], "223": [224, 224, 128], "224": [32, 32, 64], "225": [160, 32, 64], "226": [32, 160, 64], "227": [160, 160, 64], "228": [32, 32, 192], "229": [160, 32, 192], "230": [32, 160, 192], 
def main():
    """Run single-image inference with a trained LR-ASPP model.

    Loads the hard-coded weights, test image and palette, predicts a
    per-pixel class-index map and saves it as the palettised PNG
    ``test_result.png``. The inference time of one forward pass is printed.

    Raises:
        AssertionError: if the weights, image or palette file is missing.
    """
    classes = 20  # the model is built with classes + 1 outputs
    weights_path = "./save_weights/model_29.pth"
    img_path = "./test.jpg"
    palette_path = "./palette.json"
    assert os.path.exists(weights_path), f"weights {weights_path} not found."
    assert os.path.exists(img_path), f"image {img_path} not found."
    assert os.path.exists(palette_path), f"palette {palette_path} not found."

    with open(palette_path, "rb") as f:
        pallette_dict = json.load(f)
    # flatten {"index": [r, g, b], ...} into the flat list putpalette expects
    pallette = [channel for rgb in pallette_dict.values() for channel in rgb]

    # get devices
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # create model
    model = lraspp_mobilenetv3_large(num_classes=classes+1)

    # load weights (the checkpoint stores the state dict under the 'model' key)
    weights_dict = torch.load(weights_path, map_location='cpu')['model']
    model.load_state_dict(weights_dict)
    model.to(device)

    # load image; force three channels because Normalize below is configured
    # with 3-channel mean/std and fails on grayscale, RGBA or palette images
    original_img = Image.open(img_path).convert('RGB')

    # from pil image to tensor and normalize
    data_transform = transforms.Compose([transforms.Resize(520),
                                         transforms.ToTensor(),
                                         transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                                              std=(0.229, 0.224, 0.225))])
    img = data_transform(original_img)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)

    model.eval()  # switch to evaluation mode
    with torch.no_grad():
        # warm-up forward pass on zeros so the timed run below is not the first call
        img_height, img_width = img.shape[-2:]
        init_img = torch.zeros((1, 3, img_height, img_width), device=device)
        model(init_img)

        t_start = time_synchronized()
        output = model(img.to(device))
        t_end = time_synchronized()
        print("inference time: {}".format(t_end - t_start))

        # per-pixel argmax over the class dimension, then drop the batch dimension
        prediction = output['out'].argmax(1).squeeze(0)
        prediction = prediction.to("cpu").numpy().astype(np.uint8)
        mask = Image.fromarray(prediction)
        mask.putpalette(pallette)
        mask.save("test_result.png")
'34.8', '84.3', '66.4', '59.9', '89.2', '71.2', '86.0', '34.6', '82.3', '46.1', '78.6', '82.1', '79.8', '82.5', '54.8', '79.4', '50.2', '83.8', '65.5'] mean IoU: 70.9 [epoch: 1] train_loss: 0.4683 lr: 0.000077 global correct: 93.2 average row correct: ['96.2', '92.6', '75.2', '92.3', '82.6', '70.9', '93.5', '83.9', '93.5', '47.9', '91.0', '61.9', '87.0', '90.5', '89.8', '90.0', '68.1', '86.4', '70.4', '90.4', '75.5'] IoU: ['92.5', '86.1', '34.9', '85.1', '65.3', '63.0', '90.0', '73.1', '86.0', '34.8', '83.2', '50.0', '77.6', '81.2', '79.8', '82.3', '54.3', '78.4', '49.8', '85.5', '67.3'] mean IoU: 71.4 [epoch: 2] train_loss: 0.4053 lr: 0.000054 global correct: 93.1 average row correct: ['95.9', '93.1', '75.9', '92.6', '83.8', '75.3', '94.4', '85.6', '93.7', '50.2', '91.2', '62.1', '87.1', '90.8', '90.3', '89.8', '71.2', '86.8', '71.8', '91.1', '77.5'] IoU: ['92.5', '86.0', '35.1', '84.7', '65.2', '65.6', '90.4', '73.3', '85.9', '34.8', '83.0', '50.0', '77.7', '81.7', '79.2', '82.3', '53.9', '78.5', '49.9', '85.6', '67.2'] mean IoU: 71.6 [epoch: 3] train_loss: 0.4358 lr: 0.000029 global correct: 93.1 average row correct: ['95.8', '93.4', '76.0', '92.3', '83.2', '78.1', '94.0', '86.3', '93.0', '50.9', '91.1', '62.9', '88.0', '90.9', '90.4', '89.6', '71.6', '87.0', '72.4', '92.4', '78.5'] IoU: ['92.5', '86.0', '35.3', '85.1', '66.1', '66.9', '89.8', '73.3', '85.9', '34.8', '83.0', '50.4', '78.0', '81.5', '79.0', '82.1', '54.1', '78.6', '50.0', '85.6', '67.1'] mean IoU: 71.7 [epoch: 4] train_loss: 0.3886 lr: 0.000000 global correct: 93.1 average row correct: ['95.6', '93.8', '76.0', '92.8', '83.6', '77.9', '94.2', '86.1', '93.5', '50.9', '92.0', '63.8', '88.8', '91.4', '90.6', '89.4', '73.2', '87.4', '73.0', '92.4', '78.9'] IoU: ['92.5', '86.0', '35.3', '84.4', '66.2', '66.5', '89.9', '73.2', '85.9', '34.6', '83.2', '50.8', '78.0', '81.4', '78.6', '82.0', '53.6', '78.4', '50.1', '85.7', '66.6'] mean IoU: 71.6 ================================================ FILE: 
class IntermediateLayerGetter(nn.ModuleDict):
    """
    Wrap a model so that a forward pass returns selected intermediate outputs.

    The wrapper relies on the model's direct children being registered in the
    order in which they are executed, so a child module must **not** be reused
    twice in the forward pass. Only direct children can be queried:
    `model.feature1` works, `model.feature1.layer2` does not.

    Children registered after the last requested layer are dropped.

    Args:
        model (nn.Module): model from which the features are extracted
        return_layers (Dict[name, new_name]): maps the name of each child
            whose output should be returned to the key under which that
            output is stored in the result.

    Raises:
        ValueError: if a key of `return_layers` is not a direct child of `model`.
    """
    _version = 2
    __annotations__ = {
        "return_layers": Dict[str, str],
    }

    def __init__(self, model: nn.Module, return_layers: Dict[str, str]) -> None:
        child_names = [name for name, _ in model.named_children()]
        if not set(return_layers).issubset(child_names):
            raise ValueError("return_layers are not present in model")

        # names still to be reached while walking the children in order
        pending = {str(key) for key in return_layers}

        # rebuild the backbone, keeping children only up to the last requested one
        kept = OrderedDict()
        for name, module in model.named_children():
            kept[name] = module
            pending.discard(name)
            if not pending:
                break

        super().__init__(kept)
        self.return_layers = return_layers

    def forward(self, x: Tensor) -> Dict[str, Tensor]:
        """Run the kept children in order and collect the requested outputs."""
        collected = OrderedDict()
        for name, module in self.items():
            x = module(x)
            if name in self.return_layers:
                collected[self.return_layers[name]] = x
        return collected
class LRASPPHead(nn.Module):
    """Segmentation head that fuses a gated high-level branch with a low-level branch.

    The high-level feature map goes through a 1x1 conv + BN + ReLU and is
    multiplied by a sigmoid gate computed from its globally pooled version.
    The result is upsampled to the low-level resolution and both branches are
    projected to `num_classes` channels by 1x1 convolutions and summed.

    Args:
        low_channels (int): channels of the "low" feature map.
        high_channels (int): channels of the "high" feature map.
        num_classes (int): number of output channels.
        inter_channels (int): channels of the intermediate high-level branch.
    """

    def __init__(self,
                 low_channels: int,
                 high_channels: int,
                 num_classes: int,
                 inter_channels: int) -> None:
        super().__init__()
        # high-level branch: 1x1 conv -> BN -> ReLU
        self.cbr = nn.Sequential(
            nn.Conv2d(high_channels, inter_channels, 1, bias=False),
            nn.BatchNorm2d(inter_channels),
            nn.ReLU(inplace=True),
        )
        # gate: global average pool -> 1x1 conv -> sigmoid
        self.scale = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(high_channels, inter_channels, 1, bias=False),
            nn.Sigmoid(),
        )
        self.low_classifier = nn.Conv2d(low_channels, num_classes, 1)
        self.high_classifier = nn.Conv2d(inter_channels, num_classes, 1)

    def forward(self, inputs: Dict[str, Tensor]) -> Tensor:
        """Return class scores at the resolution of ``inputs["low"]``."""
        low_feat = inputs["low"]
        high_feat = inputs["high"]

        gated = self.cbr(high_feat) * self.scale(high_feat)
        # bring the high-level branch up to the low-level spatial size
        gated = F.interpolate(gated, size=low_feat.shape[-2:], mode="bilinear", align_corners=False)

        low_scores = self.low_classifier(low_feat)
        high_scores = self.high_classifier(gated)
        return low_scores + high_scores
def _make_divisible(ch, divisor=8, min_ch=None):
    """
    Round a channel count to the nearest multiple of ``divisor``.

    Taken from the original tf repo:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py

    Args:
        ch: requested number of channels.
        divisor: the result is a multiple of this value (unless ``min_ch``
            is larger than the rounded value and is returned instead).
        min_ch: lower bound for the result; defaults to ``divisor``.

    Returns:
        The adjusted channel count, never less than 90% of ``ch``.
    """
    lower_bound = divisor if min_ch is None else min_ch
    rounded = int(ch + divisor / 2) // divisor * divisor
    result = max(lower_bound, rounded)
    # rounding down must not remove more than 10% of the requested channels
    if result < 0.9 * ch:
        return result + divisor
    return result
class MobileNetV3(nn.Module):
    """MobileNetV3 classifier assembled from a list of inverted-residual configs.

    Args:
        inverted_residual_setting: one config per inverted residual block.
        last_channel: width of the hidden linear layer in the classifier.
        num_classes: number of classifier outputs.
        block: callable building one block from ``(cnf, norm_layer)``;
            defaults to ``InvertedResidual``.
        norm_layer: callable building a normalisation layer; defaults to
            ``BatchNorm2d`` with ``eps=0.001`` and ``momentum=0.01``.

    Raises:
        ValueError: if ``inverted_residual_setting`` is empty.
        TypeError: if it is not a list of ``InvertedResidualConfig``.
    """

    def __init__(self,
                 inverted_residual_setting: List[InvertedResidualConfig],
                 last_channel: int,
                 num_classes: int = 1000,
                 block: Optional[Callable[..., nn.Module]] = None,
                 norm_layer: Optional[Callable[..., nn.Module]] = None):
        super().__init__()

        if not inverted_residual_setting:
            raise ValueError("The inverted_residual_setting should not be empty.")
        if (not isinstance(inverted_residual_setting, List)
                or not all(isinstance(cfg, InvertedResidualConfig) for cfg in inverted_residual_setting)):
            raise TypeError("The inverted_residual_setting should be List[InvertedResidualConfig]")

        block = InvertedResidual if block is None else block
        if norm_layer is None:
            norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.01)

        stem_out_c = inverted_residual_setting[0].input_c
        tail_in_c = inverted_residual_setting[-1].out_c
        tail_out_c = 6 * tail_in_c

        # first layer
        stem = ConvBNActivation(3,
                                stem_out_c,
                                kernel_size=3,
                                stride=2,
                                norm_layer=norm_layer,
                                activation_layer=nn.Hardswish)
        # inverted residual blocks
        body = [block(cfg, norm_layer) for cfg in inverted_residual_setting]
        # last 1x1 convolution, widening the channels by a factor of 6
        tail = ConvBNActivation(tail_in_c,
                                tail_out_c,
                                kernel_size=1,
                                norm_layer=norm_layer,
                                activation_layer=nn.Hardswish)

        self.features = nn.Sequential(stem, *body, tail)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Sequential(nn.Linear(tail_out_c, last_channel),
                                        nn.Hardswish(inplace=True),
                                        nn.Dropout(p=0.2, inplace=True),
                                        nn.Linear(last_channel, num_classes))

        # initial weights
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode="fan_out")
                if module.bias is not None:
                    nn.init.zeros_(module.bias)
            elif isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.zeros_(module.bias)

    def _forward_impl(self, x: Tensor) -> Tensor:
        """Features -> global average pool -> flatten -> classifier."""
        feats = self.features(x)
        pooled = torch.flatten(self.avgpool(feats), 1)
        return self.classifier(pooled)

    def forward(self, x: Tensor) -> Tensor:
        return self._forward_impl(x)
def mobilenet_v3_small(num_classes: int = 1000,
                       reduced_tail: bool = False,
                       dilated: bool = False) -> MobileNetV3:
    """
    Constructs a small MobileNetV3 architecture from
    "Searching for MobileNetV3".

    weights_link:
    https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth

    Args:
        num_classes (int): number of classes
        reduced_tail (bool): If True, reduces the channel counts of all feature layers
            between C4 and C5 by 2. It is used to reduce the channel redundancy in the
            backbone for Detection and Segmentation.
        dilated: whether using dilated conv
    """
    width_multi = 1.0
    make_cfg = partial(InvertedResidualConfig, width_multi=width_multi)
    scale_channels = partial(InvertedResidualConfig.adjust_channels, width_multi=width_multi)

    tail_divider = 2 if reduced_tail else 1
    tail_dilation = 2 if dilated else 1
    # channel widths of the last stage, halved when reduced_tail is set
    tail_c = 96 // tail_divider
    tail_expanded_c = 576 // tail_divider

    # input_c, kernel, expanded_c, out_c, use_se, activation, stride, dilation
    rows = [
        (16, 3, 16, 16, True, "RE", 2, 1),  # C1
        (16, 3, 72, 24, False, "RE", 2, 1),  # C2
        (24, 3, 88, 24, False, "RE", 1, 1),
        (24, 5, 96, 40, True, "HS", 2, 1),  # C3
        (40, 5, 240, 40, True, "HS", 1, 1),
        (40, 5, 240, 40, True, "HS", 1, 1),
        (40, 5, 120, 48, True, "HS", 1, 1),
        (48, 5, 144, 48, True, "HS", 1, 1),
        (48, 5, 288, tail_c, True, "HS", 2, tail_dilation),  # C4
        (tail_c, 5, tail_expanded_c, tail_c, True, "HS", 1, tail_dilation),
        (tail_c, 5, tail_expanded_c, tail_c, True, "HS", 1, tail_dilation),
    ]
    inverted_residual_setting = [make_cfg(*row) for row in rows]
    last_channel = scale_channels(1024 // tail_divider)  # C5

    return MobileNetV3(inverted_residual_setting=inverted_residual_setting,
                       last_channel=last_channel,
                       num_classes=num_classes)
def create_model(num_classes, pretrain=True):
    """Build the LR-ASPP MobileNetV3-Large model, optionally loading pretrained weights.

    Args:
        num_classes: number of model outputs (background included).
        pretrain: when True, load ``./lraspp_mobilenet_v3_large.pth``.

    Returns:
        The constructed model.
    """
    model = lraspp_mobilenetv3_large(num_classes=num_classes)
    if not pretrain:
        return model

    weights_dict = torch.load("./lraspp_mobilenet_v3_large.pth", map_location='cpu')

    if num_classes != 21:
        # The official pretrained weights have 21 classes (background included).
        # When training on a custom dataset, drop the class-dependent weights
        # so that mismatching shapes do not raise an error.
        stale_keys = [key for key in list(weights_dict.keys())
                      if "low_classifier" in key or "high_classifier" in key]
        for key in stale_keys:
            del weights_dict[key]

    missing_keys, unexpected_keys = model.load_state_dict(weights_dict, strict=False)
    if missing_keys or unexpected_keys:
        print("missing_keys: ", missing_keys)
        print("unexpected_keys: ", unexpected_keys)

    return model
torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True, pin_memory=True, collate_fn=train_dataset.collate_fn) val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, num_workers=num_workers, pin_memory=True, collate_fn=val_dataset.collate_fn) model = create_model(num_classes=num_classes) model.to(device) params_to_optimize = [ {"params": [p for p in model.backbone.parameters() if p.requires_grad]}, {"params": [p for p in model.classifier.parameters() if p.requires_grad]} ] optimizer = torch.optim.SGD( params_to_optimize, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay ) scaler = torch.cuda.amp.GradScaler() if args.amp else None # 创建学习率更新策略,这里是每个step更新一次(不是每个epoch) lr_scheduler = create_lr_scheduler(optimizer, len(train_loader), args.epochs, warmup=True) if args.resume: checkpoint = torch.load(args.resume, map_location='cpu') model.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) args.start_epoch = checkpoint['epoch'] + 1 if args.amp: scaler.load_state_dict(checkpoint["scaler"]) start_time = time.time() for epoch in range(args.start_epoch, args.epochs): mean_loss, lr = train_one_epoch(model, optimizer, train_loader, device, epoch, lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler) confmat = evaluate(model, val_loader, device=device, num_classes=num_classes) val_info = str(confmat) print(val_info) # write into txt with open(results_file, "a") as f: # 记录每个epoch对应的train_loss、lr以及验证集各指标 train_info = f"[epoch: {epoch}]\n" \ f"train_loss: {mean_loss:.4f}\n" \ f"lr: {lr:.6f}\n" f.write(train_info + val_info + "\n\n") save_file = {"model": model.state_dict(), "optimizer": optimizer.state_dict(), "lr_scheduler": lr_scheduler.state_dict(), "epoch": epoch, "args": args} if args.amp: save_file["scaler"] = scaler.state_dict() torch.save(save_file, 
def parse_args():
    """Parse the command-line options of the single-GPU training script.

    Returns:
        argparse.Namespace with ``data_path``, ``num_classes``, ``device``,
        ``batch_size``, ``epochs``, ``lr``, ``momentum``, ``weight_decay``,
        ``print_freq``, ``resume``, ``start_epoch`` and ``amp``.
    """
    import argparse

    def str2bool(value):
        """Convert a command-line string into a real boolean."""
        if isinstance(value, bool):
            return value
        lowered = value.strip().lower()
        if lowered in ("true", "t", "yes", "y", "1"):
            return True
        # the empty string stays falsy, as it was with the former type=bool
        if lowered in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError("boolean value expected, got '{}'".format(value))

    parser = argparse.ArgumentParser(description="pytorch lraspp training")

    parser.add_argument("--data-path", default="/data/", help="VOCdevkit root")
    parser.add_argument("--num-classes", default=20, type=int)
    parser.add_argument("--device", default="cuda", help="training device")
    parser.add_argument("-b", "--batch-size", default=4, type=int)
    parser.add_argument("--epochs", default=30, type=int, metavar="N",
                        help="number of total epochs to train")

    parser.add_argument('--lr', default=0.0001, type=float, help='initial learning rate')
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    parser.add_argument('--print-freq', default=10, type=int, help='print frequency')
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                        help='start epoch')
    # Mixed precision training parameters.
    # type=bool would turn every non-empty string (including "False") into True,
    # so the value is parsed explicitly; a bare "--amp" enables the option.
    parser.add_argument("--amp", nargs="?", const=True, default=False, type=str2bool,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()

    return args
def create_model(num_classes, pretrain=True):
    """Build the LR-ASPP MobileNetV3-Large model, optionally loading pretrained weights.

    Args:
        num_classes: number of model outputs (background included).
        pretrain: when True (default), load ``./lraspp_mobilenet_v3_large.pth``.

    Returns:
        The constructed model.
    """
    model = lraspp_mobilenetv3_large(num_classes=num_classes)
    if not pretrain:
        return model

    # Load the LR-ASPP checkpoint that matches this architecture. A
    # DeepLabV3-ResNet50 file belongs to a different network, and with
    # strict=False non-matching keys are skipped instead of raising an error.
    weights_dict = torch.load("./lraspp_mobilenet_v3_large.pth", map_location='cpu')

    if num_classes != 21:
        # The official pretrained weights have 21 classes (background included).
        # When training on a custom dataset, drop the class-dependent weights
        # so that mismatching shapes do not raise an error.
        for k in list(weights_dict.keys()):
            if "low_classifier" in k or "high_classifier" in k:
                del weights_dict[k]

    missing_keys, unexpected_keys = model.load_state_dict(weights_dict, strict=False)
    if missing_keys or unexpected_keys:
        print("missing_keys: ", missing_keys)
        print("unexpected_keys: ", unexpected_keys)

    return model
path:'{}'.".format(VOC_root)) # load train data set # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> train.txt train_dataset = VOCSegmentation(args.data_path, year="2012", transforms=get_transform(train=True), txt_name="train.txt") # load validation data set # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> val.txt val_dataset = VOCSegmentation(args.data_path, year="2012", transforms=get_transform(train=False), txt_name="val.txt") print("Creating data loaders") if args.distributed: train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) test_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset) else: train_sampler = torch.utils.data.RandomSampler(train_dataset) test_sampler = torch.utils.data.SequentialSampler(val_dataset) train_data_loader = torch.utils.data.DataLoader( train_dataset, batch_size=args.batch_size, sampler=train_sampler, num_workers=args.workers, collate_fn=train_dataset.collate_fn, drop_last=True) val_data_loader = torch.utils.data.DataLoader( val_dataset, batch_size=1, sampler=test_sampler, num_workers=args.workers, collate_fn=train_dataset.collate_fn) print("Creating model") # create model num_classes equal background + 20 classes model = create_model(num_classes=num_classes) model.to(device) if args.sync_bn: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) model_without_ddp = model if args.distributed: model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) model_without_ddp = model.module params_to_optimize = [ {"params": [p for p in model_without_ddp.backbone.parameters() if p.requires_grad]}, {"params": [p for p in model_without_ddp.classifier.parameters() if p.requires_grad]}, ] optimizer = torch.optim.SGD( params_to_optimize, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) scaler = torch.cuda.amp.GradScaler() if args.amp else None # 创建学习率更新策略,这里是每个step更新一次(不是每个epoch) lr_scheduler = create_lr_scheduler(optimizer, len(train_data_loader), 
if __name__ == "__main__":
    import argparse

    def str2bool(value):
        """Convert a command-line token into a real boolean.

        ``type=bool`` must not be used with argparse: ``bool("False")`` is
        True, so ``--amp False`` would silently switch the option on.
        """
        if isinstance(value, bool):
            return value
        token = value.strip().lower()
        if token in ("true", "t", "yes", "y", "1"):
            return True
        if token in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError(
            "expected a boolean value, got '{}'".format(value))

    parser = argparse.ArgumentParser(
        description=__doc__)

    # root directory of the training data (VOCdevkit)
    parser.add_argument('--data-path', default='/data/', help='dataset')
    # type of device used for training
    parser.add_argument('--device', default='cuda', help='device')
    # number of object classes (background not included)
    parser.add_argument('--num-classes', default=20, type=int, help='num_classes')
    # batch size on each GPU
    parser.add_argument('-b', '--batch-size', default=4, type=int,
                        help='images per gpu, the total batch size is $NGPU x batch_size')
    # epoch index from which training (re)starts
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # total number of training epochs
    parser.add_argument('--epochs', default=20, type=int, metavar='N',
                        help='number of total epochs to run')
    # synchronise BatchNorm statistics across GPUs; off by default because it
    # slows training down. Accepts `--sync_bn`, `--sync_bn True`, `--sync_bn False`.
    parser.add_argument('--sync_bn', type=str2bool, nargs='?', const=True, default=False,
                        help='whether using SyncBatchNorm')
    # number of data loading / preprocessing workers
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    # learning rate; defaults to 0.0001, try a larger value if results are poor
    parser.add_argument('--lr', default=0.0001, type=float,
                        help='initial learning rate')
    # SGD momentum
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # SGD weight decay
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # how often (in iterations) training progress is printed
    parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
    # directory where checkpoints are written
    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')
    # continue training from a previously saved checkpoint
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    # evaluate only, no training
    parser.add_argument(
        "--test-only",
        dest="test_only",
        help="Only test the model",
        action="store_true",
    )

    # number of distributed processes
    parser.add_argument('--world-size', default=1, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
    # Mixed precision training parameters.
    # Accepts `--amp`, `--amp True` and `--amp False`.
    parser.add_argument("--amp", type=str2bool, nargs='?', const=True, default=False,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()

    # create the checkpoint directory up front when one was requested
    if args.output_dir:
        mkdir(args.output_dir)

    main(args)
class ConfusionMatrix(object):
    """Accumulate a pixel-level confusion matrix and derive accuracy / IoU.

    Rows are indexed by the ground-truth class, columns by the predicted one.
    """

    def __init__(self, num_classes):
        self.num_classes = num_classes
        # allocated lazily on the first update so it lives on the data's device
        self.mat = None

    def update(self, a, b):
        """Add one batch: ``a`` holds flattened labels, ``b`` flattened predictions."""
        num = self.num_classes
        if self.mat is None:
            self.mat = torch.zeros((num, num), dtype=torch.int64, device=a.device)
        with torch.no_grad():
            # keep only pixels whose label is a real class index
            valid = (a >= 0) & (a < num)
            # encode every (label, prediction) pair as one flat index so a
            # single bincount produces the whole matrix
            flat_index = num * a[valid].to(torch.int64) + b[valid]
            counts = torch.bincount(flat_index, minlength=num ** 2)
            self.mat += counts.reshape(num, num)

    def reset(self):
        """Zero the accumulated counts (no-op before the first update)."""
        if self.mat is None:
            return
        self.mat.zero_()

    def compute(self):
        """Return ``(global accuracy, per-class accuracy, per-class IoU)``."""
        hist = self.mat.float()
        correct = torch.diag(hist)  # diagonal = correctly predicted pixels
        per_label = hist.sum(1)
        per_prediction = hist.sum(0)
        acc_global = correct.sum() / hist.sum()
        acc = correct / per_label
        iu = correct / (per_label + per_prediction - correct)
        return acc_global, acc, iu

    def reduce_from_all_processes(self):
        """Sum the matrix over all processes when torch.distributed is active."""
        if not (torch.distributed.is_available() and torch.distributed.is_initialized()):
            return
        torch.distributed.barrier()
        torch.distributed.all_reduce(self.mat)

    def __str__(self):
        acc_global, acc, iu = self.compute()
        template = (
            'global correct: {:.1f}\n'
            'average row correct: {}\n'
            'IoU: {}\n'
            'mean IoU: {:.1f}')
        row_correct = ['{:.1f}'.format(i) for i in (acc * 100).tolist()]
        class_iou = ['{:.1f}'.format(i) for i in (iu * 100).tolist()]
        return template.format(
            acc_global.item() * 100,
            row_correct,
            class_iou,
            iu.mean().item() * 100)
class MetricLogger(object):
    """Keep a set of named ``SmoothedValue`` meters and print progress lines."""

    def __init__(self, delimiter="\t"):
        # meters are created on first use by update()
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Feed one new value per keyword into the meter of the same name."""
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        """Expose meters as attributes, e.g. ``logger.loss``.

        Raises:
            AttributeError: if ``attr`` is not the name of a meter.
        """
        # __getattr__ only runs after normal lookup failed. Read `meters`
        # from the instance dict: evaluating `self.meters` while it is not set
        # yet (object built without __init__, e.g. by copy/unpickle) would
        # re-enter this method until RecursionError.
        meters = self.__dict__.get("meters")
        if meters is not None and attr in meters:
            return meters[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        loss_str = []
        for name, meter in self.meters.items():
            loss_str.append(
                "{}: {}".format(name, str(meter))
            )
        return self.delimiter.join(loss_str)

    def synchronize_between_processes(self):
        """Ask every meter to synchronise itself across processes."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register (or replace) the meter stored under ``name``."""
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield the items of ``iterable`` and print a status line every
        ``print_freq`` iterations, plus the total time at the end.

        ``iterable`` must support ``len()``.
        """
        i = 0
        if not header:
            header = ''
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        # pad the iteration counter to the width of the total count
        space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
        use_cuda = torch.cuda.is_available()
        fields = [header,
                  '[{0' + space_fmt + '}/{1}]',
                  'eta: {eta}',
                  '{meters}',
                  'time: {time}',
                  'data: {data}']
        if use_cuda:
            # the memory column only exists when CUDA is available
            fields.append('max mem: {memory:.0f}')
        log_msg = self.delimiter.join(fields)
        MB = 1024.0 * 1024.0
        for obj in iterable:
            # time spent waiting for the next item
            data_time.update(time.time() - end)
            yield obj
            # full iteration time, including the consumer's work on `obj`
            iter_time.update(time.time() - end)
            if i % print_freq == 0:
                eta_seconds = iter_time.global_avg * (len(iterable) - i)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
                values = dict(eta=eta_string,
                              meters=str(self),
                              time=str(iter_time),
                              data=str(data_time))
                if use_cuda:
                    values['memory'] = torch.cuda.max_memory_allocated() / MB
                print(log_msg.format(i, len(iterable), **values))
            i += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print('{} Total time: {}'.format(header, total_time_str))


def mkdir(path):
    """Create ``path`` including parents; an existing directory is not an error.

    Raises:
        FileExistsError: if ``path`` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)


def setup_for_distributed(is_master):
    """
    This function disables printing when not in master process
    """
    import builtins as __builtin__
    builtin_print = __builtin__.print

    def print(*args, **kwargs):
        # `force=True` lets a non-master process print anyway
        force = kwargs.pop('force', False)
        if is_master or force:
            builtin_print(*args, **kwargs)

    # replaces the builtin for the whole process
    __builtin__.print = print


def is_dist_avail_and_initialized():
    """Return True only when torch.distributed is available and initialised."""
    if not dist.is_available():
        return False
    if not dist.is_initialized():
        return False
    return True


def get_world_size():
    """Number of processes in the group, or 1 when not running distributed."""
    if not is_dist_avail_and_initialized():
        return 1
    return dist.get_world_size()


def get_rank():
    """Rank of this process, or 0 when not running distributed."""
    if not is_dist_avail_and_initialized():
        return 0
    return dist.get_rank()


def is_main_process():
    """True for rank 0 (and for any non-distributed run)."""
    return get_rank() == 0


def save_on_master(*args, **kwargs):
    """Forward to ``torch.save`` on the main process only."""
    if is_main_process():
        torch.save(*args, **kwargs)


def init_distributed_mode(args):
    """Fill ``args`` with rank / gpu / distributed settings and start the
    process group.

    Sets ``args.distributed = False`` and returns early when neither the
    RANK/WORLD_SIZE nor the SLURM_PROCID environment variables are present
    and ``args`` carries no ``rank`` attribute.
    """
    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
        args.rank = int(os.environ["RANK"])
        args.world_size = int(os.environ['WORLD_SIZE'])
        args.gpu = int(os.environ['LOCAL_RANK'])
    elif 'SLURM_PROCID' in os.environ:
        args.rank = int(os.environ['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()
    elif hasattr(args, "rank"):
        # rank was supplied by the caller; nothing to derive
        pass
    else:
        print('Not using distributed mode')
        args.distributed = False
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                         world_size=args.world_size, rank=args.rank)
    # from here on only rank 0 prints (unless force=True is passed)
    setup_for_distributed(args.rank == 0)
def criterion(inputs, target):
    """Cross-entropy over every head in ``inputs`` (a dict of logits).

    With a single head the 'out' loss is returned; otherwise the 'aux' loss
    is added with weight 0.5.
    """
    losses = {}
    for name, x in inputs.items():
        # pixels labelled 255 (object borders or padding) are ignored
        losses[name] = nn.functional.cross_entropy(x, target, ignore_index=255)

    if len(losses) == 1:
        return losses['out']

    return losses['out'] + 0.5 * losses['aux']


def evaluate(model, data_loader, device, num_classes):
    """Run ``model`` over ``data_loader`` and return the filled confusion matrix."""
    model.eval()
    confmat = utils.ConfusionMatrix(num_classes)
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = 'Test:'
    with torch.no_grad():
        for image, target in metric_logger.log_every(data_loader, 100, header):
            image, target = image.to(device), target.to(device)
            output = model(image)
            output = output['out']

            # argmax over dim 1 turns the logits into predicted class ids
            confmat.update(target.flatten(), output.argmax(1).flatten())

        confmat.reduce_from_all_processes()

    return confmat


def train_one_epoch(model, optimizer, data_loader, device, epoch, lr_scheduler, print_freq=10, scaler=None):
    """Train for one pass over ``data_loader``.

    The scheduler is stepped after every optimizer step. Passing a
    ``scaler`` enables autocast and scaled backward passes.

    Returns:
        ``(mean loss of the epoch, learning rate of the first param group
        after the last step)``.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    for image, target in metric_logger.log_every(data_loader, print_freq, header):
        image, target = image.to(device), target.to(device)
        with torch.cuda.amp.autocast(enabled=scaler is not None):
            output = model(image)
            loss = criterion(output, target)

        optimizer.zero_grad()
        if scaler is not None:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()

        lr_scheduler.step()

        lr = optimizer.param_groups[0]["lr"]
        metric_logger.update(loss=loss.item(), lr=lr)

    return metric_logger.meters["loss"].global_avg, lr


def create_lr_scheduler(optimizer,
                        num_step: int,
                        epochs: int,
                        warmup=True,
                        warmup_epochs=1,
                        warmup_factor=1e-3):
    """Build a per-step ``LambdaLR``: linear warmup, then polynomial decay.

    Args:
        optimizer: optimizer whose learning rate is scheduled.
        num_step: number of optimizer steps in one epoch.
        epochs: total number of epochs.
        warmup: whether to warm up first (any truthy value enables it).
        warmup_epochs: number of epochs spent warming up.
        warmup_factor: lr multiplier at step 0 of the warmup.

    Returns:
        A ``torch.optim.lr_scheduler.LambdaLR`` meant to be stepped once per
        optimizer step (not once per epoch).
    """
    assert num_step > 0 and epochs > 0
    # truthiness instead of `is False` / `is True`, so values such as 0 or 1
    # behave like the corresponding bool
    if not warmup:
        warmup_epochs = 0

    warmup_steps = warmup_epochs * num_step
    decay_steps = (epochs - warmup_epochs) * num_step

    def f(x):
        """
        Return the lr multiplier for step ``x``.
        Note that LambdaLR already calls this once when it is constructed.
        """
        # `warmup_steps > 0` avoids dividing by zero when warmup is requested
        # with warmup_epochs == 0
        if warmup and warmup_steps > 0 and x <= warmup_steps:
            alpha = float(x) / warmup_steps
            # during warmup the multiplier goes warmup_factor -> 1
            return warmup_factor * (1 - alpha) + alpha
        if decay_steps <= 0:
            # the whole schedule was warmup; nothing left to decay
            return 0.0
        # after warmup the multiplier goes 1 -> 0
        # reference: deeplab_v2 "Learning rate policy"
        remaining = 1 - (x - warmup_steps) / decay_steps
        # clamp: a negative base raised to 0.9 would be a complex number
        return max(remaining, 0.0) ** 0.9

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)
class SegmentationPresetEval:
    """Evaluation-time preprocessing: resize, convert to tensor, normalise."""

    def __init__(self, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        # min and max are both base_size, so the resize target is fixed
        steps = [
            T.RandomResize(base_size, base_size),
            T.ToTensor(),
            T.Normalize(mean=mean, std=std),
        ]
        self.transforms = T.Compose(steps)

    def __call__(self, img, target):
        return self.transforms(img, target)


def main(args):
    """Evaluate saved LR-ASPP weights on the VOC2012 val split and print the
    confusion-matrix summary."""
    cuda_ok = torch.cuda.is_available()
    device = torch.device(args.device if cuda_ok else "cpu")
    assert os.path.exists(args.weights), f"weights {args.weights} not found."

    # segmentation classes + background
    num_classes = args.num_classes + 1

    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> val.txt
    val_dataset = VOCSegmentation(args.data_path,
                                  year="2012",
                                  transforms=SegmentationPresetEval(520),
                                  txt_name="val.txt")

    num_workers = 8
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             num_workers=num_workers,
                                             pin_memory=True,
                                             collate_fn=val_dataset.collate_fn)

    model = lraspp_mobilenetv3_large(num_classes=num_classes)
    # the checkpoint stores the network weights under the 'model' key
    checkpoint = torch.load(args.weights, map_location=device)
    model.load_state_dict(checkpoint['model'])
    model.to(device)

    confmat = evaluate(model, val_loader, device=device, num_classes=num_classes)
    print(confmat)


def parse_args():
    """Parse the command-line options of the validation script."""
    import argparse

    parser = argparse.ArgumentParser(description="pytorch lraspp validation")

    parser.add_argument("--data-path", default="/data/", help="VOCdevkit root")
    parser.add_argument("--weights", default="./save_weights/model_29.pth")
    parser.add_argument("--num-classes", type=int, default=20)
    parser.add_argument("--device", default="cuda", help="training device")
    parser.add_argument('--print-freq', type=int, default=10, help='print frequency')

    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()

    main(args)
DUTS数据集官方下载地址:[http://saliencydetection.net/duts/](http://saliencydetection.net/duts/) - 如果下载不了,可以通过我提供的百度云下载,链接: https://pan.baidu.com/s/1nBI6GTN0ZilqH4Tvu18dow 密码: r7k6 - 其中DUTS-TR为训练集,DUTS-TE是测试(验证)集,数据集解压后目录结构如下: ``` ├── DUTS-TR │ ├── DUTS-TR-Image: 该文件夹存放所有训练集的图片 │ └── DUTS-TR-Mask: 该文件夹存放对应训练图片的GT标签(Mask蒙板形式) │ └── DUTS-TE ├── DUTS-TE-Image: 该文件夹存放所有测试(验证)集的图片 └── DUTS-TE-Mask: 该文件夹存放对应测试(验证)图片的GT标签(Mask蒙板形式) ``` - 注意训练或者验证过程中,将`--data-path`指向`DUTS-TR`所在根目录 ## 官方权重 从官方转换得到的权重: - `u2net_full.pth`下载链接: https://pan.baidu.com/s/1ojJZS8v3F_eFKkF3DEdEXA 密码: fh1v - `u2net_lite.pth`下载链接: https://pan.baidu.com/s/1TIWoiuEz9qRvTX9quDqQHg 密码: 5stj `u2net_full`在DUTS-TE上的验证结果(使用`validation.py`进行验证): ``` MAE: 0.044 maxF1: 0.868 ``` **注:** - 这里的maxF1和原论文中的结果有些差异,经过对比发现差异主要来自post_norm,原仓库中会对预测结果进行post_norm,但在本仓库中将post_norm给移除了。 如果加上post_norm这里的maxF1为`0.872`,如果需要做该后处理可自行添加,post_norm流程如下,其中output为验证时网络预测的输出: ```python ma = torch.max(output) mi = torch.min(output) output = (output - mi) / (ma - mi) ``` - 如果要载入官方提供的权重,需要将`src/model.py`中`ConvBNReLU`类里卷积的bias设置成True,因为官方代码里没有进行设置(Conv2d的bias默认为True)。 因为卷积后跟了BN,所以bias是起不到作用的,所以在本仓库中默认将bias设置为False。 ## 训练记录(`u2net_full`) 训练指令: ``` torchrun --nproc_per_node=4 train_multi_GPU.py --lr 0.004 --amp ``` 训练最终在DUTS-TE上的验证结果: ``` MAE: 0.047 maxF1: 0.859 ``` 训练过程详情可见results.txt文件,训练权重下载链接: https://pan.baidu.com/s/1df2jMkrjbgEv-r1NMaZCZg 密码: n4l6 ## 训练方法 * 确保提前准备好数据集 * 若要使用单GPU或者CPU训练,直接使用train.py训练脚本 * 若要使用多GPU训练,使用`torchrun --nproc_per_node=8 train_multi_GPU.py`指令,`nproc_per_node`参数为使用GPU数量 * 如果想指定使用哪些GPU设备可在指令前加上`CUDA_VISIBLE_DEVICES=0,3`(例如我只要使用设备中的第1块和第4块GPU设备) * `CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py` ## 如果对U2Net网络不了解的可参考我的bilibili - [https://www.bilibili.com/video/BV1yB4y1z7m](https://www.bilibili.com/video/BV1yB4y1z7m) ## 进一步了解该项目,以及对U2Net代码的分析可参考我的bilibili - [https://www.bilibili.com/video/BV1Kt4y137iS](https://www.bilibili.com/video/BV1Kt4y137iS) ## U2NET网络结构 ![u2net](./u2net.png) 
def convert_conv_bn(new_weight, prefix, ks, v):
    """Copy one conv / batch-norm tensor of an official checkpoint into
    ``new_weight`` under the ``<prefix>.conv.*`` / ``<prefix>.bn.*`` naming.

    ``ks`` is the tail of the original key: ``ks[0]`` names the layer and
    ``ks[1]`` the parameter. ``num_batches_tracked`` is dropped on purpose;
    any other unknown parameter is reported with a print. Layers that are
    neither conv nor bn are ignored silently.
    """
    conv_targets = {"weight": ".conv.weight",
                    "bias": ".conv.bias"}
    bn_targets = {"running_mean": ".bn.running_mean",
                  "running_var": ".bn.running_var",
                  "weight": ".bn.weight",
                  "bias": ".bn.bias"}

    layer = ks[0]
    # the conv test comes first, exactly as a layer name is matched upstream
    if "conv" in layer:
        suffix = conv_targets.get(ks[1])
        if suffix is None:
            print(f"unrecognized weight {prefix + ks[1]}")
        else:
            new_weight[prefix + suffix] = v
        return

    if "bn" in layer:
        param = ks[1]
        suffix = bn_targets.get(param)
        if suffix is not None:
            new_weight[prefix + suffix] = v
        elif param != "num_batches_tracked":
            print(f"unrecognized weight {prefix + ks[1]}")
        return
class DUTSDataset(data.Dataset):
    """DUTS salient-object dataset: ``.jpg`` images paired with ``.png`` masks.

    Args:
        root: directory that contains ``DUTS-TR`` and ``DUTS-TE``.
        train: read ``DUTS-TR`` when True, ``DUTS-TE`` otherwise.
        transforms: optional callable ``(image, target) -> (image, target)``.
    """

    def __init__(self, root: str, train: bool = True, transforms=None):
        assert os.path.exists(root), f"path '{root}' does not exist."
        if train:
            self.image_root = os.path.join(root, "DUTS-TR", "DUTS-TR-Image")
            self.mask_root = os.path.join(root, "DUTS-TR", "DUTS-TR-Mask")
        else:
            self.image_root = os.path.join(root, "DUTS-TE", "DUTS-TE-Image")
            self.mask_root = os.path.join(root, "DUTS-TE", "DUTS-TE-Mask")
        assert os.path.exists(self.image_root), f"path '{self.image_root}' does not exist."
        assert os.path.exists(self.mask_root), f"path '{self.mask_root}' does not exist."

        # sorted: os.listdir order is arbitrary, sorting makes indices stable
        image_names = sorted(p for p in os.listdir(self.image_root) if p.endswith(".jpg"))
        # a set gives O(1) membership tests in the pairing loop below
        available_masks = {p for p in os.listdir(self.mask_root) if p.endswith(".png")}
        assert len(image_names) > 0, f"not find any images in {self.image_root}."

        # check that every image has a mask and keep the masks in image order
        mask_names = []
        for p in image_names:
            # swap only the trailing extension; str.replace would also rewrite
            # a ".jpg" that appears in the middle of the file name
            mask_name = p[:-len(".jpg")] + ".png"
            assert mask_name in available_masks, f"{p} has no corresponding mask."
            mask_names.append(mask_name)

        self.images_path = [os.path.join(self.image_root, n) for n in image_names]
        self.masks_path = [os.path.join(self.mask_root, n) for n in mask_names]

        self.transforms = transforms

    def __getitem__(self, idx):
        """Load sample ``idx`` as (RGB image, grayscale mask), then apply the
        transforms if any were given."""
        image_path = self.images_path[idx]
        mask_path = self.masks_path[idx]
        image = cv2.imread(image_path, flags=cv2.IMREAD_COLOR)
        # cv2.imread returns None instead of raising when reading fails
        assert image is not None, f"failed to read image: {image_path}"
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # BGR -> RGB

        target = cv2.imread(mask_path, flags=cv2.IMREAD_GRAYSCALE)
        assert target is not None, f"failed to read mask: {mask_path}"

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        return image, target

    def __len__(self):
        return len(self.images_path)

    @staticmethod
    def collate_fn(batch):
        """Stack a list of (image, target) samples into two zero-padded batches."""
        images, targets = list(zip(*batch))
        batched_imgs = cat_list(images, fill_value=0)
        batched_targets = cat_list(targets, fill_value=0)

        return batched_imgs, batched_targets
def time_synchronized():
    """Return ``time.time()`` after waiting for pending CUDA work, if any."""
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()


def main():
    """Run U2-Net on one image, show the masked result and save it to
    ``pred_result.png``."""
    weights_path = "./u2net_full.pth"
    img_path = "./test.png"
    threshold = 0.5

    assert os.path.exists(img_path), f"image file {img_path} dose not exists."
    # fail early with a clear message instead of inside torch.load
    assert os.path.exists(weights_path), f"weights file {weights_path} does not exist."

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(320),
        transforms.Normalize(mean=(0.485, 0.456, 0.406),
                             std=(0.229, 0.224, 0.225))
    ])

    bgr_img = cv2.imread(img_path, flags=cv2.IMREAD_COLOR)
    # cv2.imread returns None (no exception) when the file cannot be decoded
    assert bgr_img is not None, f"failed to read image: {img_path}"
    origin_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)

    h, w = origin_img.shape[:2]
    img = data_transform(origin_img)
    img = torch.unsqueeze(img, 0).to(device)  # [C, H, W] -> [1, C, H, W]

    model = u2net_full()
    weights = torch.load(weights_path, map_location='cpu')
    # training checkpoints wrap the weights in a dict under "model"
    if "model" in weights:
        model.load_state_dict(weights["model"])
    else:
        model.load_state_dict(weights)
    model.to(device)
    model.eval()

    with torch.no_grad():
        # warm-up pass so the timed run below excludes one-off initialisation
        img_height, img_width = img.shape[-2:]
        init_img = torch.zeros((1, 3, img_height, img_width), device=device)
        model(init_img)

        t_start = time_synchronized()
        pred = model(img)
        t_end = time_synchronized()
        print("inference time: {}".format(t_end - t_start))
        pred = torch.squeeze(pred).to("cpu").numpy()  # [1, 1, H, W] -> [H, W]

        # back to the original resolution before thresholding
        pred = cv2.resize(pred, dsize=(w, h), interpolation=cv2.INTER_LINEAR)
        pred_mask = np.where(pred > threshold, 1, 0)
        origin_img = np.array(origin_img, dtype=np.uint8)
        # zero out every pixel outside the predicted salient region
        seg_img = origin_img * pred_mask[..., None]
        plt.imshow(seg_img)
        plt.show()
        cv2.imwrite("pred_result.png", cv2.cvtColor(seg_img.astype(np.uint8), cv2.COLOR_RGB2BGR))


if __name__ == '__main__':
    main()
0.003936 MAE: 0.083 maxF1: 0.791 [epoch: 40] train_loss: 0.4905 lr: 0.003884 MAE: 0.073 maxF1: 0.805 [epoch: 50] train_loss: 0.4337 lr: 0.003818 MAE: 0.063 maxF1: 0.821 [epoch: 60] train_loss: 0.4157 lr: 0.003738 MAE: 0.067 maxF1: 0.818 [epoch: 70] train_loss: 0.3424 lr: 0.003644 MAE: 0.058 maxF1: 0.840 [epoch: 80] train_loss: 0.2909 lr: 0.003538 MAE: 0.057 maxF1: 0.842 [epoch: 90] train_loss: 0.3220 lr: 0.003420 MAE: 0.064 maxF1: 0.837 [epoch: 100] train_loss: 0.2653 lr: 0.003292 MAE: 0.055 maxF1: 0.847 [epoch: 110] train_loss: 0.2627 lr: 0.003153 MAE: 0.055 maxF1: 0.846 [epoch: 120] train_loss: 0.3230 lr: 0.003005 MAE: 0.058 maxF1: 0.837 [epoch: 130] train_loss: 0.2177 lr: 0.002850 MAE: 0.053 maxF1: 0.852 [epoch: 140] train_loss: 0.2807 lr: 0.002688 MAE: 0.061 maxF1: 0.824 [epoch: 150] train_loss: 0.2091 lr: 0.002520 MAE: 0.057 maxF1: 0.846 [epoch: 160] train_loss: 0.1971 lr: 0.002349 MAE: 0.049 maxF1: 0.857 [epoch: 170] train_loss: 0.2157 lr: 0.002175 MAE: 0.050 maxF1: 0.851 [epoch: 180] train_loss: 0.1881 lr: 0.002000 MAE: 0.048 maxF1: 0.857 [epoch: 190] train_loss: 0.1855 lr: 0.001825 MAE: 0.047 maxF1: 0.860 [epoch: 200] train_loss: 0.1817 lr: 0.001651 MAE: 0.047 maxF1: 0.863 [epoch: 210] train_loss: 0.1740 lr: 0.001480 MAE: 0.048 maxF1: 0.858 [epoch: 220] train_loss: 0.1707 lr: 0.001312 MAE: 0.048 maxF1: 0.860 [epoch: 230] train_loss: 0.1653 lr: 0.001150 MAE: 0.048 maxF1: 0.859 [epoch: 240] train_loss: 0.1652 lr: 0.000995 MAE: 0.046 maxF1: 0.860 [epoch: 250] train_loss: 0.1631 lr: 0.000847 MAE: 0.048 maxF1: 0.857 [epoch: 260] train_loss: 0.1584 lr: 0.000708 MAE: 0.047 maxF1: 0.862 [epoch: 270] train_loss: 0.1590 lr: 0.000580 MAE: 0.047 maxF1: 0.860 [epoch: 280] train_loss: 0.1521 lr: 0.000462 MAE: 0.047 maxF1: 0.861 [epoch: 290] train_loss: 0.1535 lr: 0.000356 MAE: 0.047 maxF1: 0.861 [epoch: 300] train_loss: 0.1520 lr: 0.000262 MAE: 0.047 maxF1: 0.860 [epoch: 310] train_loss: 0.1488 lr: 0.000182 MAE: 0.047 maxF1: 0.860 [epoch: 320] train_loss: 0.1493 lr: 
0.000116 MAE: 0.047 maxF1: 0.859 [epoch: 330] train_loss: 0.1470 lr: 0.000064 MAE: 0.047 maxF1: 0.860 [epoch: 340] train_loss: 0.1493 lr: 0.000028 MAE: 0.047 maxF1: 0.859 [epoch: 350] train_loss: 0.1482 lr: 0.000006 MAE: 0.047 maxF1: 0.858 [epoch: 359] train_loss: 0.1518 lr: 0.000000 MAE: 0.047 maxF1: 0.859 ================================================ FILE: pytorch_segmentation/u2net/src/__init__.py ================================================ from .model import u2net_full, u2net_lite ================================================ FILE: pytorch_segmentation/u2net/src/model.py ================================================ from typing import Union, List import torch import torch.nn as nn import torch.nn.functional as F class ConvBNReLU(nn.Module): def __init__(self, in_ch: int, out_ch: int, kernel_size: int = 3, dilation: int = 1): super().__init__() padding = kernel_size // 2 if dilation == 1 else dilation self.conv = nn.Conv2d(in_ch, out_ch, kernel_size, padding=padding, dilation=dilation, bias=False) self.bn = nn.BatchNorm2d(out_ch) self.relu = nn.ReLU(inplace=True) def forward(self, x: torch.Tensor) -> torch.Tensor: return self.relu(self.bn(self.conv(x))) class DownConvBNReLU(ConvBNReLU): def __init__(self, in_ch: int, out_ch: int, kernel_size: int = 3, dilation: int = 1, flag: bool = True): super().__init__(in_ch, out_ch, kernel_size, dilation) self.down_flag = flag def forward(self, x: torch.Tensor) -> torch.Tensor: if self.down_flag: x = F.max_pool2d(x, kernel_size=2, stride=2, ceil_mode=True) return self.relu(self.bn(self.conv(x))) class UpConvBNReLU(ConvBNReLU): def __init__(self, in_ch: int, out_ch: int, kernel_size: int = 3, dilation: int = 1, flag: bool = True): super().__init__(in_ch, out_ch, kernel_size, dilation) self.up_flag = flag def forward(self, x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor: if self.up_flag: x1 = F.interpolate(x1, size=x2.shape[2:], mode='bilinear', align_corners=False) return 
class U2Net(nn.Module):
    """U^2-Net built from a stage configuration.

    ``cfg`` must contain the keys ``"encode"`` and ``"decode"``. Each entry of
    those lists has six fields ``[height, in_ch, mid_ch, out_ch, RSU4F, side]``:
    when the fifth field is ``False`` the stage is ``RSU(height, in_ch, mid_ch,
    out_ch)``, otherwise ``RSU4F(in_ch, mid_ch, out_ch)``; when the sixth field
    is ``True`` a 3x3 side convolution producing ``out_ch`` maps is attached.
    """

    def __init__(self, cfg: dict, out_ch: int = 1):
        super().__init__()
        assert "encode" in cfg
        assert "decode" in cfg
        self.encode_num = len(cfg["encode"])

        # side heads are collected across the encoder and the decoder, in order
        side_heads = []

        def build_stages(stage_cfgs):
            stages = []
            for stage_cfg in stage_cfgs:
                # stage_cfg: [height, in_ch, mid_ch, out_ch, RSU4F, side]
                assert len(stage_cfg) == 6
                height, in_ch, mid_ch, stage_out, use_rsu4f, has_side = stage_cfg
                if use_rsu4f is False:
                    stages.append(RSU(height, in_ch, mid_ch, stage_out))
                else:
                    stages.append(RSU4F(in_ch, mid_ch, stage_out))

                if has_side is True:
                    side_heads.append(nn.Conv2d(stage_out, out_ch, kernel_size=3, padding=1))
            return stages

        self.encode_modules = nn.ModuleList(build_stages(cfg["encode"]))
        self.decode_modules = nn.ModuleList(build_stages(cfg["decode"]))
        self.side_modules = nn.ModuleList(side_heads)
        # 1x1 convolution that fuses the concatenated side outputs
        self.out_conv = nn.Conv2d(self.encode_num * out_ch, out_ch, kernel_size=1)

    def forward(self, x: torch.Tensor) -> Union[torch.Tensor, List[torch.Tensor]]:
        """Run the network.

        Returns, in training mode, a list ``[fused, side_1, ..., side_k]`` of
        raw (pre-sigmoid) maps; in eval mode, ``sigmoid(fused)``.
        """
        _, _, h, w = x.shape

        # encoder: keep every stage output, downsample between stages
        last_stage = self.encode_num - 1
        skips = []
        for idx, stage in enumerate(self.encode_modules):
            x = stage(x)
            skips.append(x)
            if idx == last_stage:
                continue
            x = F.max_pool2d(x, kernel_size=2, stride=2, ceil_mode=True)

        # decoder: upsample to the skip's size, concatenate, run the stage
        x = skips.pop()
        stage_outputs = [x]
        for stage in self.decode_modules:
            skip = skips.pop()
            x = F.interpolate(x, size=skip.shape[2:], mode='bilinear', align_corners=False)
            x = stage(torch.cat([x, skip], dim=1))
            stage_outputs.insert(0, x)

        # side heads: consume stage outputs from the end, resize to input size
        side_maps = []
        for head in self.side_modules:
            feature = stage_outputs.pop()
            side_maps.append(F.interpolate(head(feature), size=[h, w], mode='bilinear', align_corners=False))
        side_maps.reverse()

        fused = self.out_conv(torch.cat(side_maps, dim=1))

        if not self.training:
            return torch.sigmoid(fused)

        # do not use torch.sigmoid for amp safe
        return [fused] + side_maps
def convert_onnx(m, save_path):
    """Switch ``m`` to eval mode and export it to ONNX at ``save_path``.

    The export is traced with a random ``1 x 3 x 288 x 288`` input and uses
    opset 11 with the trained parameters stored in the file.
    """
    m.eval()
    dummy_input = torch.rand(1, 3, 288, 288, requires_grad=True)

    export_options = {
        "export_params": True,
        "opset_version": 11,
    }
    # positional arguments: model, model input, output path (or file-like object)
    torch.onnx.export(m, dummy_input, save_path, **export_options)
def main(args):
    """Train U2Net on DUTS on a single device.

    Args:
        args: parsed command-line namespace (see ``parse_args``); the fields
            read here are ``device``, ``batch_size``, ``data_path``,
            ``weight_decay``, ``lr``, ``epochs``, ``amp``, ``resume``,
            ``start_epoch``, ``print_freq`` and ``eval_interval``.

    Side effects: appends metrics to a timestamped ``results*.txt`` file and
    writes checkpoints under ``save_weights/``.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    batch_size = args.batch_size

    # file that records training and validation information
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    train_dataset = DUTSDataset(args.data_path, train=True, transforms=SODPresetTrain([320, 320], crop_size=288))
    val_dataset = DUTSDataset(args.data_path, train=False, transforms=SODPresetEval([320, 320]))

    # os.cpu_count() may return None; fall back to 1 so min() never compares None
    num_workers = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])
    train_data_loader = data.DataLoader(train_dataset,
                                        batch_size=batch_size,
                                        num_workers=num_workers,
                                        shuffle=True,
                                        pin_memory=True,
                                        collate_fn=train_dataset.collate_fn)

    val_data_loader = data.DataLoader(val_dataset,
                                      batch_size=1,  # must be 1
                                      num_workers=num_workers,
                                      pin_memory=True,
                                      collate_fn=val_dataset.collate_fn)

    model = u2net_full()
    model.to(device)

    params_group = get_params_groups(model, weight_decay=args.weight_decay)
    optimizer = torch.optim.AdamW(params_group, lr=args.lr, weight_decay=args.weight_decay)
    lr_scheduler = create_lr_scheduler(optimizer, len(train_data_loader), args.epochs,
                                       warmup=True, warmup_epochs=2)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    if args.resume:
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp:
            scaler.load_state_dict(checkpoint["scaler"])

    # best validation metrics seen so far (lower MAE / higher maxF1 is better)
    current_mae, current_f1 = 1.0, 0.0
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        mean_loss, lr = train_one_epoch(model, optimizer, train_data_loader, device, epoch,
                                        lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler)

        save_file = {"model": model.state_dict(),
                     "optimizer": optimizer.state_dict(),
                     "lr_scheduler": lr_scheduler.state_dict(),
                     "epoch": epoch,
                     "args": args}
        if args.amp:
            save_file["scaler"] = scaler.state_dict()

        if epoch % args.eval_interval == 0 or epoch == args.epochs - 1:
            # validate only every eval_interval epochs to save training time
            mae_metric, f1_metric = evaluate(model, val_data_loader, device=device)
            mae_info, f1_info = mae_metric.compute(), f1_metric.compute()
            print(f"[epoch: {epoch}] val_MAE: {mae_info:.3f} val_maxF1: {f1_info:.3f}")
            # write into txt
            with open(results_file, "a") as f:
                # record train_loss, lr and the validation metrics of this epoch
                write_info = f"[epoch: {epoch}] train_loss: {mean_loss:.4f} lr: {lr:.6f} " \
                             f"MAE: {mae_info:.3f} maxF1: {f1_info:.3f} \n"
                f.write(write_info)

            # save_best
            if current_mae >= mae_info and current_f1 <= f1_info:
                # remember the new best; without this update every evaluation
                # would overwrite model_best.pth regardless of its quality
                current_mae, current_f1 = mae_info, f1_info
                torch.save(save_file, "save_weights/model_best.pth")

        # only save latest 10 epoch weights
        if os.path.exists(f"save_weights/model_{epoch-10}.pth"):
            os.remove(f"save_weights/model_{epoch-10}.pth")

        torch.save(save_file, f"save_weights/model_{epoch}.pth")

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print("training time {}".format(total_time_str))
class SODPresetTrain:
    """Training-time transform pipeline applied jointly to image and target.

    The steps, in order: ToTensor, Resize to ``base_size`` (mask resized too),
    RandomCrop to ``crop_size``, RandomHorizontalFlip with probability
    ``hflip_prob``, and Normalize with ``mean``/``std``.
    """

    def __init__(self, base_size: Union[int, List[int]], crop_size: int, hflip_prob=0.5,
                 mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        pipeline = [
            T.ToTensor(),
            T.Resize(base_size, resize_mask=True),
            T.RandomCrop(crop_size),
            T.RandomHorizontalFlip(hflip_prob),
            T.Normalize(mean=mean, std=std),
        ]
        self.transforms = T.Compose(pipeline)

    def __call__(self, img, target):
        """Return whatever the composed pipeline returns for ``(img, target)``."""
        return self.transforms(img, target)
def main(args):
    """Train (or, with ``--test-only``, evaluate) U2Net on DUTS, optionally distributed.

    Args:
        args: parsed command-line namespace built in the ``__main__`` section
            of this script; ``init_distributed_mode`` adds the distributed
            fields to it.

    Side effects: appends metrics to a timestamped ``results*.txt`` file and
    writes checkpoints under ``args.output_dir``.
    """
    init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)

    # init_distributed_mode returns early without setting args.rank when the
    # run is not distributed, so read it defensively instead of crashing.
    is_master = getattr(args, "rank", 0) in [-1, 0]

    # file that records training and validation information
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    train_dataset = DUTSDataset(args.data_path, train=True, transforms=SODPresetTrain([320, 320], crop_size=288))
    val_dataset = DUTSDataset(args.data_path, train=False, transforms=SODPresetEval([320, 320]))

    print("Creating data loaders")
    if args.distributed:
        train_sampler = data.distributed.DistributedSampler(train_dataset)
        test_sampler = data.distributed.DistributedSampler(val_dataset)
    else:
        train_sampler = data.RandomSampler(train_dataset)
        test_sampler = data.SequentialSampler(val_dataset)

    train_data_loader = data.DataLoader(
        train_dataset, batch_size=args.batch_size,
        sampler=train_sampler, num_workers=args.workers,
        pin_memory=True, collate_fn=train_dataset.collate_fn, drop_last=True)

    val_data_loader = data.DataLoader(
        val_dataset, batch_size=1,  # batch_size must be 1
        sampler=test_sampler, num_workers=args.workers,
        pin_memory=True, collate_fn=val_dataset.collate_fn)

    # create model
    model = u2net_full()
    model.to(device)

    if args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    params_group = get_params_groups(model, weight_decay=args.weight_decay)
    optimizer = torch.optim.AdamW(params_group, lr=args.lr, weight_decay=args.weight_decay)
    lr_scheduler = create_lr_scheduler(optimizer, len(train_data_loader), args.epochs,
                                       warmup=True, warmup_epochs=2)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # when --resume points at a previous checkpoint, continue from it
    if args.resume:
        # If map_location is missing, torch.load will first load the module to CPU
        # and then copy each parameter to where it was saved,
        # which would result in all processes on the same machine using the same set of devices.
        checkpoint = torch.load(args.resume, map_location='cpu')  # weights, optimizer and lr schedule
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp:
            scaler.load_state_dict(checkpoint["scaler"])

    if args.test_only:
        mae_metric, f1_metric = evaluate(model, val_data_loader, device=device)
        print(mae_metric, f1_metric)
        return

    print("Start training")
    # best validation metrics seen so far (lower MAE / higher maxF1 is better)
    current_mae, current_f1 = 1.0, 0.0
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        mean_loss, lr = train_one_epoch(model, optimizer, train_data_loader, device, epoch,
                                        lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler)

        save_file = {'model': model_without_ddp.state_dict(),
                     'optimizer': optimizer.state_dict(),
                     "lr_scheduler": lr_scheduler.state_dict(),
                     'args': args,
                     'epoch': epoch}
        if args.amp:
            save_file["scaler"] = scaler.state_dict()

        if epoch % args.eval_interval == 0 or epoch == args.epochs - 1:
            # validate only every eval_interval epochs to save training time
            mae_metric, f1_metric = evaluate(model, val_data_loader, device=device)
            mae_info, f1_info = mae_metric.compute(), f1_metric.compute()
            print(f"[epoch: {epoch}] val_MAE: {mae_info:.3f} val_maxF1: {f1_info:.3f}")

            # only the main process writes the log file
            if is_master:
                # write into txt
                with open(results_file, "a") as f:
                    # record train_loss, lr and the validation metrics of this epoch
                    write_info = f"[epoch: {epoch}] train_loss: {mean_loss:.4f} lr: {lr:.6f} " \
                                 f"MAE: {mae_info:.3f} maxF1: {f1_info:.3f} \n"
                    f.write(write_info)

            # save_best
            if current_mae >= mae_info and current_f1 <= f1_info:
                # remember the new best; without this update every evaluation
                # would overwrite model_best.pth regardless of its quality
                current_mae, current_f1 = mae_info, f1_info
                if args.output_dir:
                    # save_on_master only writes on the main process
                    save_on_master(save_file,
                                   os.path.join(args.output_dir, 'model_best.pth'))

        if args.output_dir:
            if is_master:
                # only save latest 10 epoch weights
                if os.path.exists(os.path.join(args.output_dir, f'model_{epoch - 10}.pth')):
                    os.remove(os.path.join(args.output_dir, f'model_{epoch - 10}.pth'))

            # save_on_master only writes on the main process
            save_on_master(save_file,
                           os.path.join(args.output_dir, f'model_{epoch}.pth'))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))
class SmoothedValue(object):
    """Keep a sliding window of recent values plus running totals.

    The window (``self.deque``) backs ``median``, ``avg``, ``max`` and
    ``value``; ``total`` and ``count`` back ``global_avg``. ``fmt`` is the
    template used by ``__str__`` and may reference any of those five names.
    """

    def __init__(self, window_size=20, fmt=None):
        self.fmt = "{value:.4f} ({global_avg:.4f})" if fmt is None else fmt
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0

    def update(self, value, n=1):
        """Record ``value`` once in the window and ``n`` times in the totals."""
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Sum ``count`` and ``total`` over all processes.

        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        stats = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
        dist.barrier()
        dist.all_reduce(stats)
        count, total = stats.tolist()
        self.count = int(count)
        self.total = total

    @property
    def median(self):
        window = torch.tensor(list(self.deque))
        return window.median().item()

    @property
    def avg(self):
        window = torch.tensor(list(self.deque), dtype=torch.float32)
        return window.mean().item()

    @property
    def global_avg(self):
        return self.total / self.count

    @property
    def max(self):
        return max(self.deque)

    @property
    def value(self):
        # most recently recorded value
        return self.deque[-1]

    def __str__(self):
        fields = dict(median=self.median,
                      avg=self.avg,
                      global_avg=self.global_avg,
                      max=self.max,
                      value=self.value)
        return self.fmt.format(**fields)
class F1Score(object):
    """
    Accumulates per-image precision/recall curves and reports the maximum
    F-measure (with the weight 0.3 used in ``compute``) of the averaged curves.

    refer: https://github.com/xuebinqin/DIS/blob/main/IS-Net/basics.py
    """

    def __init__(self, threshold: float = 0.5):
        self.threshold = threshold
        self.precision_cum = None
        self.recall_cum = None
        self.num_cum = None

    def update(self, pred: torch.Tensor, gt: torch.Tensor):
        """Add one image's precision/recall curves (255 bins) to the running sums."""
        batch_size, c, h, w = gt.shape
        assert batch_size == 1, f"validation mode batch_size must be 1, but got batch_size: {batch_size}."
        resize_pred = F.interpolate(pred, (h, w), mode="bilinear", align_corners=False)

        foreground = torch.gt(gt, self.threshold)
        background = torch.le(gt, self.threshold)
        gt_num = torch.sum(foreground.float())

        def descending_cumulative_hist(values):
            # histogram over [0, 1], flipped so bins run from high to low
            # prediction value, then accumulated
            hist = torch.histc(values, bins=255, min=0.0, max=1.0)
            return torch.cumsum(torch.flipud(hist), dim=0)

        # predictions where the GT is foreground / background
        fg_cum = descending_cumulative_hist(resize_pred[foreground])
        bg_cum = descending_cumulative_hist(resize_pred[background])

        precision = fg_cum / (fg_cum + bg_cum + 1e-4)
        recall = fg_cum / (gt_num + 1e-4)

        # accumulators are created lazily on the first update
        if self.precision_cum is None:
            self.precision_cum = torch.full_like(precision, fill_value=0.)
        if self.recall_cum is None:
            self.recall_cum = torch.full_like(recall, fill_value=0.)
        if self.num_cum is None:
            self.num_cum = torch.zeros([1], dtype=gt.dtype, device=gt.device)

        self.precision_cum += precision
        self.recall_cum += recall
        self.num_cum += batch_size

    def compute(self):
        """Return the maximum F-measure over the averaged precision/recall curves."""
        mean_precision = self.precision_cum / self.num_cum
        mean_recall = self.recall_cum / self.num_cum
        numerator = (1 + 0.3) * mean_precision * mean_recall
        denominator = 0.3 * mean_precision + mean_recall + 1e-8
        return torch.amax(numerator / denominator).item()

    def reduce_from_all_processes(self):
        """Sum the accumulators over all processes when torch.distributed is active."""
        if not torch.distributed.is_available():
            return
        if not torch.distributed.is_initialized():
            return
        torch.distributed.barrier()
        for accumulator in (self.precision_cum, self.recall_cum, self.num_cum):
            torch.distributed.all_reduce(accumulator)

    def __str__(self):
        return f'maxF1: {self.compute():.3f}'
def mkdir(path):
    """Create ``path`` (including parents); do nothing if it already exists.

    Any ``OSError`` other than "already exists" propagates to the caller.
    """
    try:
        os.makedirs(path)
    except FileExistsError:
        # FileExistsError is the OSError raised for errno.EEXIST
        pass
def criterion(inputs, target):
    """Sum of binary cross-entropy-with-logits losses of every map in ``inputs``.

    Each element of ``inputs`` is compared against the same ``target``; an
    empty ``inputs`` yields ``0``.
    """
    total_loss = 0
    for logits in inputs:
        total_loss = total_loss + F.binary_cross_entropy_with_logits(logits, target)

    return total_loss
def create_lr_scheduler(optimizer,
                        num_step: int,
                        epochs: int,
                        warmup=True,
                        warmup_epochs=1,
                        warmup_factor=1e-3,
                        end_factor=1e-6):
    """Build a per-step LambdaLR: linear warmup followed by cosine decay.

    Args:
        optimizer: optimizer whose learning rate is scheduled.
        num_step: number of optimizer steps per epoch.
        epochs: total number of training epochs.
        warmup: whether to use the warmup phase.
        warmup_epochs: length of the warmup phase in epochs.
        warmup_factor: lr multiplier at step 0 of the warmup.
        end_factor: lr multiplier reached at the end of the cosine phase.

    Returns:
        A ``torch.optim.lr_scheduler.LambdaLR`` meant to be stepped once per
        training iteration.
    """
    assert num_step > 0 and epochs > 0
    if warmup is False:
        warmup_epochs = 0

    def f(x):
        """
        Return the lr multiplier for step ``x``.
        Note that PyTorch evaluates the schedule once (step 0) when the
        scheduler is constructed, before training starts.
        """
        # warmup_epochs > 0 guard: a zero-length warmup must fall through to
        # the cosine branch instead of dividing by zero at step 0
        if warmup is True and warmup_epochs > 0 and x <= (warmup_epochs * num_step):
            alpha = float(x) / (warmup_epochs * num_step)
            # during warmup the multiplier goes warmup_factor -> 1
            return warmup_factor * (1 - alpha) + alpha
        else:
            current_step = (x - warmup_epochs * num_step)
            cosine_steps = (epochs - warmup_epochs) * num_step
            # after warmup the multiplier goes 1 -> end_factor
            return ((1 + math.cos(current_step * math.pi / cosine_steps)) / 2) * (1 - end_factor) + end_factor

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)
class RandomCrop(object):
    """Crop the same random ``size x size`` window from image and target.

    Inputs smaller than ``size`` along either of their last two dimensions
    are padded first.
    """

    def __init__(self, size: int):
        self.size = size

    def pad_if_smaller(self, img, fill=0):
        """Pad ``img`` on the right/bottom with ``fill`` until both of its last
        two dimensions are at least ``self.size``; return it unchanged otherwise.
        """
        # Read height/width from the shape: `img.size` on a tensor is a method,
        # so unpacking it (as done previously) raised a TypeError whenever
        # padding was actually needed.
        oh, ow = img.shape[-2:]
        if min(oh, ow) < self.size:
            padh = self.size - oh if oh < self.size else 0
            padw = self.size - ow if ow < self.size else 0
            # padding order: [left, top, right, bottom]
            img = F.pad(img, [0, 0, padw, padh], fill=fill)
        return img

    def __call__(self, image, target):
        image = self.pad_if_smaller(image)
        target = self.pad_if_smaller(target)
        # one set of crop parameters shared by image and target keeps them aligned
        crop_params = T.RandomCrop.get_params(image, (self.size, self.size))
        image = F.crop(image, *crop_params)
        target = F.crop(target, *crop_params)
        return image, target
class SODPresetEval:
    """Evaluation preprocessing: to-tensor, image-only resize, then normalization."""

    def __init__(self, base_size: Union[int, List[int]], mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        steps = [
            T.ToTensor(),
            # resize_mask=False: only the image is resized, the target is left as is
            T.Resize(base_size, resize_mask=False),
            T.Normalize(mean=mean, std=std),
        ]
        self.transforms = T.Compose(steps)

    def __call__(self, img, target):
        return self.transforms(img, target)


def main(args):
    """Load U2Net weights, evaluate on the DUTS validation split and print MAE / F1."""
    use_device = args.device if torch.cuda.is_available() else "cpu"
    device = torch.device(use_device)
    assert os.path.exists(args.weights), f"weights {args.weights} not found."

    val_dataset = DUTSDataset(args.data_path, train=False, transforms=SODPresetEval([320, 320]))

    loader_kwargs = dict(
        batch_size=1,  # must be 1
        num_workers=4,
        pin_memory=True,
        shuffle=False,
        collate_fn=val_dataset.collate_fn,
    )
    val_data_loader = data.DataLoader(val_dataset, **loader_kwargs)

    model = u2net_full()
    checkpoint = torch.load(args.weights, map_location='cpu')
    # Checkpoints may be either a raw state dict or a dict wrapping it under "model".
    state_dict = checkpoint["model"] if "model" in checkpoint else checkpoint
    model.load_state_dict(state_dict)
    model.to(device)

    mae_metric, f1_metric = evaluate(model, val_data_loader, device=device)
    print(mae_metric, f1_metric)


def parse_args():
    """Parse the command-line options of the validation script."""
    import argparse

    parser = argparse.ArgumentParser(description="pytorch u2net validation")
    parser.add_argument("--data-path", default="./", help="DUTS root")
    parser.add_argument("--weights", default="./u2net_full.pth")
    parser.add_argument("--device", default="cuda:0", help="training device")
    parser.add_argument('--print-freq', default=10, type=int, help='print frequency')
    return parser.parse_args()


if __name__ == '__main__':
    main(parse_args())
================================================ # U-Net(Convolutional Networks for Biomedical Image Segmentation) ## 该项目主要参考以下开源仓库 * [https://github.com/milesial/Pytorch-UNet](https://github.com/milesial/Pytorch-UNet) * [https://github.com/pytorch/vision](https://github.com/pytorch/vision) ## 环境配置: * Python3.6/3.7/3.8 * Pytorch1.10 * Ubuntu或Centos(Windows暂不支持多GPU训练) * 最好使用GPU训练 * 详细环境配置见`requirements.txt` ## 文件结构: ``` ├── src: 搭建U-Net模型代码 ├── train_utils: 训练、验证以及多GPU训练相关模块 ├── my_dataset.py: 自定义dataset用于读取DRIVE数据集(视网膜血管分割) ├── train.py: 以单GPU为例进行训练 ├── train_multi_GPU.py: 针对使用多GPU的用户使用 ├── predict.py: 简易的预测脚本,使用训练好的权重进行预测测试 └── compute_mean_std.py: 统计数据集各通道的均值和标准差 ``` ## DRIVE数据集下载地址: * 官网地址: [https://drive.grand-challenge.org/](https://drive.grand-challenge.org/) * 百度云链接: [https://pan.baidu.com/s/1Tjkrx2B9FgoJk0KviA-rDw](https://pan.baidu.com/s/1Tjkrx2B9FgoJk0KviA-rDw) 密码: 8no8 ## 训练方法 * 确保提前准备好数据集 * 若要使用单GPU或者CPU训练,直接使用train.py训练脚本 * 若要使用多GPU训练,使用`torchrun --nproc_per_node=8 train_multi_GPU.py`指令,`nproc_per_node`参数为使用GPU数量 * 如果想指定使用哪些GPU设备可在指令前加上`CUDA_VISIBLE_DEVICES=0,3`(例如我只要使用设备中的第1块和第4块GPU设备) * `CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py` ## 注意事项 * 在使用训练脚本时,注意要将`--data-path`设置为自己存放`DRIVE`文件夹所在的**根目录** * 在使用预测脚本时,要将`weights_path`设置为你自己生成的权重路径。 * 使用validation文件时,注意确保你的验证集或者测试集中必须包含每个类别的目标,并且使用时只需要修改`--num-classes`、`--data-path`和`--weights`即可,其他代码尽量不要改动 ## 使用U-Net在DRIVE数据集上训练得到的权重(仅供测试使用) - 链接: https://pan.baidu.com/s/1BOqkEpgt1XRqziyc941Hcw 密码: p50a ## 如果对U-Net网络不了解的可参考我的bilibili * [https://www.bilibili.com/video/BV1Vq4y127fB/](https://www.bilibili.com/video/BV1Vq4y127fB/) ## 进一步了解该项目,以及对U-Net代码的分析可参考我的bilibili * [https://b23.tv/PCJJmqN](https://b23.tv/PCJJmqN) ## 本项目U-Net默认使用双线性插值做为上采样,结构图如下 ![u-net](unet.png) ================================================ FILE: pytorch_segmentation/unet/compute_mean_std.py ================================================ import os from PIL import Image import numpy as np def main(): 
def main():
    """Print the per-channel mean and std of the DRIVE training images.

    Statistics are computed per image over the pixels where the ROI mask
    equals 255, then averaged over all ``.tif`` images.
    """
    img_channels = 3
    img_dir = "./DRIVE/training/images"
    roi_dir = "./DRIVE/training/mask"
    assert os.path.exists(img_dir), f"image dir: '{img_dir}' does not exist."
    assert os.path.exists(roi_dir), f"roi dir: '{roi_dir}' does not exist."

    tif_names = [name for name in os.listdir(img_dir) if name.endswith(".tif")]
    mean_sum = np.zeros(img_channels)
    std_sum = np.zeros(img_channels)

    for tif_name in tif_names:
        roi_name = tif_name.replace(".tif", "_mask.gif")
        # scale pixel values by 1/255 before computing statistics
        pixels = np.array(Image.open(os.path.join(img_dir, tif_name))) / 255.
        roi = np.array(Image.open(os.path.join(roi_dir, roi_name)).convert('L'))
        # keep only the pixels inside the region of interest
        inside = pixels[roi == 255]
        mean_sum += inside.mean(axis=0)
        std_sum += inside.std(axis=0)

    num_images = len(tif_names)
    mean = mean_sum / num_images
    std = std_sum / num_images
    print(f"mean: {mean}")
    print(f"std: {std}")


if __name__ == '__main__':
    main()


class DriveDataset(Dataset):
    """DRIVE dataset returning ``(image, mask)`` pairs.

    Files are read from ``<root>/DRIVE/training`` or ``<root>/DRIVE/test``.
    The mask is the manual annotation scaled by 1/255 plus the inverted ROI
    mask, clipped to [0, 255].
    """

    def __init__(self, root: str, train: bool, transforms=None):
        super().__init__()
        self.flag = "training" if train else "test"
        data_root = os.path.join(root, "DRIVE", self.flag)
        assert os.path.exists(data_root), f"path '{data_root}' does not exists."
        self.transforms = transforms

        image_dir = os.path.join(data_root, "images")
        img_names = [name for name in os.listdir(image_dir) if name.endswith(".tif")]
        # the part of the file name before the first "_" identifies the sample
        prefixes = [name.split("_")[0] for name in img_names]

        self.img_list = [os.path.join(image_dir, name) for name in img_names]

        self.manual = [os.path.join(data_root, "1st_manual", prefix + "_manual1.gif")
                       for prefix in prefixes]
        self._check_files(self.manual)

        self.roi_mask = [os.path.join(data_root, "mask", prefix + f"_{self.flag}_mask.gif")
                         for prefix in prefixes]
        self._check_files(self.roi_mask)

    @staticmethod
    def _check_files(paths):
        """Raise FileNotFoundError for the first path that does not exist."""
        for path in paths:
            if not os.path.exists(path):
                raise FileNotFoundError(f"file {path} does not exists.")

    def __getitem__(self, idx):
        img = Image.open(self.img_list[idx]).convert('RGB')

        manual = np.array(Image.open(self.manual[idx]).convert('L')) / 255
        # invert the ROI mask: 255 becomes 0 and 0 becomes 255
        roi_mask = 255 - np.array(Image.open(self.roi_mask[idx]).convert('L'))
        mask = np.clip(manual + roi_mask, a_min=0, a_max=255)

        # convert back to PIL because the transforms operate on PIL data
        mask = Image.fromarray(mask)

        if self.transforms is not None:
            img, mask = self.transforms(img, mask)

        return img, mask

    def __len__(self):
        return len(self.img_list)

    @staticmethod
    def collate_fn(batch):
        """Batch samples, padding images with 0 and targets with 255."""
        images, targets = tuple(zip(*batch))
        return cat_list(images, fill_value=0), cat_list(targets, fill_value=255)


def cat_list(images, fill_value=0):
    """Stack tensors of differing sizes into one batch padded with ``fill_value``.

    Each tensor is copied into the top-left corner of its slot; the batch uses
    the per-dimension maximum size over all inputs.
    """
    shapes = [img.shape for img in images]
    max_size = tuple(max(dims) for dims in zip(*shapes))
    batched_imgs = images[0].new_full((len(images),) + max_size, fill_value)
    for src, dst in zip(images, batched_imgs):
        dst[..., :src.shape[-2], :src.shape[-1]].copy_(src)
    return batched_imgs
def time_synchronized():
    """Return ``time.time()``, first synchronizing CUDA when CUDA is available."""
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()


def main():
    """Run U-Net on one DRIVE test image and save the result as ``test_result.png``."""
    classes = 1  # exclude background
    weights_path = "./save_weights/best_model.pth"
    img_path = "./DRIVE/test/images/01_test.tif"
    roi_mask_path = "./DRIVE/test/mask/01_test_mask.gif"
    assert os.path.exists(weights_path), f"weights {weights_path} not found."
    assert os.path.exists(img_path), f"image {img_path} not found."
    assert os.path.exists(roi_mask_path), f"image {roi_mask_path} not found."

    mean = (0.709, 0.381, 0.224)
    std = (0.127, 0.079, 0.043)

    # get devices
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # create model
    model = UNet(in_channels=3, num_classes=classes+1, base_c=32)

    # load weights
    checkpoint = torch.load(weights_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])
    model.to(device)

    # load roi mask
    roi_img = np.array(Image.open(roi_mask_path).convert('L'))

    # load image
    original_img = Image.open(img_path).convert('RGB')

    # from pil image to tensor and normalize
    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ])
    # expand batch dimension
    img = torch.unsqueeze(data_transform(original_img), dim=0)

    model.eval()  # switch to evaluation mode
    with torch.no_grad():
        # init model: one forward pass on zeros before the timed run
        img_height, img_width = img.shape[-2:]
        model(torch.zeros((1, 3, img_height, img_width), device=device))

        t_start = time_synchronized()
        output = model(img.to(device))
        t_end = time_synchronized()
        print("inference time: {}".format(t_end - t_start))

        prediction = output['out'].argmax(1).squeeze(0)
        prediction = prediction.to("cpu").numpy().astype(np.uint8)
        # set the foreground pixels to 255 (white)
        prediction[prediction == 1] = 255
        # set the pixels outside the region of interest to 0 (black)
        prediction[roi_img == 0] = 0
        Image.fromarray(prediction).save("test_result.png")


if __name__ == '__main__':
    main()
pytorch_segmentation/unet/requirements.txt ================================================ numpy==1.22.0 torch==1.13.1 torchvision==0.11.1 Pillow ================================================ FILE: pytorch_segmentation/unet/results20220109-165837.txt ================================================ [epoch: 0] train_loss: 1.3541 lr: 0.010000 dice coefficient: 0.111 global correct: 71.1 average row correct: ['78.2', '22.4'] IoU: ['70.2', '9.0'] mean IoU: 39.6 [epoch: 1] train_loss: 1.0442 lr: 0.009955 dice coefficient: 0.014 global correct: 85.7 average row correct: ['98.0', '1.0'] IoU: ['85.7', '0.9'] mean IoU: 43.3 [epoch: 2] train_loss: 0.9315 lr: 0.009910 dice coefficient: 0.000 global correct: 87.2 average row correct: ['99.9', '0.0'] IoU: ['87.2', '0.0'] mean IoU: 43.6 [epoch: 3] train_loss: 0.7929 lr: 0.009864 dice coefficient: 0.021 global correct: 87.4 average row correct: ['100.0', '1.1'] IoU: ['87.4', '1.1'] mean IoU: 44.3 [epoch: 4] train_loss: 0.7329 lr: 0.009819 dice coefficient: 0.210 global correct: 88.8 average row correct: ['99.9', '12.5'] IoU: ['88.6', '12.4'] mean IoU: 50.5 [epoch: 5] train_loss: 0.6343 lr: 0.009774 dice coefficient: 0.440 global correct: 90.5 average row correct: ['99.3', '30.8'] IoU: ['90.2', '29.3'] mean IoU: 59.7 [epoch: 6] train_loss: 0.6105 lr: 0.009728 dice coefficient: 0.575 global correct: 91.8 average row correct: ['98.6', '45.5'] IoU: ['91.3', '41.5'] mean IoU: 66.4 [epoch: 7] train_loss: 0.5982 lr: 0.009683 dice coefficient: 0.651 global correct: 91.3 average row correct: ['95.1', '64.8'] IoU: ['90.5', '48.5'] mean IoU: 69.5 [epoch: 8] train_loss: 0.6641 lr: 0.009637 dice coefficient: 0.651 global correct: 90.7 average row correct: ['94.0', '67.7'] IoU: ['89.8', '48.0'] mean IoU: 68.9 [epoch: 9] train_loss: 0.5530 lr: 0.009592 dice coefficient: 0.550 global correct: 80.7 average row correct: ['81.0', '78.3'] IoU: ['78.5', '34.0'] mean IoU: 56.3 [epoch: 10] train_loss: 0.5676 lr: 0.009547 dice coefficient: 0.659 
global correct: 89.2 average row correct: ['91.0', '77.0'] IoU: ['88.1', '47.6'] mean IoU: 67.8 [epoch: 11] train_loss: 0.5494 lr: 0.009501 dice coefficient: 0.654 global correct: 90.0 average row correct: ['92.8', '70.5'] IoU: ['89.0', '47.3'] mean IoU: 68.1 [epoch: 12] train_loss: 0.5293 lr: 0.009456 dice coefficient: 0.713 global correct: 91.9 average row correct: ['94.6', '73.2'] IoU: ['91.1', '53.5'] mean IoU: 72.3 [epoch: 13] train_loss: 0.5291 lr: 0.009410 dice coefficient: 0.689 global correct: 91.2 average row correct: ['93.6', '75.1'] IoU: ['90.3', '52.2'] mean IoU: 71.3 [epoch: 14] train_loss: 0.5163 lr: 0.009365 dice coefficient: 0.691 global correct: 92.6 average row correct: ['96.3', '67.0'] IoU: ['91.9', '53.4'] mean IoU: 72.6 [epoch: 15] train_loss: 0.5168 lr: 0.009319 dice coefficient: 0.722 global correct: 93.3 average row correct: ['96.6', '70.3'] IoU: ['92.6', '57.1'] mean IoU: 74.9 [epoch: 16] train_loss: 0.5153 lr: 0.009273 dice coefficient: 0.740 global correct: 94.0 average row correct: ['97.7', '68.3'] IoU: ['93.4', '59.1'] mean IoU: 76.3 [epoch: 17] train_loss: 0.4923 lr: 0.009228 dice coefficient: 0.734 global correct: 93.6 average row correct: ['96.9', '70.7'] IoU: ['92.9', '58.4'] mean IoU: 75.7 [epoch: 18] train_loss: 0.4692 lr: 0.009182 dice coefficient: 0.740 global correct: 93.7 average row correct: ['97.0', '71.2'] IoU: ['93.1', '59.2'] mean IoU: 76.1 [epoch: 19] train_loss: 0.4701 lr: 0.009136 dice coefficient: 0.754 global correct: 94.0 average row correct: ['97.1', '72.9'] IoU: ['93.4', '60.8'] mean IoU: 77.1 [epoch: 20] train_loss: 0.4710 lr: 0.009091 dice coefficient: 0.761 global correct: 94.0 average row correct: ['96.8', '75.0'] IoU: ['93.4', '61.5'] mean IoU: 77.5 [epoch: 21] train_loss: 0.4624 lr: 0.009045 dice coefficient: 0.756 global correct: 94.1 average row correct: ['97.3', '72.3'] IoU: ['93.6', '61.1'] mean IoU: 77.3 [epoch: 22] train_loss: 0.4480 lr: 0.008999 dice coefficient: 0.759 global correct: 94.3 average 
row correct: ['97.5', '71.9'] IoU: ['93.7', '61.5'] mean IoU: 77.6 [epoch: 23] train_loss: 0.4342 lr: 0.008954 dice coefficient: 0.748 global correct: 94.3 average row correct: ['98.1', '68.2'] IoU: ['93.7', '60.2'] mean IoU: 77.0 [epoch: 24] train_loss: 0.4465 lr: 0.008908 dice coefficient: 0.771 global correct: 94.5 average row correct: ['97.6', '73.4'] IoU: ['93.9', '63.0'] mean IoU: 78.5 [epoch: 25] train_loss: 0.4295 lr: 0.008862 dice coefficient: 0.770 global correct: 94.5 average row correct: ['97.6', '73.1'] IoU: ['94.0', '62.9'] mean IoU: 78.4 [epoch: 26] train_loss: 0.4246 lr: 0.008816 dice coefficient: 0.768 global correct: 94.6 average row correct: ['98.0', '71.2'] IoU: ['94.1', '62.6'] mean IoU: 78.3 [epoch: 27] train_loss: 0.4180 lr: 0.008770 dice coefficient: 0.771 global correct: 94.6 average row correct: ['97.9', '72.1'] IoU: ['94.1', '63.0'] mean IoU: 78.5 [epoch: 28] train_loss: 0.4408 lr: 0.008724 dice coefficient: 0.775 global correct: 94.5 average row correct: ['97.3', '75.3'] IoU: ['93.9', '63.5'] mean IoU: 78.7 [epoch: 29] train_loss: 0.4323 lr: 0.008678 dice coefficient: 0.763 global correct: 94.5 average row correct: ['97.9', '70.9'] IoU: ['93.9', '62.0'] mean IoU: 78.0 [epoch: 30] train_loss: 0.4144 lr: 0.008632 dice coefficient: 0.772 global correct: 94.3 average row correct: ['96.8', '76.9'] IoU: ['93.7', '63.1'] mean IoU: 78.4 [epoch: 31] train_loss: 0.4130 lr: 0.008586 dice coefficient: 0.776 global correct: 94.6 average row correct: ['97.6', '74.0'] IoU: ['94.0', '63.6'] mean IoU: 78.8 [epoch: 32] train_loss: 0.4109 lr: 0.008540 dice coefficient: 0.776 global correct: 94.6 average row correct: ['97.5', '74.8'] IoU: ['94.0', '63.6'] mean IoU: 78.8 [epoch: 33] train_loss: 0.4190 lr: 0.008494 dice coefficient: 0.779 global correct: 94.7 average row correct: ['97.6', '74.3'] IoU: ['94.1', '64.0'] mean IoU: 79.1 [epoch: 34] train_loss: 0.4163 lr: 0.008448 dice coefficient: 0.773 global correct: 94.6 average row correct: ['97.8', '72.6'] 
IoU: ['94.1', '63.2'] mean IoU: 78.6 [epoch: 35] train_loss: 0.4064 lr: 0.008402 dice coefficient: 0.775 global correct: 94.7 average row correct: ['98.0', '72.1'] IoU: ['94.2', '63.5'] mean IoU: 78.8 [epoch: 36] train_loss: 0.3986 lr: 0.008356 dice coefficient: 0.785 global correct: 94.7 average row correct: ['97.2', '77.1'] IoU: ['94.1', '64.8'] mean IoU: 79.4 [epoch: 37] train_loss: 0.3959 lr: 0.008310 dice coefficient: 0.784 global correct: 94.8 average row correct: ['97.7', '74.9'] IoU: ['94.3', '64.7'] mean IoU: 79.5 [epoch: 38] train_loss: 0.4058 lr: 0.008264 dice coefficient: 0.786 global correct: 94.7 average row correct: ['97.4', '76.4'] IoU: ['94.2', '64.9'] mean IoU: 79.5 [epoch: 39] train_loss: 0.3934 lr: 0.008218 dice coefficient: 0.786 global correct: 94.8 average row correct: ['97.5', '76.2'] IoU: ['94.2', '64.9'] mean IoU: 79.5 [epoch: 40] train_loss: 0.3926 lr: 0.008171 dice coefficient: 0.783 global correct: 94.7 average row correct: ['97.3', '76.4'] IoU: ['94.1', '64.6'] mean IoU: 79.3 [epoch: 41] train_loss: 0.3880 lr: 0.008125 dice coefficient: 0.787 global correct: 94.8 average row correct: ['97.6', '75.6'] IoU: ['94.3', '65.1'] mean IoU: 79.7 [epoch: 42] train_loss: 0.3964 lr: 0.008079 dice coefficient: 0.788 global correct: 94.8 average row correct: ['97.4', '77.1'] IoU: ['94.2', '65.3'] mean IoU: 79.7 [epoch: 43] train_loss: 0.3980 lr: 0.008032 dice coefficient: 0.787 global correct: 94.7 average row correct: ['97.3', '77.0'] IoU: ['94.2', '65.1'] mean IoU: 79.6 [epoch: 44] train_loss: 0.3846 lr: 0.007986 dice coefficient: 0.787 global correct: 94.7 average row correct: ['97.2', '77.4'] IoU: ['94.1', '65.1'] mean IoU: 79.6 [epoch: 45] train_loss: 0.3832 lr: 0.007940 dice coefficient: 0.783 global correct: 94.7 average row correct: ['97.5', '75.6'] IoU: ['94.2', '64.6'] mean IoU: 79.4 [epoch: 46] train_loss: 0.3839 lr: 0.007893 dice coefficient: 0.789 global correct: 94.9 average row correct: ['97.6', '76.1'] IoU: ['94.3', '65.4'] mean IoU: 
79.8 [epoch: 47] train_loss: 0.3739 lr: 0.007847 dice coefficient: 0.789 global correct: 94.8 average row correct: ['97.4', '76.8'] IoU: ['94.3', '65.4'] mean IoU: 79.8 [epoch: 48] train_loss: 0.4064 lr: 0.007800 dice coefficient: 0.783 global correct: 94.7 average row correct: ['97.4', '76.0'] IoU: ['94.1', '64.4'] mean IoU: 79.3 [epoch: 49] train_loss: 0.3878 lr: 0.007754 dice coefficient: 0.787 global correct: 94.7 average row correct: ['97.3', '77.1'] IoU: ['94.2', '65.1'] mean IoU: 79.6 [epoch: 50] train_loss: 0.3856 lr: 0.007707 dice coefficient: 0.788 global correct: 94.8 average row correct: ['97.6', '76.2'] IoU: ['94.3', '65.3'] mean IoU: 79.8 [epoch: 51] train_loss: 0.3883 lr: 0.007661 dice coefficient: 0.788 global correct: 94.5 average row correct: ['96.5', '80.7'] IoU: ['93.9', '65.2'] mean IoU: 79.5 [epoch: 52] train_loss: 0.3965 lr: 0.007614 dice coefficient: 0.791 global correct: 94.8 average row correct: ['97.2', '78.1'] IoU: ['94.2', '65.5'] mean IoU: 79.9 [epoch: 53] train_loss: 0.3851 lr: 0.007567 dice coefficient: 0.793 global correct: 94.9 average row correct: ['97.4', '77.5'] IoU: ['94.3', '65.9'] mean IoU: 80.1 [epoch: 54] train_loss: 0.3859 lr: 0.007521 dice coefficient: 0.790 global correct: 94.9 average row correct: ['97.6', '76.2'] IoU: ['94.3', '65.5'] mean IoU: 79.9 [epoch: 55] train_loss: 0.3801 lr: 0.007474 dice coefficient: 0.790 global correct: 94.8 average row correct: ['97.4', '77.2'] IoU: ['94.3', '65.5'] mean IoU: 79.9 [epoch: 56] train_loss: 0.3928 lr: 0.007427 dice coefficient: 0.786 global correct: 94.9 average row correct: ['98.0', '73.7'] IoU: ['94.4', '64.9'] mean IoU: 79.7 [epoch: 57] train_loss: 0.3930 lr: 0.007381 dice coefficient: 0.790 global correct: 94.6 average row correct: ['96.8', '79.6'] IoU: ['94.0', '65.4'] mean IoU: 79.7 [epoch: 58] train_loss: 0.3738 lr: 0.007334 dice coefficient: 0.789 global correct: 94.9 average row correct: ['97.8', '75.0'] IoU: ['94.4', '65.4'] mean IoU: 79.9 [epoch: 59] train_loss: 
0.3706 lr: 0.007287 dice coefficient: 0.795 global correct: 94.8 average row correct: ['97.1', '79.5'] IoU: ['94.2', '66.1'] mean IoU: 80.2 [epoch: 60] train_loss: 0.3783 lr: 0.007240 dice coefficient: 0.795 global correct: 95.0 average row correct: ['97.7', '76.7'] IoU: ['94.5', '66.2'] mean IoU: 80.3 [epoch: 61] train_loss: 0.3656 lr: 0.007193 dice coefficient: 0.792 global correct: 95.1 average row correct: ['98.0', '74.6'] IoU: ['94.5', '65.7'] mean IoU: 80.1 [epoch: 62] train_loss: 0.3773 lr: 0.007146 dice coefficient: 0.796 global correct: 95.0 average row correct: ['97.7', '76.6'] IoU: ['94.5', '66.3'] mean IoU: 80.4 [epoch: 63] train_loss: 0.3703 lr: 0.007099 dice coefficient: 0.796 global correct: 95.1 average row correct: ['97.9', '75.8'] IoU: ['94.6', '66.2'] mean IoU: 80.4 [epoch: 64] train_loss: 0.3630 lr: 0.007052 dice coefficient: 0.794 global correct: 94.8 average row correct: ['97.1', '79.4'] IoU: ['94.2', '66.0'] mean IoU: 80.1 [epoch: 65] train_loss: 0.3680 lr: 0.007005 dice coefficient: 0.797 global correct: 95.1 average row correct: ['97.7', '76.8'] IoU: ['94.5', '66.4'] mean IoU: 80.5 [epoch: 66] train_loss: 0.3557 lr: 0.006958 dice coefficient: 0.799 global correct: 95.1 average row correct: ['97.5', '77.9'] IoU: ['94.5', '66.7'] mean IoU: 80.6 [epoch: 67] train_loss: 0.3759 lr: 0.006911 dice coefficient: 0.796 global correct: 94.8 average row correct: ['97.0', '80.0'] IoU: ['94.2', '66.3'] mean IoU: 80.2 [epoch: 68] train_loss: 0.3638 lr: 0.006864 dice coefficient: 0.790 global correct: 95.0 average row correct: ['98.0', '74.3'] IoU: ['94.5', '65.4'] mean IoU: 79.9 [epoch: 69] train_loss: 0.3540 lr: 0.006817 dice coefficient: 0.796 global correct: 94.6 average row correct: ['96.3', '83.0'] IoU: ['94.0', '66.2'] mean IoU: 80.1 [epoch: 70] train_loss: 0.3602 lr: 0.006770 dice coefficient: 0.798 global correct: 94.9 average row correct: ['97.1', '79.7'] IoU: ['94.3', '66.6'] mean IoU: 80.4 [epoch: 71] train_loss: 0.3597 lr: 0.006722 dice 
coefficient: 0.797 global correct: 95.0 average row correct: ['97.6', '77.2'] IoU: ['94.5', '66.5'] mean IoU: 80.5 [epoch: 72] train_loss: 0.3618 lr: 0.006675 dice coefficient: 0.802 global correct: 95.1 average row correct: ['97.5', '78.8'] IoU: ['94.5', '67.1'] mean IoU: 80.8 [epoch: 73] train_loss: 0.3582 lr: 0.006628 dice coefficient: 0.803 global correct: 95.1 average row correct: ['97.4', '79.3'] IoU: ['94.5', '67.2'] mean IoU: 80.9 [epoch: 74] train_loss: 0.3624 lr: 0.006580 dice coefficient: 0.800 global correct: 95.1 average row correct: ['97.7', '77.3'] IoU: ['94.6', '66.8'] mean IoU: 80.7 [epoch: 75] train_loss: 0.3648 lr: 0.006533 dice coefficient: 0.795 global correct: 95.1 average row correct: ['98.2', '74.5'] IoU: ['94.6', '66.1'] mean IoU: 80.4 [epoch: 76] train_loss: 0.3553 lr: 0.006486 dice coefficient: 0.801 global correct: 95.0 average row correct: ['97.3', '79.6'] IoU: ['94.4', '67.0'] mean IoU: 80.7 [epoch: 77] train_loss: 0.3632 lr: 0.006438 dice coefficient: 0.796 global correct: 94.6 average row correct: ['96.5', '82.2'] IoU: ['94.0', '66.1'] mean IoU: 80.1 [epoch: 78] train_loss: 0.3511 lr: 0.006391 dice coefficient: 0.801 global correct: 95.2 average row correct: ['97.9', '76.5'] IoU: ['94.7', '67.0'] mean IoU: 80.8 [epoch: 79] train_loss: 0.3602 lr: 0.006343 dice coefficient: 0.803 global correct: 95.2 average row correct: ['97.8', '77.4'] IoU: ['94.7', '67.2'] mean IoU: 80.9 [epoch: 80] train_loss: 0.3585 lr: 0.006295 dice coefficient: 0.801 global correct: 94.9 average row correct: ['97.0', '80.8'] IoU: ['94.3', '67.0'] mean IoU: 80.7 [epoch: 81] train_loss: 0.3543 lr: 0.006248 dice coefficient: 0.802 global correct: 95.1 average row correct: ['97.5', '78.6'] IoU: ['94.6', '67.1'] mean IoU: 80.8 [epoch: 82] train_loss: 0.3689 lr: 0.006200 dice coefficient: 0.804 global correct: 95.1 average row correct: ['97.3', '79.8'] IoU: ['94.5', '67.4'] mean IoU: 80.9 [epoch: 83] train_loss: 0.3588 lr: 0.006152 dice coefficient: 0.803 global 
correct: 94.9 average row correct: ['96.9', '81.6'] IoU: ['94.3', '67.2'] mean IoU: 80.8 [epoch: 84] train_loss: 0.3640 lr: 0.006105 dice coefficient: 0.798 global correct: 94.9 average row correct: ['97.3', '78.9'] IoU: ['94.4', '66.4'] mean IoU: 80.4 [epoch: 85] train_loss: 0.3635 lr: 0.006057 dice coefficient: 0.802 global correct: 95.2 average row correct: ['97.7', '77.7'] IoU: ['94.6', '67.1'] mean IoU: 80.9 [epoch: 86] train_loss: 0.3441 lr: 0.006009 dice coefficient: 0.802 global correct: 95.2 average row correct: ['98.0', '76.1'] IoU: ['94.7', '67.0'] mean IoU: 80.9 [epoch: 87] train_loss: 0.3553 lr: 0.005961 dice coefficient: 0.806 global correct: 95.1 average row correct: ['97.4', '79.8'] IoU: ['94.6', '67.6'] mean IoU: 81.1 [epoch: 88] train_loss: 0.3558 lr: 0.005913 dice coefficient: 0.804 global correct: 95.0 average row correct: ['97.2', '80.4'] IoU: ['94.5', '67.4'] mean IoU: 80.9 [epoch: 89] train_loss: 0.3638 lr: 0.005865 dice coefficient: 0.804 global correct: 95.1 average row correct: ['97.6', '78.5'] IoU: ['94.6', '67.3'] mean IoU: 81.0 [epoch: 90] train_loss: 0.3546 lr: 0.005817 dice coefficient: 0.804 global correct: 95.1 average row correct: ['97.6', '78.5'] IoU: ['94.6', '67.3'] mean IoU: 80.9 [epoch: 91] train_loss: 0.3587 lr: 0.005769 dice coefficient: 0.804 global correct: 95.0 average row correct: ['97.0', '81.0'] IoU: ['94.4', '67.3'] mean IoU: 80.9 [epoch: 92] train_loss: 0.3546 lr: 0.005721 dice coefficient: 0.804 global correct: 95.0 average row correct: ['97.0', '81.2'] IoU: ['94.4', '67.3'] mean IoU: 80.9 [epoch: 93] train_loss: 0.3505 lr: 0.005673 dice coefficient: 0.804 global correct: 95.2 average row correct: ['97.6', '78.4'] IoU: ['94.6', '67.3'] mean IoU: 81.0 [epoch: 94] train_loss: 0.3545 lr: 0.005625 dice coefficient: 0.803 global correct: 95.0 average row correct: ['97.2', '80.2'] IoU: ['94.5', '67.2'] mean IoU: 80.9 [epoch: 95] train_loss: 0.3497 lr: 0.005577 dice coefficient: 0.806 global correct: 95.2 average row 
correct: ['97.8', '78.0'] IoU: ['94.7', '67.6'] mean IoU: 81.1 [epoch: 96] train_loss: 0.3476 lr: 0.005528 dice coefficient: 0.806 global correct: 95.1 average row correct: ['97.4', '79.9'] IoU: ['94.6', '67.7'] mean IoU: 81.1 [epoch: 97] train_loss: 0.3479 lr: 0.005480 dice coefficient: 0.805 global correct: 95.0 average row correct: ['96.9', '81.8'] IoU: ['94.4', '67.5'] mean IoU: 81.0 [epoch: 98] train_loss: 0.3563 lr: 0.005432 dice coefficient: 0.807 global correct: 95.1 average row correct: ['97.2', '80.6'] IoU: ['94.6', '67.8'] mean IoU: 81.2 [epoch: 99] train_loss: 0.3444 lr: 0.005383 dice coefficient: 0.805 global correct: 95.2 average row correct: ['97.7', '78.2'] IoU: ['94.7', '67.5'] mean IoU: 81.1 [epoch: 100] train_loss: 0.3419 lr: 0.005335 dice coefficient: 0.805 global correct: 95.1 average row correct: ['97.2', '80.4'] IoU: ['94.5', '67.5'] mean IoU: 81.0 [epoch: 101] train_loss: 0.3504 lr: 0.005286 dice coefficient: 0.807 global correct: 95.0 average row correct: ['96.9', '82.1'] IoU: ['94.4', '67.7'] mean IoU: 81.1 [epoch: 102] train_loss: 0.3511 lr: 0.005238 dice coefficient: 0.802 global correct: 95.0 average row correct: ['97.2', '79.7'] IoU: ['94.4', '67.0'] mean IoU: 80.7 [epoch: 103] train_loss: 0.3431 lr: 0.005189 dice coefficient: 0.802 global correct: 95.2 average row correct: ['98.0', '76.5'] IoU: ['94.7', '67.1'] mean IoU: 80.9 [epoch: 104] train_loss: 0.3453 lr: 0.005140 dice coefficient: 0.805 global correct: 95.2 average row correct: ['97.7', '78.0'] IoU: ['94.7', '67.5'] mean IoU: 81.1 [epoch: 105] train_loss: 0.3475 lr: 0.005092 dice coefficient: 0.805 global correct: 95.0 average row correct: ['97.0', '81.2'] IoU: ['94.4', '67.5'] mean IoU: 81.0 [epoch: 106] train_loss: 0.3434 lr: 0.005043 dice coefficient: 0.806 global correct: 95.1 average row correct: ['97.4', '79.7'] IoU: ['94.6', '67.6'] mean IoU: 81.1 [epoch: 107] train_loss: 0.3426 lr: 0.004994 dice coefficient: 0.807 global correct: 95.2 average row correct: ['97.6', 
'78.8'] IoU: ['94.7', '67.8'] mean IoU: 81.2 [epoch: 108] train_loss: 0.3372 lr: 0.004945 dice coefficient: 0.807 global correct: 95.0 average row correct: ['96.9', '82.1'] IoU: ['94.4', '67.8'] mean IoU: 81.1 [epoch: 109] train_loss: 0.3474 lr: 0.004896 dice coefficient: 0.805 global correct: 95.2 average row correct: ['97.5', '79.1'] IoU: ['94.6', '67.5'] mean IoU: 81.1 [epoch: 110] train_loss: 0.3393 lr: 0.004847 dice coefficient: 0.804 global correct: 95.1 average row correct: ['97.4', '79.4'] IoU: ['94.5', '67.3'] mean IoU: 80.9 [epoch: 111] train_loss: 0.3381 lr: 0.004798 dice coefficient: 0.808 global correct: 95.2 average row correct: ['97.4', '79.9'] IoU: ['94.6', '67.8'] mean IoU: 81.2 [epoch: 112] train_loss: 0.3464 lr: 0.004749 dice coefficient: 0.808 global correct: 95.2 average row correct: ['97.3', '80.4'] IoU: ['94.6', '68.0'] mean IoU: 81.3 [epoch: 113] train_loss: 0.3397 lr: 0.004700 dice coefficient: 0.806 global correct: 95.3 average row correct: ['97.8', '77.7'] IoU: ['94.8', '67.7'] mean IoU: 81.2 [epoch: 114] train_loss: 0.3409 lr: 0.004651 dice coefficient: 0.808 global correct: 95.1 average row correct: ['97.0', '81.7'] IoU: ['94.5', '67.9'] mean IoU: 81.2 [epoch: 115] train_loss: 0.3396 lr: 0.004601 dice coefficient: 0.809 global correct: 95.2 average row correct: ['97.5', '79.8'] IoU: ['94.7', '68.1'] mean IoU: 81.4 [epoch: 116] train_loss: 0.3402 lr: 0.004552 dice coefficient: 0.810 global correct: 95.3 average row correct: ['97.5', '79.8'] IoU: ['94.7', '68.2'] mean IoU: 81.5 [epoch: 117] train_loss: 0.3444 lr: 0.004503 dice coefficient: 0.810 global correct: 95.1 average row correct: ['97.0', '82.2'] IoU: ['94.5', '68.2'] mean IoU: 81.3 [epoch: 118] train_loss: 0.3391 lr: 0.004453 dice coefficient: 0.809 global correct: 95.1 average row correct: ['97.1', '81.7'] IoU: ['94.5', '68.0'] mean IoU: 81.3 [epoch: 119] train_loss: 0.3360 lr: 0.004404 dice coefficient: 0.810 global correct: 95.2 average row correct: ['97.4', '80.5'] IoU: 
['94.7', '68.2'] mean IoU: 81.4 [epoch: 120] train_loss: 0.3418 lr: 0.004354 dice coefficient: 0.810 global correct: 95.2 average row correct: ['97.3', '80.6'] IoU: ['94.7', '68.2'] mean IoU: 81.4 [epoch: 121] train_loss: 0.3308 lr: 0.004304 dice coefficient: 0.809 global correct: 95.1 average row correct: ['97.2', '81.1'] IoU: ['94.6', '68.0'] mean IoU: 81.3 [epoch: 122] train_loss: 0.3440 lr: 0.004255 dice coefficient: 0.808 global correct: 95.2 average row correct: ['97.4', '80.2'] IoU: ['94.6', '67.9'] mean IoU: 81.3 [epoch: 123] train_loss: 0.3344 lr: 0.004205 dice coefficient: 0.810 global correct: 95.2 average row correct: ['97.3', '80.8'] IoU: ['94.6', '68.2'] mean IoU: 81.4 [epoch: 124] train_loss: 0.3282 lr: 0.004155 dice coefficient: 0.811 global correct: 95.2 average row correct: ['97.4', '80.7'] IoU: ['94.7', '68.4'] mean IoU: 81.5 [epoch: 125] train_loss: 0.3342 lr: 0.004105 dice coefficient: 0.809 global correct: 95.2 average row correct: ['97.5', '79.7'] IoU: ['94.7', '68.1'] mean IoU: 81.4 [epoch: 126] train_loss: 0.3411 lr: 0.004055 dice coefficient: 0.809 global correct: 95.2 average row correct: ['97.5', '79.7'] IoU: ['94.7', '68.0'] mean IoU: 81.4 [epoch: 127] train_loss: 0.3415 lr: 0.004005 dice coefficient: 0.809 global correct: 95.3 average row correct: ['97.6', '79.2'] IoU: ['94.7', '68.0'] mean IoU: 81.4 [epoch: 128] train_loss: 0.3360 lr: 0.003955 dice coefficient: 0.808 global correct: 95.3 average row correct: ['97.8', '78.3'] IoU: ['94.8', '67.8'] mean IoU: 81.3 [epoch: 129] train_loss: 0.3323 lr: 0.003905 dice coefficient: 0.808 global correct: 95.0 average row correct: ['96.9', '82.2'] IoU: ['94.5', '67.9'] mean IoU: 81.2 [epoch: 130] train_loss: 0.3427 lr: 0.003855 dice coefficient: 0.807 global correct: 94.9 average row correct: ['96.7', '83.2'] IoU: ['94.3', '67.7'] mean IoU: 81.0 [epoch: 131] train_loss: 0.3402 lr: 0.003804 dice coefficient: 0.808 global correct: 95.1 average row correct: ['97.0', '82.0'] IoU: ['94.5', '67.9'] 
mean IoU: 81.2 [epoch: 132] train_loss: 0.3388 lr: 0.003754 dice coefficient: 0.809 global correct: 95.2 average row correct: ['97.3', '80.7'] IoU: ['94.6', '68.0'] mean IoU: 81.3 [epoch: 133] train_loss: 0.3366 lr: 0.003704 dice coefficient: 0.802 global correct: 94.8 average row correct: ['96.4', '83.4'] IoU: ['94.2', '67.1'] mean IoU: 80.6 [epoch: 134] train_loss: 0.3347 lr: 0.003653 dice coefficient: 0.809 global correct: 95.2 average row correct: ['97.3', '80.7'] IoU: ['94.6', '68.0'] mean IoU: 81.3 [epoch: 135] train_loss: 0.3405 lr: 0.003602 dice coefficient: 0.811 global correct: 95.2 average row correct: ['97.2', '81.2'] IoU: ['94.6', '68.3'] mean IoU: 81.5 [epoch: 136] train_loss: 0.3342 lr: 0.003552 dice coefficient: 0.812 global correct: 95.2 average row correct: ['97.3', '81.1'] IoU: ['94.7', '68.5'] mean IoU: 81.6 [epoch: 137] train_loss: 0.3309 lr: 0.003501 dice coefficient: 0.812 global correct: 95.3 average row correct: ['97.5', '80.2'] IoU: ['94.8', '68.4'] mean IoU: 81.6 [epoch: 138] train_loss: 0.3281 lr: 0.003450 dice coefficient: 0.810 global correct: 95.3 average row correct: ['97.5', '79.8'] IoU: ['94.7', '68.2'] mean IoU: 81.5 [epoch: 139] train_loss: 0.3283 lr: 0.003399 dice coefficient: 0.811 global correct: 95.2 average row correct: ['97.2', '81.6'] IoU: ['94.6', '68.3'] mean IoU: 81.5 [epoch: 140] train_loss: 0.3344 lr: 0.003348 dice coefficient: 0.811 global correct: 95.3 average row correct: ['97.5', '80.1'] IoU: ['94.7', '68.4'] mean IoU: 81.6 [epoch: 141] train_loss: 0.3331 lr: 0.003297 dice coefficient: 0.810 global correct: 95.3 average row correct: ['97.7', '78.8'] IoU: ['94.8', '68.2'] mean IoU: 81.5 [epoch: 142] train_loss: 0.3339 lr: 0.003246 dice coefficient: 0.811 global correct: 95.2 average row correct: ['97.3', '81.0'] IoU: ['94.7', '68.2'] mean IoU: 81.4 [epoch: 143] train_loss: 0.3274 lr: 0.003194 dice coefficient: 0.810 global correct: 95.1 average row correct: ['97.1', '81.7'] IoU: ['94.6', '68.1'] mean IoU: 81.3 
[epoch: 144] train_loss: 0.3410 lr: 0.003143 dice coefficient: 0.807 global correct: 94.9 average row correct: ['96.3', '84.7'] IoU: ['94.2', '67.7'] mean IoU: 81.0 [epoch: 145] train_loss: 0.3397 lr: 0.003092 dice coefficient: 0.802 global correct: 94.7 average row correct: ['96.1', '84.9'] IoU: ['94.0', '67.0'] mean IoU: 80.5 [epoch: 146] train_loss: 0.3273 lr: 0.003040 dice coefficient: 0.811 global correct: 95.2 average row correct: ['97.2', '81.7'] IoU: ['94.6', '68.4'] mean IoU: 81.5 [epoch: 147] train_loss: 0.3300 lr: 0.002988 dice coefficient: 0.810 global correct: 95.1 average row correct: ['96.9', '82.7'] IoU: ['94.5', '68.2'] mean IoU: 81.3 [epoch: 148] train_loss: 0.3318 lr: 0.002937 dice coefficient: 0.811 global correct: 95.2 average row correct: ['97.2', '81.3'] IoU: ['94.6', '68.3'] mean IoU: 81.4 [epoch: 149] train_loss: 0.3350 lr: 0.002885 dice coefficient: 0.810 global correct: 95.2 average row correct: ['97.5', '80.1'] IoU: ['94.7', '68.2'] mean IoU: 81.4 [epoch: 150] train_loss: 0.3335 lr: 0.002833 dice coefficient: 0.811 global correct: 95.2 average row correct: ['97.4', '80.4'] IoU: ['94.7', '68.2'] mean IoU: 81.5 [epoch: 151] train_loss: 0.3259 lr: 0.002781 dice coefficient: 0.810 global correct: 95.1 average row correct: ['97.1', '81.9'] IoU: ['94.6', '68.2'] mean IoU: 81.4 [epoch: 152] train_loss: 0.3304 lr: 0.002728 dice coefficient: 0.810 global correct: 95.1 average row correct: ['97.1', '81.6'] IoU: ['94.6', '68.2'] mean IoU: 81.4 [epoch: 153] train_loss: 0.3352 lr: 0.002676 dice coefficient: 0.811 global correct: 95.2 average row correct: ['97.4', '80.6'] IoU: ['94.7', '68.3'] mean IoU: 81.5 [epoch: 154] train_loss: 0.3272 lr: 0.002624 dice coefficient: 0.811 global correct: 95.2 average row correct: ['97.1', '82.0'] IoU: ['94.6', '68.3'] mean IoU: 81.5 [epoch: 155] train_loss: 0.3339 lr: 0.002571 dice coefficient: 0.811 global correct: 95.2 average row correct: ['97.2', '81.6'] IoU: ['94.6', '68.4'] mean IoU: 81.5 [epoch: 156] 
train_loss: 0.3288 lr: 0.002519 dice coefficient: 0.812 global correct: 95.3 average row correct: ['97.4', '80.5'] IoU: ['94.7', '68.4'] mean IoU: 81.6 [epoch: 157] train_loss: 0.3247 lr: 0.002466 dice coefficient: 0.812 global correct: 95.2 average row correct: ['97.3', '81.0'] IoU: ['94.7', '68.4'] mean IoU: 81.6 [epoch: 158] train_loss: 0.3381 lr: 0.002413 dice coefficient: 0.812 global correct: 95.2 average row correct: ['97.2', '81.7'] IoU: ['94.7', '68.5'] mean IoU: 81.6 [epoch: 159] train_loss: 0.3318 lr: 0.002360 dice coefficient: 0.812 global correct: 95.2 average row correct: ['97.0', '82.8'] IoU: ['94.6', '68.5'] mean IoU: 81.5 [epoch: 160] train_loss: 0.3281 lr: 0.002307 dice coefficient: 0.813 global correct: 95.3 average row correct: ['97.3', '81.1'] IoU: ['94.7', '68.6'] mean IoU: 81.7 [epoch: 161] train_loss: 0.3322 lr: 0.002253 dice coefficient: 0.813 global correct: 95.3 average row correct: ['97.4', '80.9'] IoU: ['94.7', '68.6'] mean IoU: 81.7 [epoch: 162] train_loss: 0.3288 lr: 0.002200 dice coefficient: 0.812 global correct: 95.2 average row correct: ['97.1', '81.8'] IoU: ['94.6', '68.4'] mean IoU: 81.5 [epoch: 163] train_loss: 0.3301 lr: 0.002146 dice coefficient: 0.811 global correct: 95.2 average row correct: ['97.4', '80.5'] IoU: ['94.7', '68.3'] mean IoU: 81.5 [epoch: 164] train_loss: 0.3272 lr: 0.002093 dice coefficient: 0.809 global correct: 95.3 average row correct: ['97.7', '78.6'] IoU: ['94.8', '68.0'] mean IoU: 81.4 [epoch: 165] train_loss: 0.3313 lr: 0.002039 dice coefficient: 0.811 global correct: 95.3 average row correct: ['97.6', '79.8'] IoU: ['94.8', '68.3'] mean IoU: 81.6 [epoch: 166] train_loss: 0.3281 lr: 0.001985 dice coefficient: 0.811 global correct: 95.2 average row correct: ['97.1', '81.8'] IoU: ['94.6', '68.3'] mean IoU: 81.5 [epoch: 167] train_loss: 0.3335 lr: 0.001930 dice coefficient: 0.813 global correct: 95.2 average row correct: ['97.2', '82.0'] IoU: ['94.7', '68.6'] mean IoU: 81.6 [epoch: 168] train_loss: 0.3280 
lr: 0.001876 dice coefficient: 0.813 global correct: 95.3 average row correct: ['97.3', '81.2'] IoU: ['94.7', '68.6'] mean IoU: 81.6 [epoch: 169] train_loss: 0.3346 lr: 0.001822 dice coefficient: 0.813 global correct: 95.3 average row correct: ['97.4', '81.0'] IoU: ['94.8', '68.6'] mean IoU: 81.7 [epoch: 170] train_loss: 0.3314 lr: 0.001767 dice coefficient: 0.813 global correct: 95.2 average row correct: ['97.1', '82.1'] IoU: ['94.6', '68.6'] mean IoU: 81.6 [epoch: 171] train_loss: 0.3287 lr: 0.001712 dice coefficient: 0.813 global correct: 95.2 average row correct: ['97.1', '82.1'] IoU: ['94.7', '68.6'] mean IoU: 81.6 [epoch: 172] train_loss: 0.3258 lr: 0.001657 dice coefficient: 0.813 global correct: 95.2 average row correct: ['97.1', '82.0'] IoU: ['94.7', '68.6'] mean IoU: 81.6 [epoch: 173] train_loss: 0.3413 lr: 0.001601 dice coefficient: 0.812 global correct: 95.2 average row correct: ['97.3', '81.3'] IoU: ['94.7', '68.5'] mean IoU: 81.6 [epoch: 174] train_loss: 0.3314 lr: 0.001546 dice coefficient: 0.812 global correct: 95.2 average row correct: ['97.0', '82.6'] IoU: ['94.6', '68.4'] mean IoU: 81.5 [epoch: 175] train_loss: 0.3314 lr: 0.001490 dice coefficient: 0.812 global correct: 95.1 average row correct: ['96.9', '83.0'] IoU: ['94.6', '68.5'] mean IoU: 81.5 [epoch: 176] train_loss: 0.3302 lr: 0.001434 dice coefficient: 0.813 global correct: 95.2 average row correct: ['97.1', '82.0'] IoU: ['94.7', '68.6'] mean IoU: 81.6 [epoch: 177] train_loss: 0.3378 lr: 0.001378 dice coefficient: 0.813 global correct: 95.3 average row correct: ['97.4', '81.0'] IoU: ['94.7', '68.6'] mean IoU: 81.7 [epoch: 178] train_loss: 0.3316 lr: 0.001321 dice coefficient: 0.812 global correct: 95.3 average row correct: ['97.3', '81.0'] IoU: ['94.7', '68.5'] mean IoU: 81.6 [epoch: 179] train_loss: 0.3241 lr: 0.001265 dice coefficient: 0.812 global correct: 95.2 average row correct: ['97.2', '81.5'] IoU: ['94.7', '68.4'] mean IoU: 81.5 [epoch: 180] train_loss: 0.3229 lr: 0.001208 dice 
coefficient: 0.810 global correct: 95.1 average row correct: ['96.9', '82.8'] IoU: ['94.5', '68.2'] mean IoU: 81.3 [epoch: 181] train_loss: 0.3339 lr: 0.001150 dice coefficient: 0.810 global correct: 95.1 average row correct: ['96.8', '83.2'] IoU: ['94.5', '68.1'] mean IoU: 81.3 [epoch: 182] train_loss: 0.3231 lr: 0.001093 dice coefficient: 0.810 global correct: 95.1 average row correct: ['96.9', '82.8'] IoU: ['94.5', '68.2'] mean IoU: 81.4 [epoch: 183] train_loss: 0.3320 lr: 0.001035 dice coefficient: 0.811 global correct: 95.1 average row correct: ['96.9', '82.6'] IoU: ['94.5', '68.3'] mean IoU: 81.4 [epoch: 184] train_loss: 0.3238 lr: 0.000976 dice coefficient: 0.812 global correct: 95.2 average row correct: ['97.2', '81.7'] IoU: ['94.6', '68.5'] mean IoU: 81.6 [epoch: 185] train_loss: 0.3318 lr: 0.000917 dice coefficient: 0.812 global correct: 95.3 average row correct: ['97.3', '81.0'] IoU: ['94.7', '68.5'] mean IoU: 81.6 [epoch: 186] train_loss: 0.3272 lr: 0.000858 dice coefficient: 0.812 global correct: 95.3 average row correct: ['97.4', '80.5'] IoU: ['94.7', '68.5'] mean IoU: 81.6 [epoch: 187] train_loss: 0.3309 lr: 0.000799 dice coefficient: 0.812 global correct: 95.3 average row correct: ['97.4', '80.8'] IoU: ['94.7', '68.5'] mean IoU: 81.6 [epoch: 188] train_loss: 0.3290 lr: 0.000738 dice coefficient: 0.812 global correct: 95.2 average row correct: ['97.3', '81.3'] IoU: ['94.7', '68.5'] mean IoU: 81.6 [epoch: 189] train_loss: 0.3338 lr: 0.000678 dice coefficient: 0.812 global correct: 95.2 average row correct: ['97.1', '82.1'] IoU: ['94.6', '68.4'] mean IoU: 81.5 [epoch: 190] train_loss: 0.3240 lr: 0.000616 dice coefficient: 0.812 global correct: 95.2 average row correct: ['97.1', '82.0'] IoU: ['94.6', '68.4'] mean IoU: 81.5 [epoch: 191] train_loss: 0.3227 lr: 0.000554 dice coefficient: 0.813 global correct: 95.2 average row correct: ['97.2', '81.7'] IoU: ['94.7', '68.5'] mean IoU: 81.6 [epoch: 192] train_loss: 0.3224 lr: 0.000492 dice coefficient: 0.813 
class IntermediateLayerGetter(nn.ModuleDict):
    """
    Wrap a model so that a forward pass yields selected intermediate outputs.

    The wrapper walks the *direct* children of ``model`` in registration
    order and feeds the output of each child into the next one. It therefore
    assumes that registration order equals execution order and that no child
    module is used twice in the original forward. Nested submodules
    (e.g. ``model.feature1.layer2``) cannot be requested; only names returned
    by ``model.named_children()`` are valid.

    Children registered after the last requested layer are dropped.

    Args:
        model (nn.Module): model whose intermediate activations are wanted.
        return_layers (Dict[name, new_name]): maps the name of a direct child
            of ``model`` to the key under which its output is returned.

    Raises:
        ValueError: if a key of ``return_layers`` is not a direct child of
            ``model``.
    """
    _version = 2
    __annotations__ = {
        "return_layers": Dict[str, str],
    }

    def __init__(self, model: nn.Module, return_layers: Dict[str, str]) -> None:
        child_names = [child_name for child_name, _ in model.named_children()]
        if not set(return_layers).issubset(child_names):
            raise ValueError("return_layers are not present in model")

        requested = return_layers
        # Working copy used only to detect when every requested layer was seen.
        pending = {str(key): str(value) for key, value in return_layers.items()}

        # Rebuild the backbone, keeping children only up to the last requested one.
        kept = OrderedDict()
        for child_name, child in model.named_children():
            kept[child_name] = child
            pending.pop(child_name, None)
            if not pending:
                break

        super().__init__(kept)
        self.return_layers = requested

    def forward(self, x: Tensor) -> Dict[str, Tensor]:
        """Run the kept children sequentially and collect the requested outputs."""
        collected = OrderedDict()
        for layer_name, layer in self.items():
            x = layer(x)
            if layer_name not in self.return_layers:
                continue
            collected[self.return_layers[layer_name]] = x
        return collected
class DoubleConv(nn.Sequential):
    """Two stacked ``3x3 conv -> BatchNorm -> ReLU`` stages.

    Args:
        in_channels: channels of the input feature map.
        out_channels: channels produced by the second convolution.
        mid_channels: channels between the two convolutions; falls back to
            ``out_channels`` when ``None``.
    """

    def __init__(self, in_channels, out_channels, mid_channels=None):
        hidden = out_channels if mid_channels is None else mid_channels
        stages = []
        for c_in, c_out in ((in_channels, hidden), (hidden, out_channels)):
            stages += [
                # bias is omitted because BatchNorm follows immediately
                nn.Conv2d(c_in, c_out, kernel_size=3, padding=1, bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            ]
        super().__init__(*stages)


class Down(nn.Sequential):
    """Encoder step: 2x2 max-pooling followed by a :class:`DoubleConv`."""

    def __init__(self, in_channels, out_channels):
        pool = nn.MaxPool2d(2, stride=2)
        convs = DoubleConv(in_channels, out_channels)
        super().__init__(pool, convs)


class Up(nn.Module):
    """Decoder step: upsample, align with the skip tensor, concatenate, convolve.

    Args:
        in_channels: channel count of the concatenated tensor fed to the
            double convolution.
        out_channels: channel count of the result.
        bilinear: use parameter-free bilinear upsampling when true, otherwise
            a stride-2 transposed convolution that halves the channels.
    """

    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()
        half = in_channels // 2
        if not bilinear:
            self.up = nn.ConvTranspose2d(in_channels, half, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)
        else:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels, half)

    def forward(self, x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor:
        """Upsample ``x1``, pad it to the spatial size of ``x2`` and fuse both."""
        x1 = self.up(x1)
        # Tensors are laid out as [N, C, H, W].
        gap_h = x2.size(2) - x1.size(2)
        gap_w = x2.size(3) - x1.size(3)
        left, top = gap_w // 2, gap_h // 2

        # F.pad order: left, right, top, bottom
        x1 = F.pad(x1, [left, gap_w - left, top, gap_h - top])
        return self.conv(torch.cat([x2, x1], dim=1))


class OutConv(nn.Sequential):
    """Final 1x1 convolution mapping features to per-class logits."""

    def __init__(self, in_channels, num_classes):
        super().__init__(nn.Conv2d(in_channels, num_classes, kernel_size=1))


class UNet(nn.Module):
    """U-Net with four down-sampling and four up-sampling stages.

    Args:
        in_channels: channels of the input image.
        num_classes: number of output logit channels.
        bilinear: forwarded to every :class:`Up` stage.
        base_c: channel width of the first stage; deeper stages use multiples.

    ``forward`` returns a dict whose single key ``"out"`` holds the logits.
    """

    def __init__(self,
                 in_channels: int = 1,
                 num_classes: int = 2,
                 bilinear: bool = True,
                 base_c: int = 64):
        super().__init__()
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.bilinear = bilinear

        # With bilinear upsampling the channel reduction is done by the convs,
        # so the bottleneck and decoder outputs are halved.
        factor = 2 if bilinear else 1
        c1, c2, c4, c8, c16 = (base_c * mult for mult in (1, 2, 4, 8, 16))

        self.in_conv = DoubleConv(in_channels, c1)
        self.down1 = Down(c1, c2)
        self.down2 = Down(c2, c4)
        self.down3 = Down(c4, c8)
        self.down4 = Down(c8, c16 // factor)
        self.up1 = Up(c16, c8 // factor, bilinear)
        self.up2 = Up(c8, c4 // factor, bilinear)
        self.up3 = Up(c4, c2 // factor, bilinear)
        self.up4 = Up(c2, c1, bilinear)
        self.out_conv = OutConv(c1, num_classes)

    def forward(self, x: torch.Tensor) -> Dict[str, torch.Tensor]:
        # Encoder: remember every stage output for the skip connections.
        skips = [self.in_conv(x)]
        for down in (self.down1, self.down2, self.down3, self.down4):
            skips.append(down(skips[-1]))

        # Decoder: start from the bottleneck and consume skips deepest-first.
        x = skips.pop()
        for up in (self.up1, self.up2, self.up3, self.up4):
            x = up(x, skips.pop())

        return {"out": self.out_conv(x)}
""" _version = 2 __annotations__ = { "return_layers": Dict[str, str], } def __init__(self, model: nn.Module, return_layers: Dict[str, str]) -> None: if not set(return_layers).issubset([name for name, _ in model.named_children()]): raise ValueError("return_layers are not present in model") orig_return_layers = return_layers return_layers = {str(k): str(v) for k, v in return_layers.items()} # 重新构建backbone,将没有使用到的模块全部删掉 layers = OrderedDict() for name, module in model.named_children(): layers[name] = module if name in return_layers: del return_layers[name] if not return_layers: break super(IntermediateLayerGetter, self).__init__(layers) self.return_layers = orig_return_layers def forward(self, x: Tensor) -> Dict[str, Tensor]: out = OrderedDict() for name, module in self.items(): x = module(x) if name in self.return_layers: out_name = self.return_layers[name] out[out_name] = x return out class VGG16UNet(nn.Module): def __init__(self, num_classes, pretrain_backbone: bool = False): super(VGG16UNet, self).__init__() backbone = vgg16_bn(pretrained=pretrain_backbone) # if pretrain_backbone: # # 载入vgg16_bn预训练权重 # # https://download.pytorch.org/models/vgg16_bn-6c64b313.pth # backbone.load_state_dict(torch.load("vgg16_bn.pth", map_location='cpu')) backbone = backbone.features stage_indices = [5, 12, 22, 32, 42] self.stage_out_channels = [64, 128, 256, 512, 512] return_layers = dict([(str(j), f"stage{i}") for i, j in enumerate(stage_indices)]) self.backbone = IntermediateLayerGetter(backbone, return_layers=return_layers) c = self.stage_out_channels[4] + self.stage_out_channels[3] self.up1 = Up(c, self.stage_out_channels[3]) c = self.stage_out_channels[3] + self.stage_out_channels[2] self.up2 = Up(c, self.stage_out_channels[2]) c = self.stage_out_channels[2] + self.stage_out_channels[1] self.up3 = Up(c, self.stage_out_channels[1]) c = self.stage_out_channels[1] + self.stage_out_channels[0] self.up4 = Up(c, self.stage_out_channels[0]) self.conv = 
class SegmentationPresetTrain:
    """Training-time transform pipeline applied jointly to image and target.

    The pipeline is: random resize between 0.5x and 1.2x of ``base_size``,
    optional horizontal / vertical flips, random crop to ``crop_size``,
    tensor conversion and normalization.
    """

    def __init__(self, base_size, crop_size, hflip_prob=0.5, vflip_prob=0.5,
                 mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        smallest = int(0.5 * base_size)
        largest = int(1.2 * base_size)

        pipeline = [T.RandomResize(smallest, largest)]
        # A flip is only added when its probability is strictly positive.
        if hflip_prob > 0:
            pipeline.append(T.RandomHorizontalFlip(hflip_prob))
        if vflip_prob > 0:
            pipeline.append(T.RandomVerticalFlip(vflip_prob))
        pipeline += [
            T.RandomCrop(crop_size),
            T.ToTensor(),
            T.Normalize(mean=mean, std=std),
        ]
        self.transforms = T.Compose(pipeline)

    def __call__(self, img, target):
        return self.transforms(img, target)


class SegmentationPresetEval:
    """Evaluation-time pipeline: tensor conversion followed by normalization."""

    def __init__(self, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        steps = [
            T.ToTensor(),
            T.Normalize(mean=mean, std=std),
        ]
        self.transforms = T.Compose(steps)

    def __call__(self, img, target):
        return self.transforms(img, target)


def get_transform(train, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Return the training preset when ``train`` is truthy, else the eval preset."""
    base_size = 565
    crop_size = 480

    if not train:
        return SegmentationPresetEval(mean=mean, std=std)
    return SegmentationPresetTrain(base_size, crop_size, mean=mean, std=std)


def create_model(num_classes):
    """Build the U-Net used by this script (3 input channels, base width 32)."""
    return UNet(in_channels=3, num_classes=num_classes, base_c=32)
torch.cuda.is_available() else "cpu") batch_size = args.batch_size # segmentation nun_classes + background num_classes = args.num_classes + 1 # using compute_mean_std.py mean = (0.709, 0.381, 0.224) std = (0.127, 0.079, 0.043) # 用来保存训练以及验证过程中信息 results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")) train_dataset = DriveDataset(args.data_path, train=True, transforms=get_transform(train=True, mean=mean, std=std)) val_dataset = DriveDataset(args.data_path, train=False, transforms=get_transform(train=False, mean=mean, std=std)) num_workers = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]) train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True, pin_memory=True, collate_fn=train_dataset.collate_fn) val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, num_workers=num_workers, pin_memory=True, collate_fn=val_dataset.collate_fn) model = create_model(num_classes=num_classes) model.to(device) params_to_optimize = [p for p in model.parameters() if p.requires_grad] optimizer = torch.optim.SGD( params_to_optimize, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay ) scaler = torch.cuda.amp.GradScaler() if args.amp else None # 创建学习率更新策略,这里是每个step更新一次(不是每个epoch) lr_scheduler = create_lr_scheduler(optimizer, len(train_loader), args.epochs, warmup=True) if args.resume: checkpoint = torch.load(args.resume, map_location='cpu') model.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) args.start_epoch = checkpoint['epoch'] + 1 if args.amp: scaler.load_state_dict(checkpoint["scaler"]) best_dice = 0. 
def parse_args(argv=None):
    """Parse the command-line options of the single-GPU U-Net training script.

    Args:
        argv: optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is the original behavior.

    Returns:
        argparse.Namespace holding the parsed options.
    """
    import argparse

    def _str2bool(value):
        """Convert a command-line token to a real boolean.

        ``type=bool`` is a trap with argparse: ``bool("False")`` is ``True``
        because any non-empty string is truthy, so ``--save-best False`` could
        never disable the option.
        """
        if isinstance(value, bool):
            return value
        text = str(value).strip().lower()
        if text in ("true", "t", "yes", "y", "1"):
            return True
        # The empty string is kept falsy, matching the previous bool("") result.
        if text in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")

    parser = argparse.ArgumentParser(description="pytorch unet training")

    parser.add_argument("--data-path", default="./", help="DRIVE root")
    # exclude background
    parser.add_argument("--num-classes", default=1, type=int)
    parser.add_argument("--device", default="cuda", help="training device")
    parser.add_argument("-b", "--batch-size", default=4, type=int)
    parser.add_argument("--epochs", default=200, type=int, metavar="N",
                        help="number of total epochs to train")

    parser.add_argument('--lr', default=0.01, type=float, help='initial learning rate')
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    parser.add_argument('--print-freq', default=1, type=int, help='print frequency')
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                        help='start epoch')
    parser.add_argument('--save-best', default=True, type=_str2bool,
                        help='only save best dice weights')
    # Mixed precision training parameters
    parser.add_argument("--amp", default=False, type=_str2bool,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args(argv)

    return args
def str2bool(value):
    """Convert a command-line token to a real boolean for argparse.

    ``type=bool`` does not work for flags such as ``--save-best False``:
    ``bool("False")`` is ``True`` because any non-empty string is truthy.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean.
    """
    import argparse

    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("true", "t", "yes", "y", "1"):
        return True
    # The empty string stays falsy, matching the previous bool("") result.
    if text in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def main(args):
    """Train (or only evaluate) the U-Net on DRIVE, optionally distributed.

    Args:
        args: parsed command-line namespace (see the ``__main__`` section).
            ``init_distributed_mode`` is called on it first; the code below
            then reads ``args.distributed``, ``args.gpu`` and ``args.rank``,
            which are presumably populated by that call - confirm in
            ``train_utils.distributed_utils``.

    Raises:
        FileNotFoundError: if ``<data_path>/DRIVE`` does not exist.
    """
    init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)
    # segmentation num_classes + background
    num_classes = args.num_classes + 1

    # Dataset-specific normalization statistics.
    mean = (0.709, 0.381, 0.224)
    std = (0.127, 0.079, 0.043)

    # File that accumulates per-epoch training / validation information.
    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    data_root = args.data_path
    # check data root
    if os.path.exists(os.path.join(data_root, "DRIVE")) is False:
        raise FileNotFoundError("DRIVE dose not in path:'{}'.".format(data_root))

    train_dataset = DriveDataset(args.data_path,
                                 train=True,
                                 transforms=get_transform(train=True, mean=mean, std=std))

    val_dataset = DriveDataset(args.data_path,
                               train=False,
                               transforms=get_transform(train=False, mean=mean, std=std))

    print("Creating data loaders")
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        test_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
    else:
        train_sampler = torch.utils.data.RandomSampler(train_dataset)
        test_sampler = torch.utils.data.SequentialSampler(val_dataset)

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size,
        sampler=train_sampler, num_workers=args.workers,
        collate_fn=train_dataset.collate_fn, drop_last=True)

    # The validation loader uses the validation dataset's own collate_fn
    # (previously it borrowed the training dataset's).
    val_data_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=1,
        sampler=test_sampler, num_workers=args.workers,
        collate_fn=val_dataset.collate_fn)

    print("Creating model")
    # create model; num_classes equals background + foreground classes
    model = create_model(num_classes=num_classes)
    model.to(device)

    if args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    # Keep a handle on the bare model so checkpoints have no DDP wrapper prefix.
    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    params_to_optimize = [p for p in model_without_ddp.parameters() if p.requires_grad]

    optimizer = torch.optim.SGD(
        params_to_optimize,
        lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # Learning-rate schedule that is stepped once per iteration (not per epoch).
    lr_scheduler = create_lr_scheduler(optimizer, len(train_data_loader), args.epochs, warmup=True)

    # When --resume points at a checkpoint, continue training from it.
    if args.resume:
        # If map_location is missing, torch.load will first load the module to CPU
        # and then copy each parameter to where it was saved,
        # which would result in all processes on the same machine using the same set of devices.
        checkpoint = torch.load(args.resume, map_location='cpu')
        # Restore model weights together with optimizer / scheduler state.
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
        if args.amp:
            scaler.load_state_dict(checkpoint["scaler"])

    if args.test_only:
        # evaluate() yields (confusion matrix, dice), exactly as unpacked in
        # the training loop below; unpack it here too so the printed report is
        # the confusion matrix rather than the repr of a tuple.
        confmat, dice = evaluate(model, val_data_loader, device=device, num_classes=num_classes)
        val_info = str(confmat)
        print(val_info)
        print(f"dice coefficient: {dice:.3f}")
        return

    best_dice = 0.
    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            # Reseed the sampler so every epoch uses a different shuffle.
            train_sampler.set_epoch(epoch)
        mean_loss, lr = train_one_epoch(model, optimizer, train_data_loader, device, epoch, num_classes,
                                        lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler)

        confmat, dice = evaluate(model, val_data_loader, device=device, num_classes=num_classes)
        val_info = str(confmat)
        print(val_info)
        print(f"dice coefficient: {dice:.3f}")

        # Only the main process writes the results file.
        if args.rank in [-1, 0]:
            # write into txt
            with open(results_file, "a") as f:
                # Record train_loss, lr and validation metrics for this epoch.
                train_info = f"[epoch: {epoch}]\n" \
                             f"train_loss: {mean_loss:.4f}\n" \
                             f"lr: {lr:.6f}\n" \
                             f"dice coefficient: {dice:.3f}\n"
                f.write(train_info + val_info + "\n\n")

        if args.save_best is True:
            # Skip saving unless this epoch improved the best dice so far.
            if best_dice < dice:
                best_dice = dice
            else:
                continue

        if args.output_dir:
            # save_on_master is expected to write only on the main process
            # (NOTE(review): confirm in train_utils.distributed_utils).
            save_file = {'model': model_without_ddp.state_dict(),
                         'optimizer': optimizer.state_dict(),
                         'lr_scheduler': lr_scheduler.state_dict(),
                         'args': args,
                         'epoch': epoch}
            if args.amp:
                save_file["scaler"] = scaler.state_dict()

            if args.save_best is True:
                save_on_master(save_file,
                               os.path.join(args.output_dir, 'best_model.pth'))
            else:
                save_on_master(save_file,
                               os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description=__doc__)

    # Root directory of the training data (must contain DRIVE)
    parser.add_argument('--data-path', default='./', help='dataset')
    # Training device type
    parser.add_argument('--device', default='cuda', help='device')
    # Number of target classes (background excluded)
    parser.add_argument('--num-classes', default=1, type=int, help='num_classes')
    # Batch size on each GPU
    parser.add_argument('-b', '--batch-size', default=4, type=int,
                        help='images per gpu, the total batch size is $NGPU x batch_size')
    # Epoch index to start (or continue) training from
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    # Total number of training epochs
    parser.add_argument('--epochs', default=200, type=int, metavar='N',
                        help='number of total epochs to run')
    # Whether to use synchronized BN across GPUs; off by default, slows training when on
    parser.add_argument('--sync_bn', type=str2bool, default=False, help='whether using SyncBatchNorm')
    # Number of data loading / preprocessing workers
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    # Learning rate, 0.01 by default (multiplying by n is suggested for n GPUs);
    # adjust it if results are poor
    parser.add_argument('--lr', default=0.01, type=float,
                        help='initial learning rate')
    # SGD momentum
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # SGD weight decay
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    # Only keep the weights with the highest dice coefficient
    parser.add_argument('--save-best', default=True, type=str2bool, help='only save best weights')
    # How often training information is printed
    parser.add_argument('--print-freq', default=1, type=int, help='print frequency')
    # Directory where outputs are saved
    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')
    # Continue training from a previous checkpoint
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    # Do not train, only evaluate
    parser.add_argument(
        "--test-only",
        dest="test_only",
        help="Only test the model",
        action="store_true",
    )

    # Number of distributed processes
    parser.add_argument('--world-size', default=1, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
    # Mixed precision training parameters
    parser.add_argument("--amp", default=False, type=str2bool,
                        help="Use torch.cuda.amp for mixed precision training")

    args = parser.parse_args()

    # Create the output directory when one was requested and it is missing.
    if args.output_dir:
        mkdir(args.output_dir)

    main(args)
def dice_coeff(x: torch.Tensor, target: torch.Tensor, ignore_index: int = -100, epsilon=1e-6):
    """Return the Dice coefficient averaged over the first dimension of ``x``.

    Each entry ``x[i]`` / ``target[i]`` is flattened and scored on its own.
    When ``ignore_index`` is non-negative, positions where the target equals
    ``ignore_index`` are removed from both tensors before scoring.
    ``epsilon`` is added to numerator and denominator, so two all-zero masks
    score 1.
    """
    mask_ignored = ignore_index >= 0
    num_samples = x.shape[0]
    running_total = 0.
    for idx, sample_pred in enumerate(x):
        flat_pred = sample_pred.reshape(-1)
        flat_gt = target[idx].reshape(-1)
        if mask_ignored:
            # keep only the positions whose ground truth is not ignore_index
            keep = flat_gt != ignore_index
            flat_pred, flat_gt = flat_pred[keep], flat_gt[keep]

        overlap = torch.dot(flat_pred, flat_gt)
        denominator = flat_pred.sum() + flat_gt.sum()
        if denominator == 0:
            denominator = 2 * overlap

        running_total += (2 * overlap + epsilon) / (denominator + epsilon)

    return running_total / num_samples
class SmoothedValue(object):
    """Keep the most recent values in a fixed-size window plus running totals.

    The window backs ``median``, ``avg``, ``max`` and ``value``; ``total`` and
    ``count`` back ``global_avg`` over everything seen so far.
    """

    def __init__(self, window_size=20, fmt=None):
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0
        self.fmt = "{value:.4f} ({global_avg:.4f})" if fmt is None else fmt

    def update(self, value, n=1):
        """Record ``value`` once in the window and ``n`` times in the totals."""
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Sum ``count`` and ``total`` across processes.

        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        packed = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
        dist.barrier()
        dist.all_reduce(packed)
        synced_count, synced_total = packed.tolist()
        self.count = int(synced_count)
        self.total = synced_total

    @property
    def median(self):
        window = torch.tensor(list(self.deque))
        return window.median().item()

    @property
    def avg(self):
        window = torch.tensor(list(self.deque), dtype=torch.float32)
        return window.mean().item()

    @property
    def global_avg(self):
        return self.total / self.count

    @property
    def max(self):
        return max(self.deque)

    @property
    def value(self):
        return self.deque[-1]

    def __str__(self):
        # every statistic is evaluated; the format string picks the ones it needs
        stats = dict(
            median=self.median,
            avg=self.avg,
            global_avg=self.global_avg,
            max=self.max,
            value=self.value,
        )
        return self.fmt.format(**stats)
class DiceCoefficient(object):
    """Accumulate the foreground Dice coefficient over evaluation batches.

    ``update`` adds one batch score to ``cumulative_dice`` and increments
    ``count``; ``value`` is their ratio. Both accumulators are created lazily
    on the first ``update`` so they match the prediction's dtype and device.
    """

    def __init__(self, num_classes: int = 2, ignore_index: int = -100):
        self.cumulative_dice = None
        self.num_classes = num_classes
        self.ignore_index = ignore_index
        self.count = None

    def update(self, pred, target):
        """Add the Dice score of one batch.

        ``pred`` is reduced with ``argmax(dim=1)`` and one-hot encoded, and
        ``target`` is one-hot encoded by ``build_target``. Channel 0 (the
        background) is dropped from both before scoring.
        """
        if self.cumulative_dice is None:
            self.cumulative_dice = torch.zeros(1, dtype=pred.dtype, device=pred.device)
        if self.count is None:
            self.count = torch.zeros(1, dtype=pred.dtype, device=pred.device)
        # compute the Dice score, ignoring background
        pred = F.one_hot(pred.argmax(dim=1), self.num_classes).permute(0, 3, 1, 2).float()
        dice_target = build_target(target, self.num_classes, self.ignore_index)
        self.cumulative_dice += multiclass_dice_coeff(pred[:, 1:], dice_target[:, 1:], ignore_index=self.ignore_index)
        self.count += 1

    @property
    def value(self):
        """Mean Dice over the recorded batches, or ``0`` when nothing was recorded."""
        # ``count`` stays None until the first update; guard it so that reading
        # the metric early does not fail on ``None / None``.
        if self.count is None or self.count == 0:
            return 0
        return self.cumulative_dice / self.count

    def reset(self):
        """Zero both accumulators in place (no-op before the first update)."""
        if self.cumulative_dice is not None:
            self.cumulative_dice.zero_()

        if self.count is not None:
            # The in-place method is ``zero_``; ``zeros_`` does not exist on
            # torch.Tensor and raised AttributeError.
            self.count.zero_()

    def reduce_from_all_processes(self):
        """Sum the accumulators across processes when torch.distributed is active."""
        if not torch.distributed.is_available():
            return
        if not torch.distributed.is_initialized():
            return
        torch.distributed.barrier()
        torch.distributed.all_reduce(self.cumulative_dice)
        torch.distributed.all_reduce(self.count)
def mkdir(path):
    """Create directory ``path`` together with any missing parents.

    An already existing directory is not an error. ``exist_ok=True`` replaces
    the manual ``errno.EEXIST`` check; unlike that check it still raises
    (``FileExistsError``, an ``OSError``) when ``path`` exists but is not a
    directory, instead of silently reporting success.
    """
    os.makedirs(path, exist_ok=True)
def init_distributed_mode(args):
    """Fill in the distributed settings on ``args`` and start the process group.

    The rank is taken from ``RANK``/``WORLD_SIZE``/``LOCAL_RANK`` when the first
    two are set, otherwise from ``SLURM_PROCID``, otherwise from a ``rank``
    attribute already present on ``args``. If none of these is available,
    ``args.distributed`` is set to False and nothing else happens.
    """
    env = os.environ
    has_launcher_vars = 'RANK' in env and 'WORLD_SIZE' in env
    has_slurm_var = 'SLURM_PROCID' in env

    if not (has_launcher_vars or has_slurm_var or hasattr(args, "rank")):
        print('Not using distributed mode')
        args.distributed = False
        return

    if has_launcher_vars:
        args.rank = int(env["RANK"])
        args.world_size = int(env['WORLD_SIZE'])
        args.gpu = int(env['LOCAL_RANK'])
    elif has_slurm_var:
        args.rank = int(env['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()
    # otherwise args.rank was supplied by the caller and is used unchanged

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    torch.distributed.init_process_group(
        backend=args.dist_backend,
        init_method=args.dist_url,
        world_size=args.world_size,
        rank=args.rank,
    )
    # only rank 0 keeps printing after this call
    setup_for_distributed(args.rank == 0)
def create_lr_scheduler(optimizer,
                        num_step: int,
                        epochs: int,
                        warmup=True,
                        warmup_epochs=1,
                        warmup_factor=1e-3):
    """Build a per-step ``LambdaLR`` with optional linear warmup and poly decay.

    ``num_step`` is the number of optimizer steps per epoch. During warmup the
    multiplier rises linearly from ``warmup_factor`` to 1; afterwards it follows
    ``(1 - progress) ** 0.9`` down to 0 at ``epochs * num_step`` steps.
    """
    assert num_step > 0 and epochs > 0
    if warmup is False:
        warmup_epochs = 0

    use_warmup = warmup is True
    warmup_steps = warmup_epochs * num_step
    decay_steps = (epochs - warmup_epochs) * num_step

    def f(x):
        """
        Return the learning-rate multiplier for step ``x``.
        Note that PyTorch calls lr_scheduler.step() once before training starts.
        """
        if use_warmup and x <= warmup_steps:
            # multiplier goes warmup_factor -> 1 over the warmup steps
            alpha = float(x) / warmup_steps
            return warmup_factor * (1 - alpha) + alpha
        # multiplier goes 1 -> 0 after warmup
        # see deeplab_v2: Learning rate policy
        return (1 - (x - warmup_steps) / decay_steps) ** 0.9

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)
class RandomCrop(object):
    """Crop the same random ``size`` x ``size`` window from an image and its target."""

    def __init__(self, size):
        self.size = size

    def __call__(self, image, target):
        side = self.size
        # Pad first so the crop window always fits; the target is padded with 255.
        padded_image = pad_if_smaller(image, side)
        padded_target = pad_if_smaller(target, side, fill=255)
        # One set of crop parameters is drawn and applied to both inputs.
        top, left, height, width = T.RandomCrop.get_params(padded_image, (side, side))
        cropped_image = F.crop(padded_image, top, left, height, width)
        cropped_target = F.crop(padded_target, top, left, height, width)
        return cropped_image, cropped_target
class ConfusionMatrix(object):
    """
    Confusion matrix with rows indexed by predicted class and columns by true class.

    Note: if the plotted figure is cut off, it is a matplotlib version problem;
    this example renders correctly with matplotlib-3.2.1 (windows and ubuntu).
    The prettytable package must be installed separately.
    """

    def __init__(self, num_classes: int, labels: list):
        self.matrix = np.zeros((num_classes, num_classes))
        self.num_classes = num_classes
        self.labels = labels

    def update(self, preds, labels):
        """Count one sample per (prediction, true label) pair."""
        for pred_idx, true_idx in zip(preds, labels):
            self.matrix[pred_idx, true_idx] += 1

    def summary(self):
        """Print overall accuracy and a per-class precision/recall/specificity table."""
        counts = self.matrix
        grand_total = np.sum(counts)

        # accuracy = correctly classified samples (diagonal) / all samples
        correct = sum(counts[k, k] for k in range(self.num_classes))
        acc = correct / grand_total
        print("the model accuracy is ", acc)

        # precision, recall, specificity
        table = PrettyTable()
        table.field_names = ["", "Precision", "Recall", "Specificity"]
        for k in range(self.num_classes):
            TP = counts[k, k]
            FP = np.sum(counts[k, :]) - TP
            FN = np.sum(counts[:, k]) - TP
            TN = np.sum(counts) - TP - FP - FN
            if TP + FP != 0:
                Precision = round(TP / (TP + FP), 3)
            else:
                Precision = 0.
            if TP + FN != 0:
                Recall = round(TP / (TP + FN), 3)
            else:
                Recall = 0.
            if TN + FP != 0:
                Specificity = round(TN / (TN + FP), 3)
            else:
                Specificity = 0.
            table.add_row([self.labels[k], Precision, Recall, Specificity])
        print(table)

    def plot(self):
        """Print the matrix and show it as an annotated heat map."""
        matrix = self.matrix
        class_ticks = range(self.num_classes)
        print(matrix)
        plt.imshow(matrix, cmap=plt.cm.Blues)

        # x-axis tick labels
        plt.xticks(class_ticks, self.labels, rotation=45)
        # y-axis tick labels
        plt.yticks(class_ticks, self.labels)
        # show the colorbar
        plt.colorbar()
        plt.xlabel('True Labels')
        plt.ylabel('Predicted Labels')
        plt.title('Confusion matrix')

        # write the count into every cell
        thresh = matrix.max() / 2
        for x in class_ticks:
            for y in class_ticks:
                # note: the cell drawn at (x, y) is matrix[y, x], not matrix[x, y]
                info = int(matrix[y, x])
                text_color = "white" if info > thresh else "black"
                plt.text(x, y, info,
                         verticalalignment='center',
                         horizontalalignment='center',
                         color=text_color)
        plt.tight_layout()
        plt.show()
def _make_divisible(ch, divisor=8, min_ch=None):
    """
    Round ``ch`` to the nearest multiple of ``divisor``, never below ``min_ch``.

    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    """
    lower_bound = divisor if min_ch is None else min_ch
    nearest_multiple = int(ch + divisor / 2) // divisor * divisor
    rounded = max(lower_bound, nearest_multiple)
    # Make sure that round down does not go down by more than 10%.
    return rounded + divisor if rounded < 0.9 * ch else rounded
def MobileNetV2(im_height=224,
                im_width=224,
                num_classes=1000,
                alpha=1.0,
                round_nearest=8):
    """Assemble a MobileNetV2 classifier as a functional Keras model.

    ``alpha`` scales every channel count and ``round_nearest`` is the multiple
    those counts are rounded to. The final Dense layer has ``num_classes``
    units and no activation.
    """
    stem_channels = _make_divisible(32 * alpha, round_nearest)
    head_channels = _make_divisible(1280 * alpha, round_nearest)

    # (expand ratio t, base output channels c, repeats n, stride of first repeat s)
    stage_settings = [
        [1, 16, 1, 1],
        [6, 24, 2, 2],
        [6, 32, 3, 2],
        [6, 64, 4, 2],
        [6, 96, 3, 1],
        [6, 160, 3, 2],
        [6, 320, 1, 1],
    ]

    input_image = layers.Input(shape=(im_height, im_width, 3), dtype='float32')

    # stem convolution
    features = ConvBNReLU(stem_channels, stride=2, name='Conv')(input_image)

    # inverted residual stages: only the first block of a stage uses its stride
    for expand_ratio, base_channels, repeats, first_stride in stage_settings:
        stage_channels = _make_divisible(base_channels * alpha, round_nearest)
        for stride in [first_stride] + [1] * (repeats - 1):
            residual_block = InvertedResidual(features.shape[-1],
                                              stage_channels,
                                              stride,
                                              expand_ratio=expand_ratio)
            features = residual_block(features)

    # final 1x1 convolution
    features = ConvBNReLU(head_channels, kernel_size=1, name='Conv_1')(features)

    # classifier: pool + flatten, dropout, dense
    features = layers.GlobalAveragePooling2D()(features)
    features = layers.Dropout(0.2)(features)
    logits = layers.Dense(num_classes, name='Logits')(features)

    return Model(inputs=input_image, outputs=logits)
""" def __init__(self, dim, drop_rate=0., layer_scale_init_value=1e-6, name: str = None): super().__init__(name=name) self.layer_scale_init_value = layer_scale_init_value self.dwconv = layers.DepthwiseConv2D(7, padding="same", depthwise_initializer=KERNEL_INITIALIZER, bias_initializer=BIAS_INITIALIZER, name="dwconv") self.norm = layers.LayerNormalization(epsilon=1e-6, name="norm") self.pwconv1 = layers.Dense(4 * dim, kernel_initializer=KERNEL_INITIALIZER, bias_initializer=BIAS_INITIALIZER, name="pwconv1") self.act = layers.Activation("gelu") self.pwconv2 = layers.Dense(dim, kernel_initializer=KERNEL_INITIALIZER, bias_initializer=BIAS_INITIALIZER, name="pwconv2") self.drop_path = layers.Dropout(drop_rate, noise_shape=(None, 1, 1, 1)) if drop_rate > 0 else None def build(self, input_shape): if self.layer_scale_init_value > 0: self.gamma = self.add_weight(shape=[input_shape[-1]], initializer=initializers.Constant(self.layer_scale_init_value), trainable=True, dtype=tf.float32, name="gamma") else: self.gamma = None def call(self, x, training=False): shortcut = x x = self.dwconv(x) x = self.norm(x, training=training) x = self.pwconv1(x) x = self.act(x) x = self.pwconv2(x) if self.gamma is not None: x = self.gamma * x if self.drop_path is not None: x = self.drop_path(x, training=training) return shortcut + x class Stem(layers.Layer): def __init__(self, dim, name: str = None): super().__init__(name=name) self.conv = layers.Conv2D(dim, kernel_size=4, strides=4, padding="same", kernel_initializer=KERNEL_INITIALIZER, bias_initializer=BIAS_INITIALIZER, name="conv2d") self.norm = layers.LayerNormalization(epsilon=1e-6, name="norm") def call(self, x, training=False): x = self.conv(x) x = self.norm(x, training=training) return x class DownSample(layers.Layer): def __init__(self, dim, name: str = None): super().__init__(name=name) self.norm = layers.LayerNormalization(epsilon=1e-6, name="norm") self.conv = layers.Conv2D(dim, kernel_size=2, strides=2, padding="same", 
class ConvNeXt(Model):
    r""" ConvNeXt
        A Tensorflow impl of : `A ConvNet for the 2020s`  -
          https://arxiv.org/pdf/2201.03545.pdf
    Args:
        num_classes (int): Number of units of the classification head.
        depths (list(int)): Number of blocks in each of the four stages
            (no default; e.g. [3, 3, 9, 3]).
        dims (list(int)): Feature dimension of each of the four stages
            (no default; e.g. [96, 192, 384, 768]).
        drop_path_rate (float): Largest stochastic depth rate; the per-block rates
            are spaced linearly from 0 up to this value. Default: 0.
        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
    """

    def __init__(self, num_classes: int, depths: list, dims: list, drop_path_rate: float = 0.,
                 layer_scale_init_value: float = 1e-6):
        super().__init__()
        self.stem = Stem(dims[0], name="stem")

        # `cur` is the index of the current stage's first block in `dp_rates`,
        # which holds one drop rate per block over all four stages.
        cur = 0
        dp_rates = np.linspace(start=0, stop=drop_path_rate, num=sum(depths))
        self.stage1 = [Block(dim=dims[0],
                             drop_rate=dp_rates[cur + i],
                             layer_scale_init_value=layer_scale_init_value,
                             name=f"stage1_block{i}")
                       for i in range(depths[0])]
        cur += depths[0]

        self.downsample2 = DownSample(dims[1], name="downsample2")
        self.stage2 = [Block(dim=dims[1],
                             drop_rate=dp_rates[cur + i],
                             layer_scale_init_value=layer_scale_init_value,
                             name=f"stage2_block{i}")
                       for i in range(depths[1])]
        cur += depths[1]

        self.downsample3 = DownSample(dims[2], name="downsample3")
        self.stage3 = [Block(dim=dims[2],
                             drop_rate=dp_rates[cur + i],
                             layer_scale_init_value=layer_scale_init_value,
                             name=f"stage3_block{i}")
                       for i in range(depths[2])]
        cur += depths[2]

        self.downsample4 = DownSample(dims[3], name="downsample4")
        self.stage4 = [Block(dim=dims[3],
                             drop_rate=dp_rates[cur + i],
                             layer_scale_init_value=layer_scale_init_value,
                             name=f"stage4_block{i}")
                       for i in range(depths[3])]

        self.norm = layers.LayerNormalization(epsilon=1e-6, name="norm")
        self.head = layers.Dense(units=num_classes,
                                 kernel_initializer=KERNEL_INITIALIZER,
                                 bias_initializer=BIAS_INITIALIZER,
                                 name="head")

    def call(self, x, training=False):
        """Run stem, the four stages (with a downsample before stages 2-4), then the head."""
        x = self.stem(x, training=training)
        for block in self.stage1:
            x = block(x, training=training)

        x = self.downsample2(x, training=training)
        for block in self.stage2:
            x = block(x, training=training)

        x = self.downsample3(x, training=training)
        for block in self.stage3:
            x = block(x, training=training)

        x = self.downsample4(x, training=training)
        for block in self.stage4:
            x = block(x, training=training)

        # average over axes 1 and 2 before the final norm and the dense head
        x = tf.reduce_mean(x, axis=[1, 2])
        x = self.norm(x, training=training)
        x = self.head(x)
        return x
- [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225] # Add the image to a batch where it's the only member. img = (np.expand_dims(img, 0)) # read class_indict json_path = './class_indices.json' assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path) with open(json_path, "r") as f: class_indict = json.load(f) # create model model = create_model(num_classes=num_classes) model.build([1, 224, 224, 3]) weights_path = './save_weights/model.ckpt' assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path) model.load_weights(weights_path) result = np.squeeze(model.predict(img, batch_size=1)) result = tf.keras.layers.Softmax()(result) predict_class = np.argmax(result) print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)], result[predict_class]) plt.title(print_res) for i in range(len(result)): print("class: {:10} prob: {:.3}".format(class_indict[str(i)], result[i])) plt.show() if __name__ == '__main__': main() ================================================ FILE: tensorflow_classification/ConvNeXt/train.py ================================================ import os import re import sys import datetime import tensorflow as tf from tqdm import tqdm from model import convnext_tiny as create_model from utils import generate_ds, cosine_scheduler assert tf.version.VERSION >= "2.4.0", "version of tf must greater/equal than 2.4.0" def main(): data_root = "/data/flower_photos" # get data root path if not os.path.exists("./save_weights"): os.makedirs("./save_weights") batch_size = 8 epochs = 10 num_classes = 5 freeze_layers = False initial_lr = 0.005 weight_decay = 5e-4 log_dir = "./logs/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") train_writer = tf.summary.create_file_writer(os.path.join(log_dir, "train")) val_writer = tf.summary.create_file_writer(os.path.join(log_dir, "val")) # data generator with data augmentation train_ds, val_ds = generate_ds(data_root, batch_size=batch_size, val_rate=0.2) # create model 
model = create_model(num_classes=num_classes) model.build((1, 224, 224, 3)) # 下载我提前转好的预训练权重 # 链接: https://pan.baidu.com/s/1MtYJ3FCAkiPwaMRKuyZN1Q 密码: 1cgp # load weights pre_weights_path = './convnext_tiny_1k_224.h5' assert os.path.exists(pre_weights_path), "cannot find {}".format(pre_weights_path) model.load_weights(pre_weights_path, by_name=True, skip_mismatch=True) # freeze bottom layers if freeze_layers: for layer in model.layers: if "head" not in layer.name: layer.trainable = False else: print("training {}".format(layer.name)) model.summary() # custom learning rate scheduler scheduler = cosine_scheduler(initial_lr, epochs, len(train_ds), train_writer=train_writer) # using keras low level api for training loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) optimizer = tf.keras.optimizers.SGD(learning_rate=initial_lr, momentum=0.9) train_loss = tf.keras.metrics.Mean(name='train_loss') train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy') val_loss = tf.keras.metrics.Mean(name='val_loss') val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy') @tf.function def train_step(train_images, train_labels): with tf.GradientTape() as tape: output = model(train_images, training=True) ce_loss = loss_object(train_labels, output) # l2 loss matcher = re.compile(".*(bias|gamma|beta).*") l2loss = weight_decay * tf.add_n([ tf.nn.l2_loss(v) for v in model.trainable_variables if not matcher.match(v.name) ]) loss = ce_loss + l2loss gradients = tape.gradient(loss, model.trainable_variables) optimizer.apply_gradients(zip(gradients, model.trainable_variables)) train_loss(ce_loss) train_accuracy(train_labels, output) @tf.function def val_step(val_images, val_labels): output = model(val_images, training=False) loss = loss_object(val_labels, output) val_loss(loss) val_accuracy(val_labels, output) best_val_acc = 0. 
# Axis permutation applied to multi-dimensional PyTorch tensors, per module
# type. ``None`` means "store the value untouched".
_WEIGHT_PERMUTATIONS = {
    "conv": (2, 3, 1, 0),
    "dwconv": (2, 3, 0, 1),
    "linear": (1, 0),
    "norm": None,
}


def transpose_weights(m_type, w_dict, k, v):
    """Store PyTorch weight `v` in `w_dict[k]`, permuting its axes if needed.

    Args:
        m_type: One of "conv", "dwconv", "linear" or "norm".
        w_dict: Destination dict, mutated in place.
        k: Key under which the value is stored.
        v: Source tensor; must provide ``.shape`` and ``.numpy()``.

    For "conv", "dwconv" and "linear", a tensor with more than one dimension
    is converted to a float32 numpy array with its axes permuted. Values with
    one dimension (e.g. biases) and every "norm" value are stored unchanged.

    Raises:
        ValueError: If `m_type` is not a supported type. (The original code
            built this exception but never raised it.)
    """
    if m_type not in _WEIGHT_PERMUTATIONS:
        raise ValueError(f"not support type:{m_type}")

    perm = _WEIGHT_PERMUTATIONS[m_type]
    if perm is not None and len(v.shape) > 1:
        v = np.transpose(v.numpy(), perm).astype(np.float32)
    w_dict[k] = v


def main(weights_path: str, model_name: str, model: "tf.keras.Model"):
    """Copy a PyTorch ConvNeXt checkpoint into `model` and save it as .h5.

    Args:
        weights_path: Path of the PyTorch checkpoint; its "model" entry is
            used as the state dict.
        model_name: Base name of the output file ``./{model_name}.h5``.
        model: Keras model whose weights are matched by variable name
            (without the ":0" suffix).

    Variables that are missing from the checkpoint or whose shape differs are
    reported with ``print`` and left untouched.

    Raises:
        ValueError: If the checkpoint contains a key that cannot be mapped
            to a variable name. (Previously such keys were silently dropped
            because the exception was created but not raised.)
    """
    var_dict = {v.name.split(':')[0]: v for v in model.weights}

    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    weights_dict = torch.load(weights_path, map_location="cpu")["model"]
    w_dict = {}
    for k, v in weights_dict.items():
        if "downsample_layers" in k:
            split_k = k.split(".")
            if split_k[1] == "0":
                # downsample_layers.0.* is the stem: index 0 is the conv,
                # anything else its norm layer.
                if split_k[2] == "0":
                    k = "stem/conv2d/" + split_k[-1]
                    k = k.replace("weight", "kernel")
                    transpose_weights("conv", w_dict, k, v)
                else:
                    k = "stem/norm/" + split_k[-1]
                    k = k.replace("weight", "gamma")
                    k = k.replace("bias", "beta")
                    transpose_weights("norm", w_dict, k, v)
            else:
                # downsample_layers.N.* (N >= 1): index 1 is the conv,
                # anything else the norm layer.
                stage = int(split_k[1]) + 1
                if split_k[2] == "1":
                    k = f"downsample{stage}/conv2d/" + split_k[-1]
                    k = k.replace("weight", "kernel")
                    transpose_weights("conv", w_dict, k, v)
                else:
                    k = f"downsample{stage}/norm/" + split_k[-1]
                    k = k.replace("weight", "gamma")
                    k = k.replace("bias", "beta")
                    transpose_weights("norm", w_dict, k, v)
        elif "stages" in k:
            split_k = k.split(".")
            stage = int(split_k[1]) + 1
            block = int(split_k[2])
            if "dwconv" in k:
                k = f"stage{stage}_block{block}/{split_k[-2]}/{split_k[-1]}"
                k = k.replace("weight", "depthwise_kernel")
                transpose_weights("dwconv", w_dict, k, v)
            elif "pwconv" in k:
                k = f"stage{stage}_block{block}/{split_k[-2]}/{split_k[-1]}"
                k = k.replace("weight", "kernel")
                transpose_weights("linear", w_dict, k, v)
            elif "norm" in k:
                k = f"stage{stage}_block{block}/{split_k[-2]}/{split_k[-1]}"
                k = k.replace("weight", "gamma")
                k = k.replace("bias", "beta")
                transpose_weights("norm", w_dict, k, v)
            elif "gamma" in k:
                # Per-block "gamma" entry; stored directly under the block.
                k = f"stage{stage}_block{block}/{split_k[-1]}"
                transpose_weights("norm", w_dict, k, v)
            else:
                raise ValueError(f"unrecognized {k}")
        elif "norm" in k:
            split_k = k.split(".")
            k = f"norm/{split_k[-1]}"
            k = k.replace("weight", "gamma")
            k = k.replace("bias", "beta")
            transpose_weights("norm", w_dict, k, v)
        elif "head" in k:
            split_k = k.split(".")
            k = f"head/{split_k[-1]}"
            k = k.replace("weight", "kernel")
            transpose_weights("linear", w_dict, k, v)
        else:
            raise ValueError(f"unrecognized {k}")

    for key, var in var_dict.items():
        if key in w_dict:
            if w_dict[key].shape != var.shape:
                msg = "shape mismatch: {}".format(key)
                print(msg)
            else:
                var.assign(w_dict[key], read_value=False)
        else:
            msg = "Not found {} in {}".format(key, weights_path)
            print(msg)

    model.save_weights("./{}.h5".format(model_name))
def read_split_data(root: str, val_rate: float = 0.2, plot_image: bool = False):
    """Split an image-folder dataset into training and validation lists.

    Every sub-directory of `root` is treated as one class. Class names are
    sorted and numbered from 0, and the index -> name mapping is written to
    ``class_indices.json`` in the current working directory.

    Args:
        root: Dataset root directory (one sub-directory per class).
        val_rate: Fraction of each class sampled into the validation set;
            ``int(len(images) * val_rate)`` images are taken per class.
        plot_image: When True, show a bar chart of the per-class image
            counts with matplotlib. Defaults to False, which matches the
            previously hard-coded behaviour.

    Returns:
        Tuple ``(train_images_path, train_images_label, val_images_path,
        val_images_label)`` of parallel lists of paths and class indices.
    """
    random.seed(0)  # fixed seed so the random split is reproducible
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # Walk the root directory: one folder corresponds to one class.
    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]
    # Sort so that class indices are stable across runs.
    flower_class.sort()
    # Map class name -> numeric index, and persist the inverse mapping.
    class_indices = {name: idx for idx, name in enumerate(flower_class)}
    json_str = json.dumps({idx: name for name, idx in class_indices.items()}, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []   # paths of all training images
    train_images_label = []  # class index of each training image
    val_images_path = []     # paths of all validation images
    val_images_label = []    # class index of each validation image
    every_class_num = []     # number of images found for each class
    supported = (".jpg", ".JPG", ".jpeg", ".JPEG")  # accepted file suffixes

    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # Collect every file of this class that has a supported suffix.
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        image_class = class_indices[cla]
        every_class_num.append(len(images))

        # Sample the validation images; a set gives O(1) membership tests in
        # the loop below instead of scanning a list for every image.
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.\n{} for training, {} for validation".format(sum(every_class_num),
                                                                                            len(train_images_path),
                                                                                            len(val_images_path)
                                                                                            ))

    if plot_image:
        # Bar chart with one bar per class.
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # Replace the x ticks 0, 1, 2, ... with the class names.
        plt.xticks(range(len(flower_class)), flower_class)
        # Put the count above every bar.
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        plt.xlabel('image class')
        plt.ylabel('number of images')
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label
def cosine_rate(now_step, total_step, end_lr_rate):
    """Return the cosine decay factor for `now_step` out of `total_step`.

    The factor is 1 at step 0 and approaches `end_lr_rate` as `now_step`
    approaches `total_step`.
    """
    half_cosine = (1 + math.cos(now_step * math.pi / total_step)) / 2
    return half_cosine * (1 - end_lr_rate) + end_lr_rate  # cosine


def cosine_scheduler(initial_lr, epochs, steps, warmup_epochs=1, end_lr_rate=1e-6, train_writer=None):
    """Generate one learning rate per training step.

    The first ``warmup_epochs * steps`` values rise linearly from 1e-8 to
    `initial_lr`; the remaining ``(epochs - warmup_epochs) * steps`` values
    are `initial_lr` scaled by `cosine_rate`.

    Args:
        initial_lr: Learning rate reached at the end of the warm-up.
        epochs: Total number of epochs.
        steps: Number of steps per epoch.
        warmup_epochs: Number of warm-up epochs; must be smaller than `epochs`.
        end_lr_rate: Lower bound of the decay factor passed to `cosine_rate`.
        train_writer: Optional summary writer; when given, every value is
            logged as the scalar 'learning rate' before it is yielded.

    Yields:
        The learning rate for each successive step.
    """
    assert warmup_epochs < epochs

    decay_steps = (epochs - warmup_epochs) * steps
    ramp = np.linspace(start=1e-8, stop=initial_lr, num=warmup_epochs*steps)
    decay = initial_lr * np.array([cosine_rate(step, decay_steps, end_lr_rate)
                                   for step in range(decay_steps)])

    for step, lr in enumerate(np.concatenate([ramp, decay])):
        if train_writer is not None:
            # writing lr into tensorboard
            with train_writer.as_default():
                tf.summary.scalar('learning rate', data=lr, step=step)
        yield lr
class MBConv(layers.Layer):
    """MBConv block: 1x1 expansion -> depth-wise conv -> SE -> 1x1 projection.

    A residual connection is added when `stride` is 1 and `input_c` equals
    `out_c`; in that case a "drop_path" Dropout with a per-sample mask is
    applied to the branch first if `drop_rate` is positive.

    Args:
        kernel_size: Kernel size of the depth-wise convolution.
        input_c: Number of input channels.
        out_c: Number of output channels.
        expand_ratio: Channel multiplier of the expansion conv; must not be 1.
        stride: Stride of the depth-wise convolution, 1 or 2.
        se_ratio: Ratio of `input_c` used for the SE bottleneck width.
        drop_rate: Drop rate of the residual branch.
        name: Layer name.

    Raises:
        ValueError: If `stride` is neither 1 nor 2.
    """

    def __init__(self,
                 kernel_size: int,
                 input_c: int,
                 out_c: int,
                 expand_ratio: int,
                 stride: int,
                 se_ratio: float = 0.25,
                 drop_rate: float = 0.,
                 name: str = None):
        super(MBConv, self).__init__(name=name)

        if stride not in (1, 2):
            raise ValueError("illegal stride value.")

        # EfficientNetV2 never uses MBConv with expansion == 1, so the
        # point-wise expansion conv always exists.
        assert expand_ratio != 1

        self.has_shortcut = (stride == 1 and input_c == out_c)
        self.drop_rate = drop_rate
        hidden_c = input_c * expand_ratio
        bn_args = dict(axis=-1, momentum=0.9, epsilon=1e-3)

        # Sub-layer names are spelled out explicitly: conv2d, conv2d_1 and
        # batch_normalization, batch_normalization_1, batch_normalization_2.

        # Point-wise expansion
        self.expand_conv = layers.Conv2D(filters=hidden_c,
                                         kernel_size=1,
                                         strides=1,
                                         padding="same",
                                         use_bias=False,
                                         name="conv2d")
        self.norm0 = layers.BatchNormalization(name="batch_normalization", **bn_args)
        self.act0 = layers.Activation("swish")

        # Depth-wise convolution
        self.depthwise_conv = layers.DepthwiseConv2D(kernel_size=kernel_size,
                                                     strides=stride,
                                                     depthwise_initializer=CONV_KERNEL_INITIALIZER,
                                                     padding="same",
                                                     use_bias=False,
                                                     name="depthwise_conv2d")
        self.norm1 = layers.BatchNormalization(name="batch_normalization_1", **bn_args)
        self.act1 = layers.Activation("swish")

        # SE: the bottleneck width is derived from the block's input channels.
        squeeze_c = max(1, int(input_c * se_ratio))
        self.se = SE(squeeze_c, hidden_c, name="se")

        # Point-wise linear projection
        self.project_conv = layers.Conv2D(filters=out_c,
                                          kernel_size=1,
                                          strides=1,
                                          kernel_initializer=CONV_KERNEL_INITIALIZER,
                                          padding="same",
                                          use_bias=False,
                                          name="conv2d_1")
        self.norm2 = layers.BatchNormalization(name="batch_normalization_2", **bn_args)

        if self.has_shortcut and drop_rate > 0:
            # Stochastic Depth
            self.drop_path = layers.Dropout(rate=drop_rate,
                                            noise_shape=(None, 1, 1, 1),  # binary dropout mask
                                            name="drop_path")

    def call(self, inputs, training=None):
        """Apply the block to `inputs`."""
        x = self.act0(self.norm0(self.expand_conv(inputs), training=training))
        x = self.act1(self.norm1(self.depthwise_conv(x), training=training))
        x = self.norm2(self.project_conv(self.se(x)), training=training)

        if not self.has_shortcut:
            return x

        if self.drop_rate > 0:
            x = self.drop_path(x, training=training)
        return tf.add(x, inputs)
class Head(layers.Layer):
    """Classification head: 1x1 conv -> BatchNorm -> swish -> global average
    pooling -> optional Dropout -> Dense.

    Args:
        filters: Number of filters of the 1x1 convolution.
        num_classes: Number of units of the final Dense layer.
        drop_rate: Dropout rate applied before the Dense layer; dropout is
            skipped entirely when the rate is not positive.
        name: Layer name.
    """

    def __init__(self,
                 filters: int = 1280,
                 num_classes: int = 1000,
                 drop_rate: float = 0.,
                 name: str = None):
        super(Head, self).__init__(name=name)
        self.conv_head = layers.Conv2D(
            filters=filters,
            kernel_size=1,
            kernel_initializer=CONV_KERNEL_INITIALIZER,
            padding="same",
            use_bias=False,
            name="conv2d")
        self.norm = layers.BatchNormalization(
            axis=-1,
            momentum=0.9,
            epsilon=1e-3,
            name="batch_normalization")
        self.act = layers.Activation("swish")

        self.avg = layers.GlobalAveragePooling2D()
        self.fc = layers.Dense(num_classes,
                               kernel_initializer=DENSE_KERNEL_INITIALIZER)

        # Always define the attribute: previously it only existed when
        # drop_rate > 0, so call() raised AttributeError for the default
        # drop_rate of 0.
        self.dropout = layers.Dropout(drop_rate) if drop_rate > 0 else None

    def call(self, inputs, training=None):
        """Apply the head to `inputs` and return the Dense layer's output."""
        x = self.conv_head(inputs)
        # Forward `training` explicitly, as the other layers in this file do
        # for their BatchNormalization sub-layers.
        x = self.norm(x, training=training)
        x = self.act(x)
        x = self.avg(x)

        if self.dropout is not None:
            x = self.dropout(x, training=training)

        x = self.fc(x)
        return x
def efficientnetv2_s(num_classes: int = 1000):
    """Build EfficientNetV2-S (https://arxiv.org/abs/2104.00298).

    Suggested image sizes: train 300, eval 384.
    """
    # Columns: repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio
    stages = [
        [2, 3, 1, 1, 24, 24, 0, 0],
        [4, 3, 2, 4, 24, 48, 0, 0],
        [4, 3, 2, 4, 48, 64, 0, 0],
        [6, 3, 2, 4, 64, 128, 1, 0.25],
        [9, 3, 1, 6, 128, 160, 1, 0.25],
        [15, 3, 2, 6, 160, 256, 1, 0.25],
    ]
    return EfficientNetV2(model_cnf=stages,
                          num_classes=num_classes,
                          dropout_rate=0.2,
                          name="efficientnetv2-s")


def efficientnetv2_m(num_classes: int = 1000):
    """Build EfficientNetV2-M (https://arxiv.org/abs/2104.00298).

    Suggested image sizes: train 384, eval 480.
    """
    # Columns: repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio
    stages = [
        [3, 3, 1, 1, 24, 24, 0, 0],
        [5, 3, 2, 4, 24, 48, 0, 0],
        [5, 3, 2, 4, 48, 80, 0, 0],
        [7, 3, 2, 4, 80, 160, 1, 0.25],
        [14, 3, 1, 6, 160, 176, 1, 0.25],
        [18, 3, 2, 6, 176, 304, 1, 0.25],
        [5, 3, 1, 6, 304, 512, 1, 0.25],
    ]
    return EfficientNetV2(model_cnf=stages,
                          num_classes=num_classes,
                          dropout_rate=0.3,
                          name="efficientnetv2-m")


def efficientnetv2_l(num_classes: int = 1000):
    """Build EfficientNetV2-L (https://arxiv.org/abs/2104.00298).

    Suggested image sizes: train 384, eval 480.
    """
    # Columns: repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio
    stages = [
        [4, 3, 1, 1, 32, 32, 0, 0],
        [7, 3, 2, 4, 32, 64, 0, 0],
        [7, 3, 2, 4, 64, 96, 0, 0],
        [10, 3, 2, 4, 96, 192, 1, 0.25],
        [19, 3, 1, 6, 192, 224, 1, 0.25],
        [25, 3, 2, 6, 224, 384, 1, 0.25],
        [7, 3, 1, 6, 384, 640, 1, 0.25],
    ]
    return EfficientNetV2(model_cnf=stages,
                          num_classes=num_classes,
                          dropout_rate=0.4,
                          name="efficientnetv2-l")
def main():
    """Fine-tune EfficientNetV2 on the flower dataset.

    Loads pre-trained weights by name, optionally freezes every layer whose
    name does not contain "head", trains with SGD and a per-epoch cosine
    learning-rate curve, writes loss/accuracy/learning-rate summaries under
    ``./logs`` and saves the weights with the best validation accuracy to
    ``./save_weights/efficientnetv2.ckpt``.
    """
    data_root = "/data/flower_photos"  # get data root path

    # exist_ok replaces the previous exists()-then-makedirs() pair.
    os.makedirs("./save_weights", exist_ok=True)

    img_size = {"s": [300, 384],  # train_size, val_size
                "m": [384, 480],
                "l": [384, 480]}
    num_model = "s"

    batch_size = 8
    epochs = 30
    num_classes = 5
    freeze_layers = True
    initial_lr = 0.01

    log_dir = "./logs/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    train_writer = tf.summary.create_file_writer(os.path.join(log_dir, "train"))
    val_writer = tf.summary.create_file_writer(os.path.join(log_dir, "val"))

    # data generator with data augmentation
    train_ds, val_ds = generate_ds(data_root,
                                   train_im_height=img_size[num_model][0],
                                   train_im_width=img_size[num_model][0],
                                   val_im_height=img_size[num_model][1],
                                   val_im_width=img_size[num_model][1],
                                   batch_size=batch_size)

    # create model
    model = create_model(num_classes=num_classes)
    model.build((1, img_size[num_model][0], img_size[num_model][0], 3))

    # Download the pre-converted pre-trained weights:
    # link: https://pan.baidu.com/s/1Pr-pO5sQVySPQnBY8pQH7w  password: f6hi
    # load weights
    pre_weights_path = './efficientnetv2-s.h5'
    assert os.path.exists(pre_weights_path), "cannot find {}".format(pre_weights_path)
    model.load_weights(pre_weights_path, by_name=True, skip_mismatch=True)

    # freeze bottom layers
    if freeze_layers:
        unfreeze_layers = "head"
        for layer in model.layers:
            if unfreeze_layers not in layer.name:
                layer.trainable = False
            else:
                print("training {}".format(layer.name))

    model.summary()

    # custom learning rate curve
    def scheduler(now_epoch):
        """Return the cosine-decayed learning rate for `now_epoch` and log it."""
        end_lr_rate = 0.01  # end_lr = initial_lr * end_lr_rate
        rate = ((1 + math.cos(now_epoch * math.pi / epochs)) / 2) * (1 - end_lr_rate) + end_lr_rate  # cosine
        new_lr = rate * initial_lr

        # writing lr into tensorboard
        # Use the function's own argument as the step; the original read the
        # enclosing loop variable `epoch`, which only worked because the sole
        # call site passes that same value.
        with train_writer.as_default():
            tf.summary.scalar('learning rate', data=new_lr, step=now_epoch)

        return new_lr

    # using keras low level api for training
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    optimizer = tf.keras.optimizers.SGD(learning_rate=initial_lr, momentum=0.9)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

    @tf.function
    def train_step(train_images, train_labels):
        with tf.GradientTape() as tape:
            output = model(train_images, training=True)
            loss = loss_object(train_labels, output)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(train_labels, output)

    @tf.function
    def val_step(val_images, val_labels):
        output = model(val_images, training=False)
        loss = loss_object(val_labels, output)

        val_loss(loss)
        val_accuracy(val_labels, output)

    best_val_acc = 0.
    for epoch in range(epochs):
        train_loss.reset_states()  # clear history info
        train_accuracy.reset_states()  # clear history info
        val_loss.reset_states()  # clear history info
        val_accuracy.reset_states()  # clear history info

        # train
        train_bar = tqdm(train_ds, file=sys.stdout)
        for images, labels in train_bar:
            train_step(images, labels)

            # print train process
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                                 epochs,
                                                                                 train_loss.result(),
                                                                                 train_accuracy.result())

        # update learning rate
        # NOTE(review): the rate is set after this epoch's training steps, so
        # the value computed for epoch N is first used while training epoch
        # N + 1 — confirm this lag is intended.
        optimizer.learning_rate = scheduler(epoch)

        # validate
        val_bar = tqdm(val_ds, file=sys.stdout)
        for images, labels in val_bar:
            val_step(images, labels)

            # print val process
            val_bar.desc = "valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                               epochs,
                                                                               val_loss.result(),
                                                                               val_accuracy.result())
        # writing training loss and acc
        with train_writer.as_default():
            tf.summary.scalar("loss", train_loss.result(), epoch)
            tf.summary.scalar("accuracy", train_accuracy.result(), epoch)

        # writing validation loss and acc
        with val_writer.as_default():
            tf.summary.scalar("loss", val_loss.result(), epoch)
            tf.summary.scalar("accuracy", val_accuracy.result(), epoch)

        # only save best weights
        if val_accuracy.result() > best_val_acc:
            best_val_acc = val_accuracy.result()
            save_name = "./save_weights/efficientnetv2.ckpt"
            model.save_weights(save_name, save_format="tf")
def read_split_data(root: str, val_rate: float = 0.2):
    """Split an image-folder dataset into training and validation subsets.

    Every sub-directory of ``root`` is treated as one class. Class names are
    sorted and numbered from 0, and the index -> name mapping is written to
    ``class_indices.json`` in the current working directory. For each class,
    ``int(n * val_rate)`` of its ``n`` supported images are randomly drawn
    for validation and the remaining ones are used for training.

    :param root: dataset root directory containing one folder per class
    :param val_rate: fraction of every class that goes to the validation set
    :return: tuple ``(train_images_path, train_images_label,
             val_images_path, val_images_label)``
    """
    random.seed(0)  # fixed seed so that repeated runs produce the same split
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # One folder per class; sorted so the class -> index mapping is stable.
    flower_class = sorted(cla for cla in os.listdir(root)
                          if os.path.isdir(os.path.join(root, cla)))

    # Class name -> numeric index, and the inverse mapping saved for inference.
    class_indices = {name: index for index, name in enumerate(flower_class)}
    json_str = json.dumps({index: name for name, index in class_indices.items()},
                          indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []   # paths of all training images
    train_images_label = []  # class index of every training image
    val_images_path = []     # paths of all validation images
    val_images_label = []    # class index of every validation image
    every_class_num = []     # number of samples found for each class
    supported = (".jpg", ".JPG", ".jpeg", ".JPEG")  # accepted file suffixes

    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # os.listdir returns entries in arbitrary order; sort them so the
        # seeded sampling below picks the same files on every machine.
        images = sorted(os.path.join(root, cla, i) for i in os.listdir(cla_path)
                        if os.path.splitext(i)[-1] in supported)
        image_class = class_indices[cla]
        every_class_num.append(len(images))

        # Sample the validation images; a set gives O(1) membership tests.
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.\n{} for training, {} for validation".format(sum(every_class_num),
                                                                                            len(train_images_path),
                                                                                            len(val_images_path)
                                                                                            ))

    plot_image = False  # flip to True to inspect the class distribution
    if plot_image:
        # bar chart with the number of images per class
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # replace the x ticks 0,1,2,... with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # write the count above every bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        plt.xlabel('image class')
        plt.ylabel('number of images')
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label
read_split_data(data_root, val_rate=val_rate) AUTOTUNE = tf.data.experimental.AUTOTUNE def process_train_info(img_path, label): image = tf.io.read_file(img_path) image = tf.image.decode_jpeg(image, channels=3) image = tf.cast(image, tf.float32) image = tf.image.resize_with_crop_or_pad(image, train_im_height, train_im_width) image = tf.image.random_flip_left_right(image) image = (image / 255. - 0.5) / 0.5 return image, label def process_val_info(img_path, label): image = tf.io.read_file(img_path) image = tf.image.decode_jpeg(image, channels=3) image = tf.cast(image, tf.float32) image = tf.image.resize_with_crop_or_pad(image, val_im_height, val_im_width) image = (image / 255. - 0.5) / 0.5 return image, label # Configure dataset for performance def configure_for_performance(ds, shuffle_size: int, shuffle: bool = False, cache: bool = False): if cache: ds = ds.cache() # 读取数据后缓存至内存 if shuffle: ds = ds.shuffle(buffer_size=shuffle_size) # 打乱数据顺序 ds = ds.batch(batch_size) # 指定batch size ds = ds.prefetch(buffer_size=AUTOTUNE) # 在训练的同时提前准备下一个step的数据 return ds train_ds = tf.data.Dataset.from_tensor_slices((tf.constant(train_img_path), tf.constant(train_img_label))) total_train = len(train_img_path) # Use Dataset.map to create a dataset of image, label pairs train_ds = train_ds.map(process_train_info, num_parallel_calls=AUTOTUNE) train_ds = configure_for_performance(train_ds, total_train, shuffle=True, cache=cache_data) val_ds = tf.data.Dataset.from_tensor_slices((tf.constant(val_img_path), tf.constant(val_img_label))) total_val = len(val_img_path) # Use Dataset.map to create a dataset of image, label pairs val_ds = val_ds.map(process_val_info, num_parallel_calls=AUTOTUNE) val_ds = configure_for_performance(val_ds, total_val, cache=False) return train_ds, val_ds ================================================ FILE: tensorflow_classification/Test1_official_demo/model.py ================================================ from tensorflow.keras.layers import Dense, Flatten, Conv2D 
def main():
    """Train ``MyModel`` on MNIST with a hand-written training loop.

    Loads MNIST through ``tf.keras.datasets``, divides the pixel values by
    255, trains for five epochs with Adam and sparse categorical
    cross-entropy, and prints the train/test loss and accuracy per epoch.
    """
    # download and load data, then rescale the pixel values
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
    x_train = x_train / 255.0
    x_test = x_test / 255.0

    # append a trailing channels axis to the image arrays
    x_train = x_train[..., tf.newaxis]
    x_test = x_test[..., tf.newaxis]

    # input pipelines: shuffled batches for training, plain batches for test
    train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    train_ds = train_ds.shuffle(10000).batch(32)
    test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test))
    test_ds = test_ds.batch(32)

    model = MyModel()
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
    optimizer = tf.keras.optimizers.Adam()

    # running metrics for the training and the test pass
    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
    epoch_metrics = (train_loss, train_accuracy, test_loss, test_accuracy)

    @tf.function
    def train_step(batch_images, batch_labels):
        # forward pass, loss, back-propagation and metric update for one batch
        with tf.GradientTape() as tape:
            predictions = model(batch_images)
            loss = loss_object(batch_labels, predictions)
        variables = model.trainable_variables
        gradients = tape.gradient(loss, variables)
        optimizer.apply_gradients(zip(gradients, variables))

        train_loss(loss)
        train_accuracy(batch_labels, predictions)

    @tf.function
    def test_step(batch_images, batch_labels):
        # forward pass and metric update only; no weights are changed
        predictions = model(batch_images)
        test_loss(loss_object(batch_labels, predictions))
        test_accuracy(batch_labels, predictions)

    EPOCHS = 5
    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

    for epoch in range(EPOCHS):
        # clear the statistics accumulated during the previous epoch
        for metric in epoch_metrics:
            metric.reset_states()

        for images, labels in train_ds:
            train_step(images, labels)

        for test_images, test_labels in test_ds:
            test_step(test_images, test_labels)

        print(template.format(epoch + 1,
                              train_loss.result(),
                              train_accuracy.result() * 100,
                              test_loss.result(),
                              test_accuracy.result() * 100))
def main():
    """Fine-tune ``AlexNet_pytorch`` on the flower dataset.

    Loads ``./pretrain_weights.ckpt``, freezes every layer whose name
    contains ``conv2d``, trains the remaining layers with ``model.fit`` and
    finally plots the loss and accuracy curves. The weights with the lowest
    validation loss are written to ``./save_weights/myAlex.h5``.
    """
    # locate <project root>/data_set/flower_data/{train,val}
    project_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))
    flower_root = os.path.join(project_root, "data_set", "flower_data")
    train_dir = os.path.join(flower_root, "train")
    validation_dir = os.path.join(flower_root, "val")
    for required_dir in (train_dir, validation_dir):
        assert os.path.exists(required_dir), "cannot find {}".format(required_dir)

    # directory that receives the checkpoints
    if not os.path.exists("save_weights"):
        os.makedirs("save_weights")

    im_height, im_width = 224, 224
    batch_size = 32
    epochs = 10

    def pre_function(img: np.ndarray):
        # divide by 255, then subtract the per-channel mean and divide by the
        # per-channel std listed below
        scaled = img / 255.
        centered = scaled - [0.485, 0.456, 0.406]
        return centered / [0.229, 0.224, 0.225]

    # settings shared by the training and the validation iterator
    flow_kwargs = dict(batch_size=batch_size,
                       target_size=(im_height, im_width),
                       class_mode='categorical')

    # training data gets random horizontal flips; validation data does not
    train_image_generator = ImageDataGenerator(horizontal_flip=True,
                                               preprocessing_function=pre_function)
    validation_image_generator = ImageDataGenerator(preprocessing_function=pre_function)

    train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,
                                                               shuffle=True,
                                                               **flow_kwargs)
    total_train = train_data_gen.n

    # store the index -> class name mapping for later use
    index_to_name = {index: name for name, index in train_data_gen.class_indices.items()}
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json.dumps(index_to_name, indent=4))

    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,
                                                                  shuffle=False,
                                                                  **flow_kwargs)
    total_val = val_data_gen.n
    print("using {} images for training, {} images for validation.".format(total_train,
                                                                           total_val))

    model = AlexNet_pytorch(im_height=im_height, im_width=im_width, num_classes=5)

    # a TF checkpoint consists of several files sharing this prefix
    pre_weights_path = './pretrain_weights.ckpt'
    assert len(glob.glob(pre_weights_path+"*")), "cannot find {}".format(pre_weights_path)
    model.load_weights(pre_weights_path)

    # freeze the convolution layers
    for layer_t in model.layers:
        if 'conv2d' in layer_t.name:
            layer_t.trainable = False

    model.summary()

    # using keras high level api for training
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
                  metrics=["accuracy"])

    checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath='./save_weights/myAlex.h5',
                                                    save_best_only=True,
                                                    save_weights_only=True,
                                                    monitor='val_loss')

    history = model.fit(x=train_data_gen,
                        steps_per_epoch=total_train // batch_size,
                        epochs=epochs,
                        validation_data=val_data_gen,
                        validation_steps=total_val // batch_size,
                        callbacks=[checkpoint])

    # collect the curves recorded by fit()
    history_dict = history.history
    train_loss = history_dict["loss"]
    train_accuracy = history_dict["accuracy"]
    val_loss = history_dict["val_loss"]
    val_accuracy = history_dict["val_accuracy"]

    # one figure per quantity: first loss, then accuracy
    curves = (("loss", train_loss, val_loss),
              ("accuracy", train_accuracy, val_accuracy))
    for quantity, train_curve, val_curve in curves:
        plt.figure()
        plt.plot(range(epochs), train_curve, label='train_' + quantity)
        plt.plot(range(epochs), val_curve, label='val_' + quantity)
        plt.legend()
        plt.xlabel('epochs')
        plt.ylabel(quantity)
    plt.show()
def main():
    """Classify ``../tulip.jpg`` with the trained AlexNet and show the result.

    Reads the index -> class name mapping from ``./class_indices.json``,
    loads the weights from ``./save_weights/myAlex.h5``, prints the
    probability of every class and displays the image with the top
    prediction as its title.
    """
    im_height = 224
    im_width = 224

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    # force three channels so grayscale / RGBA files match the network input
    img = Image.open(img_path).convert("RGB")

    # resize image to 224x224
    img = img.resize((im_width, im_height))
    plt.imshow(img)

    # scaling pixel value to (0-1)
    img = np.array(img) / 255.

    # Add the image to a batch where it's the only member.
    img = np.expand_dims(img, 0)

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)
    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = AlexNet_v1(num_classes=5)
    weighs_path = "./save_weights/myAlex.h5"
    # check the weights file itself (the original re-checked img_path here,
    # so a missing weights file slipped through to load_weights)
    assert os.path.exists(weighs_path), "file: '{}' dose not exist.".format(weighs_path)
    model.load_weights(weighs_path)

    # prediction: drop the batch axis to get one probability per class
    result = np.squeeze(model.predict(img))
    predict_class = np.argmax(result)

    print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)],
                                               result[predict_class])
    plt.title(print_res)
    for i, prob in enumerate(result):
        print("class: {:10} prob: {:.3}".format(class_indict[str(i)], prob))
    plt.show()
= new_name.replace("features.6.weight", "conv2d_2/kernel") new_name = new_name.replace("features.6.bias", "conv2d_2/bias") if 'features.8' in new_name: new_name = new_name.replace("features.8.weight", "conv2d_3/kernel") new_name = new_name.replace("features.8.bias", "conv2d_3/bias") if 'features.10' in new_name: new_name = new_name.replace("features.10.weight", "conv2d_4/kernel") new_name = new_name.replace("features.10.bias", "conv2d_4/bias") if 'classifier.1' in new_name: new_name = new_name.replace("classifier.1.weight", "dense/kernel") new_name = new_name.replace("classifier.1.bias", "dense/bias") if 'classifier.4' in new_name: new_name = new_name.replace("classifier.4.weight", "dense_1/kernel") new_name = new_name.replace("classifier.4.bias", "dense_1/bias") if 'conv2d' in new_name and 'kernel' in new_name: value = np.transpose(value, (2, 3, 1, 0)).astype(np.float32) else: value = np.transpose(value).astype(np.float32) re_var = tf.Variable(value, name=new_name) new_var_list.append(re_var) re_var = tf.Variable(tf.keras.initializers.he_uniform()([4096, num_classes]), name="dense_2/kernel") new_var_list.append(re_var) re_var = tf.Variable(tf.keras.initializers.he_uniform()([num_classes]), name="dense_2/bias") new_var_list.append(re_var) saver = tf.compat.v1.train.Saver(new_var_list) sess.run(tf.compat.v1.global_variables_initializer()) saver.save(sess, save_path=new_ckpt_path, write_meta_graph=False, write_state=False) except_list = ['classifier.6.weight', 'classifier.6.bias'] # https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth pth_path = './alexnet-owt-4df8aa71.pth' new_ckpt_path = './pretrain_weights.ckpt' num_classes = 5 rename_var(pth_path, new_ckpt_path, num_classes) ================================================ FILE: tensorflow_classification/Test2_alexnet/train.py ================================================ from tensorflow.keras.preprocessing.image import ImageDataGenerator import matplotlib.pyplot as plt from model import AlexNet_v1, 
def main():
    """Train ``AlexNet_v1`` from scratch on the flower dataset.

    Images are read with ``ImageDataGenerator`` (rescaled by 1/255, random
    horizontal flips for training), the model is trained with ``model.fit``
    and the weights with the lowest validation loss are written to
    ``./save_weights/myAlex.h5``. Loss and accuracy curves are plotted at
    the end.
    """
    # locate <project root>/data_set/flower_data/{train,val}
    project_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))
    flower_root = os.path.join(project_root, "data_set", "flower_data")
    train_dir = os.path.join(flower_root, "train")
    validation_dir = os.path.join(flower_root, "val")
    for required_dir in (train_dir, validation_dir):
        assert os.path.exists(required_dir), "cannot find {}".format(required_dir)

    # directory that receives the checkpoints
    if not os.path.exists("save_weights"):
        os.makedirs("save_weights")

    im_height, im_width = 224, 224
    batch_size = 32
    epochs = 10

    # settings shared by the training and the validation iterator
    flow_kwargs = dict(batch_size=batch_size,
                       target_size=(im_height, im_width),
                       class_mode='categorical')

    # training data gets random horizontal flips; validation data does not
    train_image_generator = ImageDataGenerator(rescale=1. / 255,
                                               horizontal_flip=True)
    validation_image_generator = ImageDataGenerator(rescale=1. / 255)

    train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,
                                                               shuffle=True,
                                                               **flow_kwargs)
    total_train = train_data_gen.n

    # store the index -> class name mapping for later use
    index_to_name = {index: name for name, index in train_data_gen.class_indices.items()}
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json.dumps(index_to_name, indent=4))

    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,
                                                                  shuffle=False,
                                                                  **flow_kwargs)
    total_val = val_data_gen.n
    print("using {} images for training, {} images for validation.".format(total_train,
                                                                           total_val))

    model = AlexNet_v1(im_height=im_height, im_width=im_width, num_classes=5)
    # To use the subclassed variant instead:
    # model = AlexNet_v2(num_classes=5)
    # model.build((batch_size, 224, 224, 3))  # when using subclass model
    model.summary()

    # using keras high level api for training; a hand-written
    # tf.GradientTape loop for the same model lives in trainGPU.py
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
                  metrics=["accuracy"])

    checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath='./save_weights/myAlex.h5',
                                                    save_best_only=True,
                                                    save_weights_only=True,
                                                    monitor='val_loss')

    history = model.fit(x=train_data_gen,
                        steps_per_epoch=total_train // batch_size,
                        epochs=epochs,
                        validation_data=val_data_gen,
                        validation_steps=total_val // batch_size,
                        callbacks=[checkpoint])

    # collect the curves recorded by fit()
    history_dict = history.history
    train_loss = history_dict["loss"]
    train_accuracy = history_dict["accuracy"]
    val_loss = history_dict["val_loss"]
    val_accuracy = history_dict["val_accuracy"]

    # one figure per quantity: first loss, then accuracy
    curves = (("loss", train_loss, val_loss),
              ("accuracy", train_accuracy, val_accuracy))
    for quantity, train_curve, val_curve in curves:
        plt.figure()
        plt.plot(range(epochs), train_curve, label='train_' + quantity)
        plt.plot(range(epochs), val_curve, label='val_' + quantity)
        plt.legend()
        plt.xlabel('epochs')
        plt.ylabel(quantity)
    plt.show()
def main():
    """Train ``AlexNet_v1`` on the flower dataset with tf.data and a custom loop.

    Builds ``tf.data`` pipelines from the ``*.jpg`` files below the train and
    val directories, trains with a ``tf.GradientTape`` loop and saves the
    weights to ``./save_weights/myAlex.ckpt`` whenever the validation loss
    improves on the best value seen so far.
    """
    gpus = tf.config.experimental.list_physical_devices("GPU")
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
        except RuntimeError as e:
            print(e)
            exit(-1)

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    train_dir = os.path.join(image_path, "train")
    validation_dir = os.path.join(image_path, "val")
    assert os.path.exists(train_dir), "cannot find {}".format(train_dir)
    assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir)

    # create direction for saving weights
    if not os.path.exists("save_weights"):
        os.makedirs("save_weights")

    im_height = 224
    im_width = 224
    batch_size = 32
    epochs = 10

    # class dict; sorted because os.listdir order is arbitrary and would
    # otherwise make the class -> index mapping differ between machines
    data_class = sorted(cla for cla in os.listdir(train_dir)
                        if os.path.isdir(os.path.join(train_dir, cla)))
    class_num = len(data_class)
    class_dict = {name: index for index, name in enumerate(data_class)}

    # reverse value and key of dict, then write it into a json file
    inverse_dict = {index: name for name, index in class_dict.items()}
    json_str = json.dumps(inverse_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    # load train images list; the parent folder name is the class name
    train_image_list = glob.glob(train_dir+"/*/*.jpg")
    random.shuffle(train_image_list)
    train_num = len(train_image_list)
    assert train_num > 0, "cannot find any .jpg file in {}".format(train_dir)
    train_label_list = [class_dict[path.split(os.path.sep)[-2]]
                        for path in train_image_list]

    # load validation images list
    val_image_list = glob.glob(validation_dir+"/*/*.jpg")
    random.shuffle(val_image_list)
    val_num = len(val_image_list)
    assert val_num > 0, "cannot find any .jpg file in {}".format(validation_dir)
    val_label_list = [class_dict[path.split(os.path.sep)[-2]]
                      for path in val_image_list]

    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    def process_path(img_path, label):
        # one-hot label + decoded, float-converted and resized image
        label = tf.one_hot(label, depth=class_num)
        image = tf.io.read_file(img_path)
        # channels=3 so grayscale JPEGs also yield a 3-channel tensor
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize(image, [im_height, im_width])
        return image, label

    AUTOTUNE = tf.data.experimental.AUTOTUNE

    # load train dataset
    train_dataset = tf.data.Dataset.from_tensor_slices((train_image_list, train_label_list))
    train_dataset = train_dataset.shuffle(buffer_size=train_num)\
                                 .map(process_path, num_parallel_calls=AUTOTUNE)\
                                 .repeat().batch(batch_size).prefetch(AUTOTUNE)

    # load validation dataset
    val_dataset = tf.data.Dataset.from_tensor_slices((val_image_list, val_label_list))
    val_dataset = val_dataset.map(process_path, num_parallel_calls=AUTOTUNE)\
                             .repeat().batch(batch_size)

    # instantiate the model; the output size follows the number of class
    # folders so that it always matches the one-hot label depth
    model = AlexNet_v1(im_height=im_height, im_width=im_width, num_classes=class_num)
    # model = AlexNet_v2(num_classes=class_num)
    # model.build((batch_size, 224, 224, 3))  # when using subclass model
    model.summary()

    # using keras low level api for training
    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')

    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            predictions = model(images, training=True)
            loss = loss_object(labels, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(labels, predictions)

    @tf.function
    def test_step(images, labels):
        predictions = model(images, training=False)
        t_loss = loss_object(labels, predictions)

        test_loss(t_loss)
        test_accuracy(labels, predictions)

    best_test_loss = float('inf')
    # Both datasets repeat forever, so every epoch is cut off after a fixed
    # number of batches. At least one batch is always processed: with fewer
    # images than batch_size the floor division is 0, and the previous
    # "break when index + 1 == steps" loop then never terminated.
    train_step_num = max(1, train_num // batch_size)
    val_step_num = max(1, val_num // batch_size)
    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

    for epoch in range(1, epochs+1):
        train_loss.reset_states()        # clear history info
        train_accuracy.reset_states()    # clear history info
        test_loss.reset_states()         # clear history info
        test_accuracy.reset_states()     # clear history info

        t1 = time.perf_counter()
        for images, labels in train_dataset.take(train_step_num):
            train_step(images, labels)
        print(time.perf_counter()-t1)

        for images, labels in val_dataset.take(val_step_num):
            test_step(images, labels)

        print(template.format(epoch,
                              train_loss.result(),
                              train_accuracy.result() * 100,
                              test_loss.result(),
                              test_accuracy.result() * 100))

        # only keep the best weights: remember the lowest validation loss so
        # that a worse epoch no longer overwrites the checkpoint
        current_test_loss = float(test_loss.result())
        if current_test_loss < best_test_loss:
            best_test_loss = current_test_loss
            model.save_weights("./save_weights/myAlex.ckpt", save_format='tf')

    # # using keras high level api for training
    # model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
    #               loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    #               metrics=["accuracy"])
    #
    # callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath='./save_weights/myAlex_{epoch}.h5',
    #                                                 save_best_only=True,
    #                                                 save_weights_only=True,
    #                                                 monitor='val_loss')]
    #
    # # tensorflow2.1 recommend to using fit
    # history = model.fit(x=train_dataset,
    #                     steps_per_epoch=train_num // batch_size,
    #                     epochs=epochs,
    #                     validation_data=val_dataset,
    #                     validation_steps=val_num // batch_size,
    #                     callbacks=callbacks)
def main():
    """Fine-tune a VGG16 classifier on the flower data set.

    Reads images from ``../../data_set/flower_data/{train,val}`` (relative to
    the current working directory), loads ``./pretrain_weights.ckpt``, freezes
    the layer named ``feature`` and trains the remaining layers.

    Side effects: writes ``class_indices.json`` to the working directory and
    the best weights (by ``val_loss``) to ``./save_weights/myVGG_{epoch}.h5``.
    """
    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    train_dir = os.path.join(image_path, "train")
    validation_dir = os.path.join(image_path, "val")
    assert os.path.exists(train_dir), "cannot find {}".format(train_dir)
    assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir)

    # create the directory for saved weights (no-op when it already exists)
    os.makedirs("save_weights", exist_ok=True)

    im_height = 224
    im_width = 224
    batch_size = 32
    epochs = 10

    # per-channel means subtracted from every image, listed as R, G, B
    _R_MEAN = 123.68
    _G_MEAN = 116.78
    _B_MEAN = 103.94

    def pre_function(img):
        """Subtract the per-channel means from one image."""
        img = img - [_R_MEAN, _G_MEAN, _B_MEAN]
        return img

    # data generator with data augmentation (random horizontal flip for training only)
    train_image_generator = ImageDataGenerator(horizontal_flip=True,
                                               preprocessing_function=pre_function)
    validation_image_generator = ImageDataGenerator(preprocessing_function=pre_function)

    train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,
                                                               batch_size=batch_size,
                                                               shuffle=True,
                                                               target_size=(im_height, im_width),
                                                               class_mode='categorical')
    total_train = train_data_gen.n

    # invert {class name: index} to {index: class name} and store it for prediction
    class_indices = train_data_gen.class_indices
    inverse_dict = dict((val, key) for key, val in class_indices.items())
    json_str = json.dumps(inverse_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,
                                                                  batch_size=batch_size,
                                                                  shuffle=False,
                                                                  target_size=(im_height, im_width),
                                                                  class_mode='categorical')
    total_val = val_data_gen.n
    print("using {} images for training, {} images for validation.".format(total_train,
                                                                           total_val))

    # build the network with the same spatial size the generators produce
    model = vgg("vgg16", im_height, im_width, 5)

    pre_weights_path = './pretrain_weights.ckpt'
    # a TF checkpoint is several files sharing this prefix, hence the glob
    assert len(glob.glob(pre_weights_path+"*")), "cannot find {}".format(pre_weights_path)
    model.load_weights(pre_weights_path)

    # freeze the layer named 'feature'; only the remaining layers are updated
    for layer_t in model.layers:
        if layer_t.name == 'feature':
            layer_t.trainable = False
            break

    model.summary()

    # using keras high level api for training
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
                  metrics=["accuracy"])

    # checkpoint name fixed: this script trains VGG, not AlexNet
    callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath='./save_weights/myVGG_{epoch}.h5',
                                                    save_best_only=True,
                                                    save_weights_only=True,
                                                    monitor='val_loss')]

    # tensorflow2.1 recommend to using fit
    model.fit(x=train_data_gen,
              steps_per_epoch=total_train // batch_size,
              epochs=epochs,
              validation_data=val_data_gen,
              validation_steps=total_val // batch_size,
              callbacks=callbacks)


if __name__ == '__main__':
    main()
def VGG(feature, im_height=224, im_width=224, num_classes=1000):
    """Assemble a VGG classifier from a feature extractor and a dense head.

    Args:
        feature: callable layer/model applied to the input image tensor.
        im_height: input image height.
        im_width: input image width.
        num_classes: size of the final softmax output.

    Returns:
        A Keras ``Model`` mapping an NHWC float32 image batch to class
        probabilities.
    """
    # TensorFlow tensors use NHWC channel ordering
    inputs = layers.Input(shape=(im_height, im_width, 3), dtype="float32")
    x = feature(inputs)

    # classifier head, applied in order
    head = (
        layers.Flatten(),
        layers.Dropout(rate=0.5),
        layers.Dense(2048, activation='relu',
                     kernel_initializer=DENSE_KERNEL_INITIALIZER),
        layers.Dropout(rate=0.5),
        layers.Dense(2048, activation='relu',
                     kernel_initializer=DENSE_KERNEL_INITIALIZER),
        layers.Dense(num_classes,
                     kernel_initializer=DENSE_KERNEL_INITIALIZER),
        layers.Softmax(),
    )
    for layer in head:
        x = layer(x)

    return Model(inputs=inputs, outputs=x)


def make_feature(cfg):
    """Build the convolutional feature extractor described by *cfg*.

    Each entry of *cfg* is either the string ``"M"`` (2x2 max-pool, stride 2)
    or an int giving the number of filters of a 3x3 ReLU convolution.
    The result is a ``Sequential`` named ``"feature"``.
    """
    def build(entry):
        if entry == "M":
            return layers.MaxPool2D(pool_size=2, strides=2)
        return layers.Conv2D(entry, kernel_size=3, padding="SAME", activation="relu",
                             kernel_initializer=CONV_KERNEL_INITIALIZER)

    return Sequential([build(entry) for entry in cfg], name="feature")


# layer layouts: ints are conv filter counts, 'M' is a max-pool
cfgs = {
    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


def vgg(model_name="vgg16", im_height=224, im_width=224, num_classes=1000):
    """Create the VGG variant called *model_name* (a key of ``cfgs``)."""
    assert model_name in cfgs, "not support model {}".format(model_name)
    backbone = make_feature(cfgs[model_name])
    return VGG(backbone, im_height=im_height, im_width=im_width, num_classes=num_classes)
def main():
    """Classify ``../tulip.jpg`` with the trained VGG16 weights and show the result.

    Reads ``./class_indices.json`` for the index-to-name mapping and
    ``./save_weights/myVGG.h5`` for the weights, prints the probability of
    every class and displays the image titled with the top prediction.
    """
    im_height = 224
    im_width = 224
    num_classes = 5

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    img = Image.open(img_path)

    # resize image to the network input size
    img = img.resize((im_width, im_height))
    plt.imshow(img)

    # scaling pixel value to (0-1)
    img = np.array(img) / 255.

    # Add the image to a batch where it's the only member.
    img = (np.expand_dims(img, 0))

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = vgg("vgg16", im_height=im_height, im_width=im_width, num_classes=num_classes)
    weights_path = "./save_weights/myVGG.h5"
    # verify the weights file itself so a missing file fails here with a clear message
    assert os.path.exists(weights_path), "file: '{}' dose not exist.".format(weights_path)
    model.load_weights(weights_path)

    # prediction: drop the batch dimension to get one probability per class
    result = np.squeeze(model.predict(img))
    predict_class = np.argmax(result)

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_class)],
                                                 result[predict_class])
    plt.title(print_res)
    for i in range(len(result)):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  result[i]))
    plt.show()


if __name__ == '__main__':
    main()
def rename_var(ckpt_path, new_ckpt_path, num_classes=5):
    """Rewrite a VGG16 checkpoint with renamed variables and a fresh dense head.

    Variables listed in the module-level ``except_list`` and every fully
    connected (``fc``) variable are dropped; the remaining ones are renamed via
    the substitution table below. Six new He-uniform initialised variables
    (``dense``, ``dense_1``, ``dense_2`` kernels and biases) are added, and the
    result is saved to *new_ckpt_path*.

    Args:
        ckpt_path: path of the source checkpoint.
        new_ckpt_path: path prefix of the checkpoint to write.
        num_classes: output size of the new ``dense_2`` layer.
    """
    # (old substring, new substring); applied to each name in this order
    substitutions = [
        ('vgg_16', 'feature'),
        ("weights", "kernel"),
        ("biases", "bias"),
        ("conv1/conv1_1", "conv2d"),
        ("conv1/conv1_2", "conv2d_1"),
        ("conv2/conv2_1", "conv2d_2"),
        ("conv2/conv2_2", "conv2d_3"),
        ("conv3/conv3_1", "conv2d_4"),
        ("conv3/conv3_2", "conv2d_5"),
        ("conv3/conv3_3", "conv2d_6"),
        ("conv4/conv4_1", "conv2d_7"),
        ("conv4/conv4_2", "conv2d_8"),
        ("conv4/conv4_3", "conv2d_9"),
        ("conv5/conv5_1", "conv2d_10"),
        ("conv5/conv5_2", "conv2d_11"),
        ("conv5/conv5_3", "conv2d_12"),
    ]
    # (shape, name) of the newly initialised classifier variables
    new_head = [
        ([25088, 2048], "dense/kernel"),
        ([2048], "dense/bias"),
        ([2048, 2048], "dense_1/kernel"),
        ([2048], "dense_1/bias"),
        ([2048, num_classes], "dense_2/kernel"),
        ([num_classes], "dense_2/bias"),
    ]

    with tf.Graph().as_default(), tf.compat.v1.Session().as_default() as sess:
        new_var_list = []

        for var_name, _ in tf.train.list_variables(ckpt_path):
            # Skip excluded and fully connected variables BEFORE reading them:
            # none of the substitutions adds or removes "fc", so testing the
            # original name is equivalent and avoids loading tensors that
            # would be thrown away.
            if var_name in except_list or 'fc' in var_name:
                continue

            var = tf.train.load_variable(ckpt_path, var_name)
            new_var_name = var_name
            for old, new in substitutions:
                new_var_name = new_var_name.replace(old, new)
            new_var_list.append(tf.Variable(var, name=new_var_name))

        # a fresh initializer per variable, so each one is sampled independently
        for shape, name in new_head:
            init_value = tf.keras.initializers.he_uniform()(shape)
            new_var_list.append(tf.Variable(init_value, name=name))

        saver = tf.compat.v1.train.Saver(new_var_list)
        sess.run(tf.compat.v1.global_variables_initializer())
        saver.save(sess, save_path=new_ckpt_path, write_meta_graph=False, write_state=False)
def main():
    """Train VGG16 on the flower data set with ``model.fit`` and plot the curves.

    Images come from ``../../data_set/flower_data/{train,val}`` relative to
    the working directory. Writes ``class_indices.json``, saves the best
    weights (by ``val_loss``) to ``./save_weights/myVGG.h5`` and finally shows
    loss and accuracy plots.
    """
    root_dir = os.path.abspath(os.path.join(os.getcwd(), "../.."))
    flower_dir = os.path.join(root_dir, "data_set", "flower_data")
    train_dir = os.path.join(flower_dir, "train")
    validation_dir = os.path.join(flower_dir, "val")
    assert os.path.exists(train_dir), "cannot find {}".format(train_dir)
    assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir)

    # make sure the checkpoint directory exists
    if not os.path.exists("save_weights"):
        os.makedirs("save_weights")

    im_height, im_width = 224, 224
    batch_size = 32
    epochs = 10

    def flow(generator, directory, shuffle):
        """Iterate *directory* in batches of resized, one-hot labelled images."""
        return generator.flow_from_directory(directory=directory,
                                             batch_size=batch_size,
                                             shuffle=shuffle,
                                             target_size=(im_height, im_width),
                                             class_mode='categorical')

    # pixel values are rescaled to [0, 1]; training additionally flips horizontally
    train_image_generator = ImageDataGenerator(rescale=1. / 255, horizontal_flip=True)
    validation_image_generator = ImageDataGenerator(rescale=1. / 255)

    train_data_gen = flow(train_image_generator, train_dir, True)
    total_train = train_data_gen.n

    # store {index: class name} so prediction can map outputs back to names
    index_to_name = {index: name for name, index in train_data_gen.class_indices.items()}
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json.dumps(index_to_name, indent=4))

    val_data_gen = flow(validation_image_generator, validation_dir, False)
    total_val = val_data_gen.n
    print("using {} images for training, {} images for validation.".format(total_train,
                                                                           total_val))

    model = vgg("vgg16", im_height, im_width, num_classes=5)
    model.summary()

    # training through the high-level keras API
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
                  metrics=["accuracy"])

    checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath='./save_weights/myVGG.h5',
                                                    save_best_only=True,
                                                    save_weights_only=True,
                                                    monitor='val_loss')

    history = model.fit(x=train_data_gen,
                        steps_per_epoch=total_train // batch_size,
                        epochs=epochs,
                        validation_data=val_data_gen,
                        validation_steps=total_val // batch_size,
                        callbacks=[checkpoint])

    # collect the per-epoch curves recorded by fit()
    curves = history.history
    train_loss, val_loss = curves["loss"], curves["val_loss"]
    train_accuracy, val_accuracy = curves["accuracy"], curves["val_accuracy"]

    def draw(train_curve, val_curve, metric):
        """Open a new figure with the train/validation curves of one metric."""
        plt.figure()
        plt.plot(range(epochs), train_curve, label='train_' + metric)
        plt.plot(range(epochs), val_curve, label='val_' + metric)
        plt.legend()
        plt.xlabel('epochs')
        plt.ylabel(metric)

    draw(train_loss, val_loss, 'loss')              # figure 1
    draw(train_accuracy, val_accuracy, 'accuracy')  # figure 2
    plt.show()


if __name__ == '__main__':
    main()
def main():
    """Train VGG16 on the flower data set with ``tf.data`` and a custom loop.

    Images (``*.jpg``) are read from ``../../data_set/flower_data/{train,val}``
    relative to the working directory; each sub-directory name is a class.
    Writes ``class_indices.json`` and saves the weights to
    ``./save_weights/myVGG.ckpt`` whenever the validation loss improves.
    """
    gpus = tf.config.experimental.list_physical_devices("GPU")
    if gpus:
        try:
            # allocate GPU memory on demand instead of reserving all of it
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
        except RuntimeError as e:
            print(e)
            exit(-1)

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    train_dir = os.path.join(image_path, "train")
    validation_dir = os.path.join(image_path, "val")
    assert os.path.exists(train_dir), "cannot find {}".format(train_dir)
    assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir)

    # create the directory for saved weights (no-op when it already exists)
    os.makedirs("save_weights", exist_ok=True)

    im_height = 224
    im_width = 224
    batch_size = 32
    epochs = 10

    # class dict; os.listdir order is unspecified, so sort to get a
    # reproducible class-name -> index mapping
    data_class = sorted(cla for cla in os.listdir(train_dir)
                        if os.path.isdir(os.path.join(train_dir, cla)))
    class_num = len(data_class)
    class_dict = dict((value, index) for index, value in enumerate(data_class))

    # reverse value and key of dict
    inverse_dict = dict((val, key) for key, val in class_dict.items())
    # write dict into json file
    json_str = json.dumps(inverse_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    # load train images list; the label is the name of the parent directory
    train_image_list = glob.glob(train_dir+"/*/*.jpg")
    random.shuffle(train_image_list)
    train_num = len(train_image_list)
    assert train_num > 0, "cannot find any .jpg file in {}".format(train_dir)
    train_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in train_image_list]

    # load validation images list
    val_image_list = glob.glob(validation_dir+"/*/*.jpg")
    random.shuffle(val_image_list)
    val_num = len(val_image_list)
    assert val_num > 0, "cannot find any .jpg file in {}".format(validation_dir)
    val_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in val_image_list]

    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    def process_path(img_path, label):
        """Load one JPEG as a float32 image of the network size with a one-hot label."""
        label = tf.one_hot(label, depth=class_num)
        image = tf.io.read_file(img_path)
        # force 3 channels so non-RGB JPEGs still batch with the rest
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize(image, [im_height, im_width])
        return image, label

    AUTOTUNE = tf.data.experimental.AUTOTUNE

    # load train dataset
    train_dataset = tf.data.Dataset.from_tensor_slices((train_image_list, train_label_list))
    train_dataset = train_dataset.shuffle(buffer_size=train_num)\
                                 .map(process_path, num_parallel_calls=AUTOTUNE)\
                                 .repeat().batch(batch_size).prefetch(AUTOTUNE)

    # load validation dataset
    val_dataset = tf.data.Dataset.from_tensor_slices((val_image_list, val_label_list))
    val_dataset = val_dataset.map(process_path, num_parallel_calls=AUTOTUNE)\
                             .repeat().batch(batch_size)

    # instantiate the model; the output size must match the one-hot label depth
    model = vgg("vgg16", im_height, im_width, class_num)
    model.summary()

    # using keras low level api for training
    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')

    @tf.function
    def train_step(images, labels):
        """Run one optimisation step and update the training metrics."""
        with tf.GradientTape() as tape:
            predictions = model(images, training=True)
            loss = loss_object(labels, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(labels, predictions)

    @tf.function
    def test_step(images, labels):
        """Evaluate one batch and update the validation metrics."""
        predictions = model(images, training=False)
        t_loss = loss_object(labels, predictions)

        test_loss(t_loss)
        test_accuracy(labels, predictions)

    best_test_loss = float('inf')
    train_step_num = train_num // batch_size
    val_step_num = val_num // batch_size
    for epoch in range(1, epochs+1):
        train_loss.reset_states()        # clear history info
        train_accuracy.reset_states()    # clear history info
        test_loss.reset_states()         # clear history info
        test_accuracy.reset_states()     # clear history info

        t1 = time.perf_counter()
        # the datasets repeat forever, so bound every epoch explicitly
        for images, labels in train_dataset.take(train_step_num):
            train_step(images, labels)
        print(time.perf_counter()-t1)

        for images, labels in val_dataset.take(val_step_num):
            test_step(images, labels)

        template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
        print(template.format(epoch,
                              train_loss.result(),
                              train_accuracy.result() * 100,
                              test_loss.result(),
                              test_accuracy.result() * 100))

        # keep only the weights with the lowest validation loss seen so far
        current_test_loss = float(test_loss.result())
        if current_test_loss < best_test_loss:
            best_test_loss = current_test_loss
            model.save_weights("./save_weights/myVGG.ckpt", save_format='tf')


if __name__ == '__main__':
    main()
def GoogLeNet(im_height=224, im_width=224, class_num=1000, aux_logits=False):
    """Build the GoogLeNet classifier as a Keras functional model.

    Args:
        im_height: input image height.
        im_width: input image width.
        class_num: number of output classes.
        aux_logits: when True, two ``InceptionAux`` heads are attached after
            ``inception_4a`` and ``inception_4d``.

    Returns:
        A Keras model. Its output is the softmax ``aux_3``; with
        ``aux_logits=True`` the outputs are ``[aux_1, aux_2, aux_3]``.

    The shape comments below assume the default 224x224 input.
    """
    def down_pool(tensor, name):
        """3x3 max-pool with stride 2 and SAME padding."""
        return layers.MaxPool2D(pool_size=3, strides=2, padding="SAME", name=name)(tensor)

    # TensorFlow tensors use NHWC channel ordering
    inputs = layers.Input(shape=(im_height, im_width, 3), dtype="float32")
    outputs = []

    # stem
    net = layers.Conv2D(64, kernel_size=7, strides=2, padding="SAME",
                        activation="relu", name="conv2d_1")(inputs)      # (None, 112, 112, 64)
    net = down_pool(net, "maxpool_1")                                     # (None, 56, 56, 64)
    net = layers.Conv2D(64, kernel_size=1, activation="relu",
                        name="conv2d_2")(net)                             # (None, 56, 56, 64)
    net = layers.Conv2D(192, kernel_size=3, padding="SAME",
                        activation="relu", name="conv2d_3")(net)          # (None, 56, 56, 192)
    net = down_pool(net, "maxpool_2")                                     # (None, 28, 28, 192)

    # stage 3
    net = Inception(64, 96, 128, 16, 32, 32, name="inception_3a")(net)    # (None, 28, 28, 256)
    net = Inception(128, 128, 192, 32, 96, 64, name="inception_3b")(net)  # (None, 28, 28, 480)
    net = down_pool(net, "maxpool_3")                                     # (None, 14, 14, 480)

    # stage 4, with the optional auxiliary classifiers
    net = Inception(192, 96, 208, 16, 48, 64, name="inception_4a")(net)   # (None, 14, 14, 512)
    if aux_logits:
        outputs.append(InceptionAux(class_num, name="aux_1")(net))
    net = Inception(160, 112, 224, 24, 64, 64, name="inception_4b")(net)  # (None, 14, 14, 512)
    net = Inception(128, 128, 256, 24, 64, 64, name="inception_4c")(net)  # (None, 14, 14, 512)
    net = Inception(112, 144, 288, 32, 64, 64, name="inception_4d")(net)  # (None, 14, 14, 528)
    if aux_logits:
        outputs.append(InceptionAux(class_num, name="aux_2")(net))
    net = Inception(256, 160, 320, 32, 128, 128, name="inception_4e")(net)  # (None, 14, 14, 832)
    net = down_pool(net, "maxpool_4")                                       # (None, 7, 7, 832)

    # stage 5
    net = Inception(256, 160, 320, 32, 128, 128, name="inception_5a")(net)  # (None, 7, 7, 832)
    net = Inception(384, 192, 384, 48, 128, 128, name="inception_5b")(net)  # (None, 7, 7, 1024)

    # classifier
    net = layers.AvgPool2D(pool_size=7, strides=1, name="avgpool_1")(net)   # (None, 1, 1, 1024)
    net = layers.Flatten(name="output_flatten")(net)                        # (None, 1024)
    net = layers.Dropout(rate=0.4, name="output_dropout")(net)
    net = layers.Dense(class_num, name="output_dense")(net)                 # (None, class_num)
    main_out = layers.Softmax(name="aux_3")(net)

    if not aux_logits:
        return models.Model(inputs=inputs, outputs=main_out)

    outputs.append(main_out)
    return models.Model(inputs=inputs, outputs=outputs)
class Inception(layers.Layer):
    """Inception block: four parallel branches concatenated by ``layers.concatenate``.

    Branches: a 1x1 conv; a 1x1 reduction followed by a 3x3 conv; a 1x1
    reduction followed by a 5x5 conv; a 3x3 stride-1 max-pool followed by a
    1x1 projection. All convolutions use ReLU, and SAME padding keeps the
    spatial size of every branch equal to the input's.
    """

    def __init__(self, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj, **kwargs):
        super().__init__(**kwargs)

        def relu_conv(filters, size, **extra):
            """Conv2D with ReLU activation and the given square kernel size."""
            return layers.Conv2D(filters, kernel_size=size, activation="relu", **extra)

        self.branch1 = relu_conv(ch1x1, 1)

        self.branch2 = Sequential([
            relu_conv(ch3x3red, 1),
            relu_conv(ch3x3, 3, padding="SAME"),
        ])

        self.branch3 = Sequential([
            relu_conv(ch5x5red, 1),
            relu_conv(ch5x5, 5, padding="SAME"),
        ])

        self.branch4 = Sequential([
            # strides must be given explicitly: the default equals pool_size
            layers.MaxPool2D(pool_size=3, strides=1, padding="SAME"),
            relu_conv(pool_proj, 1),
        ])

    def call(self, inputs, **kwargs):
        """Run every branch on *inputs* and concatenate the results."""
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        return layers.concatenate([branch(inputs) for branch in branches])
def InceptionV1(im_height=224, im_width=224, class_num=1000, aux_logits=False):
    """Build GoogLeNet with batch normalisation after every stem convolution.

    Args:
        im_height: input image height.
        im_width: input image width.
        class_num: number of output classes.
        aux_logits: when True, ``InceptionAux`` heads named ``aux1`` and
            ``aux2`` are attached after ``inception4a`` and ``inception4d``.

    Returns:
        A Keras model whose output is the final softmax, or
        ``[aux1, aux2, softmax]`` when ``aux_logits`` is True.

    The shape comments below assume the default 224x224 input.
    """
    def conv_bn_relu(tensor, filters, prefix, **conv_kwargs):
        """Bias-free conv named ``<prefix>/conv``, BN named ``<prefix>/bn``, then ReLU."""
        tensor = layers.Conv2D(filters, use_bias=False, name=prefix + "/conv",
                               **conv_kwargs)(tensor)
        tensor = layers.BatchNormalization(momentum=0.9, epsilon=1e-5,
                                           name=prefix + "/bn")(tensor)
        return layers.ReLU()(tensor)

    def down_pool(tensor, name, size=3):
        """Stride-2 max-pool with SAME padding."""
        return layers.MaxPool2D(pool_size=size, strides=2, padding="SAME", name=name)(tensor)

    # TensorFlow tensors use NHWC channel ordering
    inputs = layers.Input(shape=(im_height, im_width, 3), dtype="float32")
    outputs = []

    # stem
    net = conv_bn_relu(inputs, 64, "conv1", kernel_size=7, strides=2,
                       padding="SAME")                                  # (None, 112, 112, 64)
    net = down_pool(net, "maxpool_1")                                   # (None, 56, 56, 64)
    net = conv_bn_relu(net, 64, "conv2", kernel_size=1)                 # (None, 56, 56, 64)
    net = conv_bn_relu(net, 192, "conv3", kernel_size=3, padding="SAME")  # (None, 56, 56, 192)
    net = down_pool(net, "maxpool_2")                                   # (None, 28, 28, 192)

    # stage 3
    net = Inception(64, 96, 128, 16, 32, 32, name="inception3a")(net)     # (None, 28, 28, 256)
    net = Inception(128, 128, 192, 32, 96, 64, name="inception3b")(net)   # (None, 28, 28, 480)
    net = down_pool(net, "maxpool_3")                                     # (None, 14, 14, 480)

    # stage 4, with the optional auxiliary classifiers
    net = Inception(192, 96, 208, 16, 48, 64, name="inception4a")(net)    # (None, 14, 14, 512)
    if aux_logits:
        outputs.append(InceptionAux(class_num, name="aux1")(net))
    net = Inception(160, 112, 224, 24, 64, 64, name="inception4b")(net)   # (None, 14, 14, 512)
    net = Inception(128, 128, 256, 24, 64, 64, name="inception4c")(net)   # (None, 14, 14, 512)
    net = Inception(112, 144, 288, 32, 64, 64, name="inception4d")(net)   # (None, 14, 14, 528)
    if aux_logits:
        outputs.append(InceptionAux(class_num, name="aux2")(net))
    net = Inception(256, 160, 320, 32, 128, 128, name="inception4e")(net)  # (None, 14, 14, 832)
    # this pool uses a 2x2 window, unlike the 3x3 pools above
    net = down_pool(net, "maxpool_4", size=2)                              # (None, 7, 7, 832)

    # stage 5
    net = Inception(256, 160, 320, 32, 128, 128, name="inception5a")(net)  # (None, 7, 7, 832)
    net = Inception(384, 192, 384, 48, 128, 128, name="inception5b")(net)  # (None, 7, 7, 1024)

    # classifier
    net = layers.AvgPool2D(pool_size=7, strides=1, name="avgpool_1")(net)  # (None, 1, 1, 1024)
    net = layers.Flatten(name="output_flatten")(net)                       # (None, 1024)
    net = layers.Dropout(rate=0.4, name="output_dropout")(net)
    net = layers.Dense(class_num, name="fc")(net)                          # (None, class_num)
    main_out = layers.Softmax()(net)

    if not aux_logits:
        return models.Model(inputs=inputs, outputs=main_out)

    outputs.append(main_out)
    return models.Model(inputs=inputs, outputs=outputs)
class InceptionAux(layers.Layer):
    """Auxiliary classifier head: avg-pool, 1x1 conv + BN + ReLU, two dense layers, softmax.

    All sub-layers, including the weight-free ``Flatten`` and ``Dropout``
    layers, are created once in ``__init__`` so that ``call`` only applies
    them instead of instantiating new layer objects on every invocation.
    """

    def __init__(self, num_classes, **kwargs):
        super(InceptionAux, self).__init__(**kwargs)
        self.averagePool = layers.AvgPool2D(pool_size=5, strides=3)
        self.conv = layers.Conv2D(128, kernel_size=1, use_bias=False, name="conv/conv")
        self.bn1 = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name="conv/bn")
        self.rule1 = layers.ReLU()

        self.flatten = layers.Flatten()
        self.dropout1 = layers.Dropout(rate=0.5)
        self.fc1 = layers.Dense(1024, activation="relu", name="fc1")
        self.dropout2 = layers.Dropout(rate=0.5)
        self.fc2 = layers.Dense(num_classes, name="fc2")
        self.softmax = layers.Softmax()

    def call(self, inputs, **kwargs):
        """Return class probabilities for a feature map.

        The shape comments are NHWC and assume the 14x14 feature maps produced
        by a 224x224 network input (512 channels for aux1, 528 for aux2).
        """
        # aux1: (N, 14, 14, 512), aux2: (N, 14, 14, 528)
        x = self.averagePool(inputs)
        # aux1: (N, 4, 4, 512), aux2: (N, 4, 4, 528)
        x = self.conv(x)
        x = self.bn1(x)
        x = self.rule1(x)
        # (N, 4, 4, 128)
        x = self.flatten(x)
        x = self.dropout1(x)
        # (N, 2048)
        x = self.fc1(x)
        x = self.dropout2(x)
        # (N, 1024)
        x = self.fc2(x)
        # (N, num_classes)
        x = self.softmax(x)

        return x
def main():
    """Classify ``../tulip.jpg`` with a trained GoogLeNet and show the result.

    Reads ``./class_indices.json`` for the index-to-name mapping and the
    checkpoint ``./save_weights/myGoogLeNet.ckpt``; prints every class
    probability and displays the image titled with the top prediction.
    """
    im_height, im_width = 224, 224

    # load the image and bring it to the network input size
    image_file = "../tulip.jpg"
    assert os.path.exists(image_file), "file: '{}' dose not exist.".format(image_file)
    picture = Image.open(image_file).resize((im_width, im_height))
    plt.imshow(picture)

    # scale pixels to [0, 1], then normalise with mean 0.5 and std 0.5
    pixels = np.array(picture) / 255.
    pixels = (pixels - 0.5) / 0.5

    # add a leading batch dimension: the image is the only member of the batch
    batch = np.expand_dims(pixels, 0)

    # index -> class name mapping
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)
    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # the auxiliary heads are not built for inference
    model = GoogLeNet(class_num=5, aux_logits=False)
    model.summary()

    # a TF checkpoint is several files sharing this prefix, hence the glob
    weights_path = "./save_weights/myGoogLeNet.ckpt"
    assert len(glob.glob(weights_path + "*")), "cannot find {}".format(weights_path)
    model.load_weights(weights_path)

    probs = np.squeeze(model.predict(batch))
    best = np.argmax(probs)

    plt.title("class: {}   prob: {:.3}".format(class_indict[str(best)],
                                               probs[best]))
    for index, prob in enumerate(probs):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(index)],
                                                  prob))
    plt.show()


if __name__ == "__main__":
    main()
def rename_var(pth_path, new_ckpt_path, num_classes):
    """Convert a PyTorch GoogLeNet state dict into a TensorFlow checkpoint.

    Entries listed in the module-level ``except_list`` are dropped. For the
    rest, dots in the key become slashes, conv kernels are transposed with
    axes ``(2, 3, 1, 0)``, batch-norm statistics are renamed to their Keras
    names and ``fc1`` weights are transposed. Fresh He-uniform variables
    are added for ``aux1/fc2``, ``aux2/fc2`` and ``fc``.

    Args:
        pth_path: path of the PyTorch ``.pth`` state dict.
        new_ckpt_path: path prefix of the checkpoint to write.
        num_classes: output size of the newly initialised classifier layers.
    """
    # load onto the CPU so the conversion does not depend on the device the
    # weights were saved from
    pytorch_dict = torch.load(pth_path, map_location="cpu")

    # (shape, name) of the newly initialised classifier variables
    new_heads = [
        ([1024, num_classes], "aux1/fc2/kernel"),
        ([num_classes], "aux1/fc2/bias"),
        ([1024, num_classes], "aux2/fc2/kernel"),
        ([num_classes], "aux2/fc2/bias"),
        ([1024, num_classes], "fc/kernel"),
        ([num_classes], "fc/bias"),
    ]

    with tf.Graph().as_default(), tf.compat.v1.Session().as_default() as sess:
        new_var_list = []

        for key, value in pytorch_dict.items():
            if key in except_list:
                continue

            new_name = key.replace(".", "/")
            value = value.detach().cpu().numpy()

            if 'conv/weight' in new_name:
                # reorder the conv kernel axes from the PyTorch layout to TensorFlow's
                new_name = new_name.replace("weight", "kernel")
                value = np.transpose(value, (2, 3, 1, 0)).astype(np.float32)
            elif 'bn' in new_name:
                if "num_batches_tracked" in new_name:
                    continue

                new_name = new_name.replace("weight", "gamma")
                new_name = new_name.replace("bias", "beta")
                new_name = new_name.replace("running_mean", "moving_mean")
                new_name = new_name.replace("running_var", "moving_variance")

                value = np.transpose(value).astype(np.float32)
            elif 'fc1' in new_name:
                new_name = new_name.replace("weight", "kernel")
                value = np.transpose(value).astype(np.float32)

            new_var_list.append(tf.Variable(value, name=new_name))

        # a fresh initializer per variable, so each one is sampled independently
        for shape, name in new_heads:
            init_value = tf.keras.initializers.he_uniform()(shape)
            new_var_list.append(tf.Variable(init_value, name=name))

        saver = tf.compat.v1.train.Saver(new_var_list)
        sess.run(tf.compat.v1.global_variables_initializer())
        saver.save(sess, save_path=new_ckpt_path, write_meta_graph=False, write_state=False)
https://download.pytorch.org/models/googlenet-1378be20.pth pth_path = './googlenet-1378be20.pth' new_ckpt_path = './pretrain_weights.ckpt' num_classes = 5 rename_var(pth_path, new_ckpt_path, num_classes) ================================================ FILE: tensorflow_classification/Test4_goolenet/train.py ================================================ import os import sys import json import tensorflow as tf from tqdm import tqdm from tensorflow.keras.preprocessing.image import ImageDataGenerator from model import GoogLeNet def main(): data_root = os.path.abspath(os.path.join(os.getcwd(), "../..")) # get data root path image_path = os.path.join(data_root, "data_set", "flower_data") # flower data set path train_dir = os.path.join(image_path, "train") validation_dir = os.path.join(image_path, "val") assert os.path.exists(train_dir), "cannot find {}".format(train_dir) assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir) # create direction for saving weights if not os.path.exists("save_weights"): os.makedirs("save_weights") im_height = 224 im_width = 224 batch_size = 32 epochs = 30 def pre_function(img): # img = im.open('test.jpg') # img = np.array(img).astype(np.float32) img = img / 255. 
img = (img - 0.5) * 2.0 return img # data generator with data augmentation train_image_generator = ImageDataGenerator(preprocessing_function=pre_function, horizontal_flip=True) validation_image_generator = ImageDataGenerator(preprocessing_function=pre_function) train_data_gen = train_image_generator.flow_from_directory(directory=train_dir, batch_size=batch_size, shuffle=True, target_size=(im_height, im_width), class_mode='categorical') total_train = train_data_gen.n # get class dict class_indices = train_data_gen.class_indices # transform value and key of dict inverse_dict = dict((val, key) for key, val in class_indices.items()) # write dict into json file json_str = json.dumps(inverse_dict, indent=4) with open('class_indices.json', 'w') as json_file: json_file.write(json_str) val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir, batch_size=batch_size, shuffle=False, target_size=(im_height, im_width), class_mode='categorical') total_val = val_data_gen.n print("using {} images for training, {} images for validation.".format(total_train, total_val)) model = GoogLeNet(im_height=im_height, im_width=im_width, class_num=5, aux_logits=True) # model.build((batch_size, 224, 224, 3)) # when using subclass model model.summary() # using keras low level api for training loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False) optimizer = tf.keras.optimizers.Adam(learning_rate=0.0003) train_loss = tf.keras.metrics.Mean(name='train_loss') train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy') val_loss = tf.keras.metrics.Mean(name='val_loss') val_accuracy = tf.keras.metrics.CategoricalAccuracy(name='val_accuracy') @tf.function def train_step(images, labels): with tf.GradientTape() as tape: aux1, aux2, output = model(images, training=True) loss1 = loss_object(labels, aux1) loss2 = loss_object(labels, aux2) loss3 = loss_object(labels, output) loss = loss1 * 0.3 + loss2 * 0.3 + loss3 gradients = 
def main():
    """Train GoogLeNet on the flower data set using a ``tf.data`` pipeline.

    Images are read from ``<cwd>/../../data_set/flower_data/{train,val}``,
    the class-index mapping is written to ``class_indices.json`` and the
    weights with the lowest validation loss are saved to
    ``./save_weights/myGoogLeNet.ckpt``.
    """
    gpus = tf.config.experimental.list_physical_devices("GPU")
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
        except RuntimeError as e:
            print(e)
            exit(-1)

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    train_dir = os.path.join(image_path, "train")
    validation_dir = os.path.join(image_path, "val")
    assert os.path.exists(train_dir), "cannot find {}".format(train_dir)
    assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir)

    # create directory for saving weights
    if not os.path.exists("save_weights"):
        os.makedirs("save_weights")

    im_height = 224
    im_width = 224
    batch_size = 32
    epochs = 30

    # class dict; sorted so the class -> index mapping does not depend on the
    # arbitrary order returned by os.listdir
    data_class = sorted(cla for cla in os.listdir(train_dir)
                        if os.path.isdir(os.path.join(train_dir, cla)))
    class_num = len(data_class)
    class_dict = {name: index for index, name in enumerate(data_class)}

    # reverse value and key of dict
    inverse_dict = {index: name for name, index in class_dict.items()}
    # write dict into json file
    json_str = json.dumps(inverse_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    def label_of(path):
        # the class name is the name of the directory that contains the image
        return class_dict[os.path.basename(os.path.dirname(path))]

    # load train images list
    train_image_list = glob.glob(train_dir+"/*/*.jpg")
    random.shuffle(train_image_list)
    train_num = len(train_image_list)
    assert train_num > 0, "cannot find any .jpg file in {}".format(train_dir)
    train_label_list = [label_of(path) for path in train_image_list]

    # load validation images list
    val_image_list = glob.glob(validation_dir+"/*/*.jpg")
    random.shuffle(val_image_list)
    val_num = len(val_image_list)
    assert val_num > 0, "cannot find any .jpg file in {}".format(validation_dir)
    val_label_list = [label_of(path) for path in val_image_list]

    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    def load_image(img_path):
        image = tf.io.read_file(img_path)
        # channels=3 forces RGB so grayscale JPEGs can be batched with the rest
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.convert_image_dtype(image, tf.float32)
        return tf.image.resize(image, [im_height, im_width])

    def process_train_img(img_path, label):
        label = tf.one_hot(label, depth=class_num)
        image = tf.image.random_flip_left_right(load_image(img_path))
        image = (image - 0.5) / 0.5
        return image, label

    def process_val_img(img_path, label):
        label = tf.one_hot(label, depth=class_num)
        image = (load_image(img_path) - 0.5) / 0.5
        return image, label

    AUTOTUNE = tf.data.experimental.AUTOTUNE

    # At least one step per epoch, otherwise a data set smaller than one batch
    # would never terminate the loop over the repeated data set.
    train_step_num = max(1, train_num // batch_size)

    # load train dataset
    train_dataset = tf.data.Dataset.from_tensor_slices((train_image_list, train_label_list))
    train_dataset = train_dataset.shuffle(buffer_size=train_num)\
                                 .map(process_train_img, num_parallel_calls=AUTOTUNE)\
                                 .repeat().batch(batch_size).prefetch(AUTOTUNE)
    train_epoch = train_dataset.take(train_step_num)

    # load validation dataset; not repeated, so one pass covers every image
    # exactly once (including the final partial batch)
    val_dataset = tf.data.Dataset.from_tensor_slices((val_image_list, val_label_list))
    val_dataset = val_dataset.map(process_val_img, num_parallel_calls=AUTOTUNE)\
                             .batch(batch_size)

    # instantiate the model; sized from the discovered classes so the output
    # always matches the one-hot labels
    model = GoogLeNet(im_height=im_height, im_width=im_width, class_num=class_num, aux_logits=True)
    model.summary()

    # using keras low level api for training
    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0003)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')

    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            aux1, aux2, output = model(images, training=True)
            loss1 = loss_object(labels, aux1)
            loss2 = loss_object(labels, aux2)
            loss3 = loss_object(labels, output)
            # auxiliary classifiers contribute with weight 0.3 each
            loss = loss1 * 0.3 + loss2 * 0.3 + loss3
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(labels, output)

    @tf.function
    def test_step(images, labels):
        _, _, output = model(images, training=False)
        t_loss = loss_object(labels, output)

        test_loss(t_loss)
        test_accuracy(labels, output)

    best_test_loss = float('inf')
    for epoch in range(1, epochs+1):
        train_loss.reset_states()        # clear history info
        train_accuracy.reset_states()    # clear history info
        test_loss.reset_states()         # clear history info
        test_accuracy.reset_states()     # clear history info

        t1 = time.perf_counter()
        for images, labels in train_epoch:
            train_step(images, labels)
        print(time.perf_counter()-t1)

        for images, labels in val_dataset:
            test_step(images, labels)

        template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
        print(template.format(epoch,
                              train_loss.result(),
                              train_accuracy.result() * 100,
                              test_loss.result(),
                              test_accuracy.result() * 100))

        # only save the weights when the validation loss improved
        current_loss = float(test_loss.result())
        if current_loss < best_test_loss:
            best_test_loss = current_loss
            model.save_weights("./save_weights/myGoogLeNet.ckpt", save_format='tf')
os.makedirs("save_weights") im_height = 224 im_width = 224 batch_size = 16 epochs = 30 def pre_function(img: np.ndarray): # img = im.open('test.jpg') # img = np.array(img).astype(np.float32) img = img / 255. img = img - [0.485, 0.456, 0.406] img = img / [0.229, 0.224, 0.225] return img # data generator with data augmentation train_image_generator = ImageDataGenerator(preprocessing_function=pre_function, horizontal_flip=True) validation_image_generator = ImageDataGenerator(preprocessing_function=pre_function) train_data_gen = train_image_generator.flow_from_directory(directory=train_dir, batch_size=batch_size, shuffle=True, target_size=(im_height, im_width), class_mode='categorical') total_train = train_data_gen.n # get class dict class_indices = train_data_gen.class_indices # transform value and key of dict inverse_dict = dict((val, key) for key, val in class_indices.items()) # write dict into json file json_str = json.dumps(inverse_dict, indent=4) with open('class_indices.json', 'w') as json_file: json_file.write(json_str) val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir, batch_size=batch_size, shuffle=False, target_size=(im_height, im_width), class_mode='categorical') total_val = val_data_gen.n print("using {} images for training, {} images for validation.".format(total_train, total_val)) model = InceptionV1(im_height=im_height, im_width=im_width, class_num=5, aux_logits=True) # model.build((batch_size, 224, 224, 3)) # when using subclass model pre_weights_path = './pretrain_weights.ckpt' assert len(glob.glob(pre_weights_path+"*")), "cannot find {}".format(pre_weights_path) model.load_weights(pre_weights_path) model.summary() # using keras low level api for training loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False) optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005) train_loss = tf.keras.metrics.Mean(name='train_loss') train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy') 
val_loss = tf.keras.metrics.Mean(name='val_loss') val_accuracy = tf.keras.metrics.CategoricalAccuracy(name='val_accuracy') @tf.function def train_step(images, labels): with tf.GradientTape() as tape: aux1, aux2, output = model(images, training=True) loss1 = loss_object(labels, aux1) loss2 = loss_object(labels, aux2) loss3 = loss_object(labels, output) loss = loss1 * 0.3 + loss2 * 0.3 + loss3 gradients = tape.gradient(loss, model.trainable_variables) optimizer.apply_gradients(zip(gradients, model.trainable_variables)) train_loss(loss) train_accuracy(labels, output) @tf.function def val_step(images, labels): _, _, output = model(images, training=False) loss = loss_object(labels, output) val_loss(loss) val_accuracy(labels, output) best_val_acc = 0. for epoch in range(epochs): train_loss.reset_states() # clear history info train_accuracy.reset_states() # clear history info val_loss.reset_states() # clear history info val_accuracy.reset_states() # clear history info # train train_bar = tqdm(range(total_train // batch_size), file=sys.stdout) for step in train_bar: images, labels = next(train_data_gen) train_step(images, labels) # print train process train_bar.desc = "train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1, epochs, train_loss.result(), train_accuracy.result()) # validate val_bar = tqdm(range(total_val // batch_size), file=sys.stdout) for step in val_bar: val_images, val_labels = next(val_data_gen) val_step(val_images, val_labels) # print val process val_bar.desc = "valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1, epochs, val_loss.result(), val_accuracy.result()) # only save best weights if val_accuracy.result() > best_val_acc: best_val_acc = val_accuracy.result() model.save_weights("./save_weights/myInceptionV1.ckpt") if __name__ == '__main__': main() ================================================ FILE: tensorflow_classification/Test5_resnet/batch_predict.py ================================================ import os import json import 
def main():
    """Classify every ``.jpg`` image of a folder with the fine-tuned ResNet-50.

    Images are read from ``imgs_root``, preprocessed like in training
    (resize to 224x224, per-channel mean subtraction) and predicted in
    batches; one line with the predicted class and probability is printed
    per image.
    """
    im_height = 224
    im_width = 224
    num_classes = 5

    _R_MEAN = 123.68
    _G_MEAN = 116.78
    _B_MEAN = 103.94

    # load images
    # folder whose images are going to be predicted
    imgs_root = "/data/imgs"
    assert os.path.exists(imgs_root), f"file: '{imgs_root}' dose not exist."
    # collect the paths of all jpg images in that folder
    img_path_list = [os.path.join(imgs_root, i) for i in os.listdir(imgs_root) if i.endswith(".jpg")]

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), f"file: '{json_path}' dose not exist."

    # the context manager closes the file once it has been parsed
    with open(json_path, "r") as json_file:
        class_indict = json.load(json_file)

    # create model
    feature = resnet50(num_classes=num_classes, include_top=False)
    feature.trainable = False
    model = tf.keras.Sequential([feature,
                                 tf.keras.layers.GlobalAvgPool2D(),
                                 tf.keras.layers.Dropout(rate=0.5),
                                 tf.keras.layers.Dense(1024, activation="relu"),
                                 tf.keras.layers.Dropout(rate=0.5),
                                 tf.keras.layers.Dense(num_classes),
                                 tf.keras.layers.Softmax()])

    # load weights
    weights_path = './save_weights/resNet_50.ckpt'
    assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path)
    model.load_weights(weights_path)

    batch_size = 8  # number of images packed into one prediction batch
    # Step through the list in strides of batch_size so the trailing,
    # possibly smaller, batch is predicted as well.
    for start in range(0, len(img_path_list), batch_size):
        batch_paths = img_path_list[start: start + batch_size]
        img_list = []
        for img_path in batch_paths:
            assert os.path.exists(img_path), f"file: '{img_path}' dose not exist."
            # force 3 channels so grayscale / RGBA files match the RGB means
            img = Image.open(img_path).convert("RGB")
            # resize image to 224x224
            img = img.resize((im_width, im_height))

            # subtract the per-channel mean (no rescaling of the pixel range)
            img = np.array(img).astype(np.float32)
            img = img - [_R_MEAN, _G_MEAN, _B_MEAN]
            img_list.append(img)

        # pack all images of img_list into one batch
        batch_img = np.stack(img_list, axis=0)

        # prediction
        result = model.predict(batch_img)
        predict_classes = np.argmax(result, axis=1)

        for index, class_index in enumerate(predict_classes):
            print_res = "image: {} class: {} prob: {:.3}".format(batch_paths[index],
                                                                 class_indict[str(class_index)],
                                                                 result[index][class_index])
            print(print_res)
class Bottleneck(layers.Layer):
    """Three-convolution (1x1 -> 3x3 -> 1x1) residual block.

    Note: in the original paper the first 1x1 convolution on the main branch
    of a down-sampling block has stride 2 and the 3x3 convolution has
    stride 1. The official PyTorch implementation instead uses stride 1 for
    the first 1x1 convolution and stride 2 for the 3x3 convolution, which
    improves top-1 accuracy by roughly 0.5%.
    See ResNet v1.5:
    https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch
    """
    # ratio between the block's output channels and ``out_channel``
    expansion = 4

    def __init__(self, out_channel, strides=1, downsample=None, **kwargs):
        super().__init__(**kwargs)
        # settings shared by the three batch-norm layers
        bn_kwargs = {"momentum": 0.9, "epsilon": 1e-5}

        # 1x1 convolution
        self.conv1 = layers.Conv2D(out_channel, kernel_size=1, use_bias=False, name="conv1")
        self.bn1 = layers.BatchNormalization(name="conv1/BatchNorm", **bn_kwargs)
        # 3x3 convolution; this is the layer that carries the stride
        self.conv2 = layers.Conv2D(out_channel, kernel_size=3, use_bias=False,
                                   strides=strides, padding="SAME", name="conv2")
        self.bn2 = layers.BatchNormalization(name="conv2/BatchNorm", **bn_kwargs)
        # 1x1 convolution widening the output to out_channel * expansion
        self.conv3 = layers.Conv2D(out_channel * self.expansion, kernel_size=1, use_bias=False, name="conv3")
        self.bn3 = layers.BatchNormalization(name="conv3/BatchNorm", **bn_kwargs)

        self.relu = layers.ReLU()
        self.downsample = downsample
        self.add = layers.Add()

    def call(self, inputs, training=False):
        # shortcut branch: identity unless a projection layer was supplied
        shortcut = inputs if self.downsample is None else self.downsample(inputs)

        out = self.relu(self.bn1(self.conv1(inputs), training=training))
        out = self.relu(self.bn2(self.conv2(out), training=training))
        out = self.bn3(self.conv3(out), training=training)

        return self.relu(self.add([out, shortcut]))
def _resnet(block, blocks_num, im_width=224, im_height=224, num_classes=1000, include_top=True):
    """Assemble a functional-API ResNet from the given residual block type.

    Args:
        block: residual block class used for the four stages.
        blocks_num: sequence with the number of blocks of each stage.
        im_width: input image width.
        im_height: input image height.
        num_classes: size of the classification layer (used with include_top).
        include_top: when True append global pooling, the "logits" dense
            layer and a softmax; otherwise the output is the last feature map.

    Returns:
        The assembled ``Model``.
    """
    # tensors in tensorflow are ordered NHWC: (None, im_height, im_width, 3)
    input_image = layers.Input(shape=(im_height, im_width, 3), dtype="float32")

    # stem: 7x7 convolution, batch norm, ReLU and 3x3 max pooling
    stem_conv = layers.Conv2D(filters=64, kernel_size=7, strides=2,
                              padding="SAME", use_bias=False, name="conv1")
    stem_bn = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name="conv1/BatchNorm")
    x = stem_bn(stem_conv(input_image))
    x = layers.ReLU()(x)
    x = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME")(x)

    # four residual stages; every stage after the first halves the resolution
    stage_cfg = ((64, 1), (128, 2), (256, 2), (512, 2))
    for stage_idx, (channel, stride) in enumerate(stage_cfg):
        stage = _make_layer(block, x.shape[-1], channel, blocks_num[stage_idx],
                            strides=stride, name="block" + str(stage_idx + 1))
        x = stage(x)

    if not include_top:
        predict = x
    else:
        x = layers.GlobalAvgPool2D()(x)  # pool + flatten
        x = layers.Dense(num_classes, name="logits")(x)
        predict = layers.Softmax()(x)

    return Model(inputs=input_image, outputs=predict)
def main():
    """Predict the class of a single image with the fine-tuned ResNet-50.

    Loads ``../tulip.jpg``, applies the training preprocessing (resize to
    224x224 and per-channel mean subtraction), restores the weights from
    ``./save_weights/resNet_50.ckpt``, prints the probability of every class
    and shows the image titled with the top prediction.
    """
    im_height = 224
    im_width = 224
    num_classes = 5

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    # force 3 channels so grayscale / RGBA files match the RGB means below
    img = Image.open(img_path).convert("RGB")
    # resize image to 224x224
    img = img.resize((im_width, im_height))
    plt.imshow(img)

    # subtract the per-channel mean (no rescaling of the pixel range)
    _R_MEAN = 123.68
    _G_MEAN = 116.78
    _B_MEAN = 103.94
    img = np.array(img).astype(np.float32)
    img = img - [_R_MEAN, _G_MEAN, _B_MEAN]

    # Add the image to a batch where it's the only member.
    img = (np.expand_dims(img, 0))

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model: frozen backbone followed by the classification head
    feature = resnet50(num_classes=num_classes, include_top=False)
    feature.trainable = False
    model = tf.keras.Sequential([feature,
                                 tf.keras.layers.GlobalAvgPool2D(),
                                 tf.keras.layers.Dropout(rate=0.5),
                                 tf.keras.layers.Dense(1024, activation="relu"),
                                 tf.keras.layers.Dropout(rate=0.5),
                                 tf.keras.layers.Dense(num_classes),
                                 tf.keras.layers.Softmax()])

    # load weights
    weights_path = './save_weights/resNet_50.ckpt'
    assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path)
    model.load_weights(weights_path)

    # prediction
    result = np.squeeze(model.predict(img))
    predict_class = np.argmax(result)

    print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)],
                                               result[predict_class])
    plt.title(print_res)
    for i, prob in enumerate(result):
        print("class: {:10} prob: {:.3}".format(class_indict[str(i)], prob))
    plt.show()
import h5py

# Print every weight array stored in a Keras ``.h5`` weights file.
# The context manager makes sure the file is closed again.
with h5py.File('./save_weights/resNet_1.h5', 'r') as f:
    for root_name, g in f.items():
        print(root_name)
        for _, weights_dirs in g.attrs.items():
            for i in weights_dirs:
                # attribute entries are bytes or str depending on how the
                # file was written and on the h5py version
                sub_name = i.decode("utf-8") if isinstance(i, bytes) else str(i)
                name = root_name + "/" + sub_name
                data = f[name]
                # Dataset.value was removed in h5py 3.0; [()] reads the whole dataset
                print(data[()])
class BasicBlock(layers.Layer):
    """Two-convolution (3x3 -> 3x3) residual block."""
    # ratio between the block's output channels and ``out_channel``
    expansion = 1

    def __init__(self, out_channel, strides=1, downsample=None, **kwargs):
        super().__init__(**kwargs)
        # settings shared by both batch-norm layers
        bn_kwargs = {"momentum": 0.9, "epsilon": 1e-5}

        # first 3x3 convolution; this is the layer that carries the stride
        self.conv1 = layers.Conv2D(out_channel, kernel_size=3, strides=strides,
                                   padding="SAME", use_bias=False)
        self.bn1 = layers.BatchNormalization(**bn_kwargs)
        # second 3x3 convolution, always stride 1
        self.conv2 = layers.Conv2D(out_channel, kernel_size=3, strides=1,
                                   padding="SAME", use_bias=False)
        self.bn2 = layers.BatchNormalization(**bn_kwargs)

        self.downsample = downsample
        self.relu = layers.ReLU()
        self.add = layers.Add()

    def call(self, inputs, training=False, **kwargs):
        # shortcut branch: identity unless a projection layer was supplied
        shortcut = inputs if self.downsample is None else self.downsample(inputs)

        out = self.relu(self.bn1(self.conv1(inputs), training=training))
        out = self.bn2(self.conv2(out), training=training)

        return self.relu(self.add([shortcut, out]))
class ResNet(Model):
    """Subclassed-model ResNet built from four stages of residual blocks.

    Args:
        block: residual block class used for every stage.
        blocks_num: sequence with the number of blocks of each stage.
        num_classes: size of the classification layer (used with include_top).
        include_top: when True the model ends with global average pooling,
            the "logits" dense layer and a softmax.
    """

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True, **kwargs):
        super().__init__(**kwargs)
        self.include_top = include_top

        # stem: 7x7 convolution, batch norm, ReLU and 3x3 max pooling
        self.conv1 = layers.Conv2D(filters=64, kernel_size=7, strides=2,
                                   padding="SAME", use_bias=False, name="conv1")
        self.bn1 = layers.BatchNormalization(momentum=0.9, epsilon=1.001e-5, name="conv1/BatchNorm")
        self.relu1 = layers.ReLU(name="relu1")
        self.maxpool1 = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME", name="maxpool1")

        # residual stages; only the first one is flagged as first_block
        self.block1 = self._make_layer(block, True, 64, blocks_num[0], name="block1")
        self.block2 = self._make_layer(block, False, 128, blocks_num[1], strides=2, name="block2")
        self.block3 = self._make_layer(block, False, 256, blocks_num[2], strides=2, name="block3")
        self.block4 = self._make_layer(block, False, 512, blocks_num[3], strides=2, name="block4")

        if self.include_top:
            self.avgpool = layers.GlobalAvgPool2D(name="avgpool1")
            self.fc = layers.Dense(num_classes, name="logits")
            self.softmax = layers.Softmax()

    def call(self, inputs, training=False, **kwargs):
        x = self.bn1(self.conv1(inputs), training=training)
        x = self.maxpool1(self.relu1(x))

        for stage in (self.block1, self.block2, self.block3, self.block4):
            x = stage(x, training=training)

        if not self.include_top:
            return x
        return self.softmax(self.fc(self.avgpool(x)))

    def _make_layer(self, block, first_block, channel, block_num, name=None, strides=1):
        """Build one stage as a ``Sequential`` of ``block_num`` residual blocks.

        The first block receives a 1x1-convolution + batch-norm shortcut
        whenever ``strides`` differs from 1 or ``first_block`` is True
        (regardless of the block type); the remaining blocks use the
        block's default arguments.
        """
        needs_projection = strides != 1 or first_block is True
        downsample = None
        if needs_projection:
            shortcut_conv = layers.Conv2D(channel * block.expansion, kernel_size=1, strides=strides,
                                          use_bias=False, name="conv1")
            shortcut_bn = layers.BatchNormalization(momentum=0.9, epsilon=1.001e-5, name="BatchNorm")
            downsample = Sequential([shortcut_conv, shortcut_bn], name="shortcut")

        units = [block(channel, downsample=downsample, strides=strides, name="unit_1")]
        for unit_no in range(2, block_num + 1):
            units.append(block(channel, name="unit_" + str(unit_no)))

        return Sequential(units, name=name)
img = im.open('test.jpg') # img = np.array(img).astype(np.float32) img = img - [_R_MEAN, _G_MEAN, _B_MEAN] return img # data generator with data augmentation train_image_generator = ImageDataGenerator(horizontal_flip=True, preprocessing_function=pre_function) validation_image_generator = ImageDataGenerator(preprocessing_function=pre_function) train_data_gen = train_image_generator.flow_from_directory(directory=train_dir, batch_size=batch_size, shuffle=True, target_size=(im_height, im_width), class_mode='categorical') total_train = train_data_gen.n # get class dict class_indices = train_data_gen.class_indices # transform value and key of dict inverse_dict = dict((val, key) for key, val in class_indices.items()) # write dict into json file json_str = json.dumps(inverse_dict, indent=4) with open('class_indices.json', 'w') as json_file: json_file.write(json_str) val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir, batch_size=batch_size, shuffle=False, target_size=(im_height, im_width), class_mode='categorical') # img, _ = next(train_data_gen) total_val = val_data_gen.n print("using {} images for training, {} images for validation.".format(total_train, total_val)) feature = resnet50(num_classes=5, include_top=False) # feature.build((None, 224, 224, 3)) # when using subclass model # 直接下载我转好的权重 # download weights 链接: https://pan.baidu.com/s/1tLe9ahTMIwQAX7do_S59Zg 密码: u199 pre_weights_path = './pretrain_weights.ckpt' assert len(glob.glob(pre_weights_path+"*")), "cannot find {}".format(pre_weights_path) feature.load_weights(pre_weights_path) feature.trainable = False feature.summary() model = tf.keras.Sequential([feature, tf.keras.layers.GlobalAvgPool2D(), tf.keras.layers.Dropout(rate=0.5), tf.keras.layers.Dense(1024, activation="relu"), tf.keras.layers.Dropout(rate=0.5), tf.keras.layers.Dense(num_classes), tf.keras.layers.Softmax()]) # model.build((None, 224, 224, 3)) model.summary() # using keras low level api for training loss_object = 
tf.keras.losses.CategoricalCrossentropy(from_logits=False) optimizer = tf.keras.optimizers.Adam(learning_rate=0.0002) train_loss = tf.keras.metrics.Mean(name='train_loss') train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy') val_loss = tf.keras.metrics.Mean(name='val_loss') val_accuracy = tf.keras.metrics.CategoricalAccuracy(name='val_accuracy') @tf.function def train_step(images, labels): with tf.GradientTape() as tape: output = model(images, training=True) loss = loss_object(labels, output) gradients = tape.gradient(loss, model.trainable_variables) optimizer.apply_gradients(zip(gradients, model.trainable_variables)) train_loss(loss) train_accuracy(labels, output) @tf.function def val_step(images, labels): output = model(images, training=False) loss = loss_object(labels, output) val_loss(loss) val_accuracy(labels, output) best_val_acc = 0. for epoch in range(epochs): train_loss.reset_states() # clear history info train_accuracy.reset_states() # clear history info val_loss.reset_states() # clear history info val_accuracy.reset_states() # clear history info # train train_bar = tqdm(range(total_train // batch_size), file=sys.stdout) for step in train_bar: images, labels = next(train_data_gen) train_step(images, labels) # print train process train_bar.desc = "train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1, epochs, train_loss.result(), train_accuracy.result()) # validate val_bar = tqdm(range(total_val // batch_size), file=sys.stdout) for step in val_bar: test_images, test_labels = next(val_data_gen) val_step(test_images, test_labels) # print val process val_bar.desc = "valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1, epochs, val_loss.result(), val_accuracy.result()) # only save best weights if val_accuracy.result() > best_val_acc: best_val_acc = val_accuracy.result() model.save_weights("./save_weights/resNet_50.ckpt", save_format="tf") if __name__ == '__main__': main() ================================================ 
def main():
    """Train a classification head on a frozen ResNet-50 using a tf.data pipeline.

    Image paths are collected from ``../../data_set/flower_data/{train,val}``
    (relative to the current working directory), the class-index mapping is
    written to ``class_indices.json``, backbone weights are loaded from
    ``./pretrain_weights.ckpt`` and the weights with the lowest validation
    loss are saved to ``./save_weights/myResNet.ckpt``.
    """
    gpus = tf.config.experimental.list_physical_devices("GPU")
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
        except RuntimeError as e:
            print(e)
            exit(-1)

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    train_dir = os.path.join(image_path, "train")
    validation_dir = os.path.join(image_path, "val")
    assert os.path.exists(train_dir), "cannot find {}".format(train_dir)
    assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir)

    # create directory for saving weights
    if not os.path.exists("save_weights"):
        os.makedirs("save_weights")

    im_height = 224
    im_width = 224

    # per-channel means subtracted from every image
    _R_MEAN = 123.68
    _G_MEAN = 116.78
    _B_MEAN = 103.94

    batch_size = 32
    epochs = 30

    # class dict: every sub-directory of train_dir is one class
    data_class = [cla for cla in os.listdir(train_dir) if os.path.isdir(os.path.join(train_dir, cla))]
    class_num = len(data_class)
    class_dict = dict((value, index) for index, value in enumerate(data_class))

    # reverse value and key of dict
    inverse_dict = dict((val, key) for key, val in class_dict.items())
    # write dict into json file
    json_str = json.dumps(inverse_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    # load train images list
    random.seed(0)
    train_image_list = glob.glob(train_dir+"/*/*.jpg")
    random.shuffle(train_image_list)
    train_num = len(train_image_list)
    assert train_num > 0, "cannot find any .jpg file in {}".format(train_dir)
    # the label is the index of the image's parent directory name
    train_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in train_image_list]

    # load validation images list
    val_image_list = glob.glob(validation_dir+"/*/*.jpg")
    random.shuffle(val_image_list)
    val_num = len(val_image_list)
    assert val_num > 0, "cannot find any .jpg file in {}".format(validation_dir)
    val_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in val_image_list]

    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    def process_train_img(img_path, label):
        """Load one training image: decode, resize, random flip, mean-subtract."""
        label = tf.one_hot(label, depth=class_num)
        image = tf.io.read_file(img_path)
        image = tf.image.decode_jpeg(image)
        image = tf.cast(image, tf.float32)
        image = tf.image.resize(image, [im_height, im_width])
        image = tf.image.random_flip_left_right(image)
        image = image - [_R_MEAN, _G_MEAN, _B_MEAN]
        return image, label

    def process_val_img(img_path, label):
        """Load one validation image: decode, resize, mean-subtract (no augmentation)."""
        label = tf.one_hot(label, depth=class_num)
        image = tf.io.read_file(img_path)
        image = tf.image.decode_jpeg(image)
        image = tf.cast(image, tf.float32)
        image = tf.image.resize(image, [im_height, im_width])
        image = image - [_R_MEAN, _G_MEAN, _B_MEAN]
        return image, label

    AUTOTUNE = tf.data.experimental.AUTOTUNE

    # load train dataset
    train_dataset = tf.data.Dataset.from_tensor_slices((train_image_list, train_label_list))
    train_dataset = train_dataset.shuffle(buffer_size=train_num)\
                                 .map(process_train_img, num_parallel_calls=AUTOTUNE)\
                                 .repeat().batch(batch_size).prefetch(AUTOTUNE)

    # load validation dataset
    val_dataset = tf.data.Dataset.from_tensor_slices((val_image_list, val_label_list))
    val_dataset = val_dataset.map(process_val_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)\
                             .repeat().batch(batch_size)

    # instantiate the model; the head width follows the number of class
    # directories so it always matches the one-hot label depth
    feature = resnet50(num_classes=class_num, include_top=False)

    pre_weights_path = './pretrain_weights.ckpt'
    assert len(glob.glob(pre_weights_path + "*")), "cannot find {}".format(pre_weights_path)
    feature.load_weights(pre_weights_path)
    feature.trainable = False  # only the new head is trained

    model = tf.keras.Sequential([feature,
                                 tf.keras.layers.GlobalAvgPool2D(),
                                 tf.keras.layers.Dropout(rate=0.5),
                                 tf.keras.layers.Dense(1024, activation="relu"),
                                 tf.keras.layers.Dropout(rate=0.5),
                                 tf.keras.layers.Dense(class_num),
                                 tf.keras.layers.Softmax()])
    model.summary()

    # using keras low level api for training
    # the model ends with Softmax, hence from_logits=False
    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')

    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            output = model(images, training=True)
            loss = loss_object(labels, output)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(labels, output)

    @tf.function
    def test_step(images, labels):
        output = model(images, training=False)
        t_loss = loss_object(labels, output)

        test_loss(t_loss)
        test_accuracy(labels, output)

    best_test_loss = float('inf')
    # both datasets repeat forever, so each epoch is cut off after a fixed number of steps
    train_step_num = train_num // batch_size
    val_step_num = val_num // batch_size
    for epoch in range(1, epochs+1):
        train_loss.reset_states()        # clear history info
        train_accuracy.reset_states()    # clear history info
        test_loss.reset_states()         # clear history info
        test_accuracy.reset_states()     # clear history info

        t1 = time.perf_counter()
        for index, (images, labels) in enumerate(train_dataset):
            train_step(images, labels)
            if index+1 == train_step_num:
                break
        print(time.perf_counter()-t1)

        for index, (images, labels) in enumerate(val_dataset):
            test_step(images, labels)
            if index+1 == val_step_num:
                break

        template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
        print(template.format(epoch,
                              train_loss.result(),
                              train_accuracy.result() * 100,
                              test_loss.result(),
                              test_accuracy.result() * 100))
        # only save when the validation loss improved; remember the new best
        # value, otherwise every epoch would overwrite the checkpoint
        if test_loss.result() < best_test_loss:
            best_test_loss = test_loss.result()
            model.save_weights("./save_weights/myResNet.ckpt", save_format='tf')
def MobileNetV2(im_height=224,
                im_width=224,
                num_classes=1000,
                alpha=1.0,
                round_nearest=8,
                include_top=True):
    """Assemble MobileNetV2 as a Keras functional model.

    Args:
        im_height: input image height.
        im_width: input image width.
        num_classes: number of units of the final ``Logits`` dense layer.
        alpha: width multiplier applied to every channel count.
        round_nearest: channel counts are rounded to a multiple of this value.
        include_top: when True, global pooling, dropout and the ``Logits``
            dense layer (no activation) are appended; otherwise the model
            outputs the feature map of the last 1x1 convolution.

    Returns:
        The constructed ``Model``.
    """
    # one row per stage:
    # (expansion factor t, output channels c, number of blocks n, stride of first block s)
    stage_settings = (
        (1, 16, 1, 1),
        (6, 24, 2, 2),
        (6, 32, 3, 2),
        (6, 64, 4, 2),
        (6, 96, 3, 1),
        (6, 160, 3, 2),
        (6, 320, 1, 1),
    )
    stem_channel = _make_divisible(32 * alpha, round_nearest)
    head_channel = _make_divisible(1280 * alpha, round_nearest)

    image_in = layers.Input(shape=(im_height, im_width, 3), dtype='float32')

    # conv1
    net = ConvBNReLU(stem_channel, stride=2, name='Conv')(image_in)

    # stack of inverted residual blocks
    for expand, channel, repeats, first_stride in stage_settings:
        stage_out = _make_divisible(channel * alpha, round_nearest)
        for unit in range(repeats):
            # only the first block of a stage may down-sample
            unit_stride = 1 if unit else first_stride
            net = InvertedResidual(net.shape[-1],
                                   stage_out,
                                   unit_stride,
                                   expand_ratio=expand)(net)

    # last 1x1 convolution
    net = ConvBNReLU(head_channel, kernel_size=1, name='Conv_1')(net)

    if include_top is True:
        # classifier
        net = layers.GlobalAveragePooling2D()(net)  # pool + flatten
        net = layers.Dropout(0.2)(net)
        net = layers.Dense(num_classes, name='Logits')(net)

    return Model(inputs=image_in, outputs=net)
def correct_pad(input_size: Union[int, tuple], kernel_size: Union[int, tuple]):
    """Return the zero-padding for a 2D convolution with downsampling.

    Arguments:
      input_size: Spatial input size, either one integer (square input) or a
        (height, width) pair. A dimension that is ``None`` (unknown at graph
        construction time) is treated as even.
      kernel_size: An integer or tuple/list of 2 integers.

    Returns:
      A tuple ``((top, bottom), (left, right))``.
    """
    if isinstance(input_size, int):
        input_size = (input_size, input_size)
    # the docstring always promised int-or-pair; accept both forms
    if isinstance(kernel_size, int):
        kernel_size = (kernel_size, kernel_size)

    # an even input needs one pixel less padding on the leading side
    adjust = tuple(1 if size is None else 1 - size % 2 for size in input_size[:2])
    correct = (kernel_size[0] // 2, kernel_size[1] // 2)
    return ((correct[0] - adjust[0], correct[0]),
            (correct[1] - adjust[1], correct[1]))
def _inverted_res_block(x,
                        input_c: int,
                        kernel_size: int,
                        exp_c: int,
                        out_c: int,
                        use_se: bool,
                        activation: str,
                        stride: int,
                        block_id: int,
                        alpha: float = 1.0):
    """Append one MobileNetV3 inverted-residual block to tensor ``x``.

    Args:
        x: input tensor of the block.
        input_c: nominal input channel count.
        kernel_size: kernel size of the depthwise convolution.
        exp_c: nominal channel count after the 1x1 expansion.
        out_c: nominal output channel count.
        use_se: whether a squeeze-and-excitation block follows the depthwise conv.
        activation: "RE" selects ReLU, anything else selects HardSwish.
        stride: stride of the depthwise convolution.
        block_id: index used in the layer-name prefix; block 0 has no expansion conv.
        alpha: width multiplier applied to ``input_c``, ``exp_c`` and ``out_c``.

    Returns:
        The output tensor of the block.
    """
    batch_norm = partial(layers.BatchNormalization, epsilon=0.001, momentum=0.99)

    # apply the width multiplier to all three channel counts
    input_c, exp_c, out_c = [_make_divisible(width * alpha)
                             for width in (input_c, exp_c, out_c)]

    if activation == "RE":
        act = layers.ReLU
    else:
        act = HardSwish
    act_name = act.__name__

    residual = x
    if block_id:
        prefix = 'expanded_conv_{}/'.format(block_id)
        # 1x1 conv that expands the channels
        x = layers.Conv2D(filters=exp_c,
                          kernel_size=1,
                          padding='same',
                          use_bias=False,
                          name=prefix + 'expand')(x)
        x = batch_norm(name=prefix + 'expand/BatchNorm')(x)
        x = act(name=prefix + 'expand/' + act_name)(x)
    else:
        prefix = 'expanded_conv/'

    if stride == 2:
        # explicit padding derived from the current (height, width)
        pad = correct_pad((x.shape[1], x.shape[2]), kernel_size)
        x = layers.ZeroPadding2D(padding=pad,
                                 name=prefix + 'depthwise/pad')(x)

    depthwise_padding = 'valid'
    if stride == 1:
        depthwise_padding = 'same'
    x = layers.DepthwiseConv2D(kernel_size=kernel_size,
                               strides=stride,
                               padding=depthwise_padding,
                               use_bias=False,
                               name=prefix + 'depthwise')(x)
    x = batch_norm(name=prefix + 'depthwise/BatchNorm')(x)
    x = act(name=prefix + 'depthwise/' + act_name)(x)

    if use_se:
        x = _se_block(x, filters=exp_c, prefix=prefix)

    # linear 1x1 projection (no activation afterwards)
    x = layers.Conv2D(filters=out_c,
                      kernel_size=1,
                      padding='same',
                      use_bias=False,
                      name=prefix + 'project')(x)
    x = batch_norm(name=prefix + 'project/BatchNorm')(x)

    if stride != 1 or input_c != out_c:
        return x

    # shapes match, so add the shortcut branch
    return layers.Add(name=prefix + 'Add')([residual, x])
def mobilenet_v3_small(input_shape=(224, 224, 3),
                       num_classes=1000,
                       alpha=1.0,
                       include_top=True):
    """
    Build MobileNetV3-Small as a Keras functional model.

    download weights url:
    link: https://pan.baidu.com/s/1vrQ_6HdDTHL1UUAN6nSEcw  password: rrf0

    Args:
        input_shape: shape of the input image, without the batch dimension.
        num_classes: number of output channels of the final 1x1 convolution.
        alpha: width multiplier forwarded to every inverted-residual block and
            applied to the last two channel counts.
        include_top: when True, the pooling / 1x1-conv classifier and the
            softmax are appended; otherwise the model outputs the feature map
            after ``Conv_1``.

    Returns:
        The constructed ``Model``.
    """
    bn = partial(layers.BatchNormalization, epsilon=0.001, momentum=0.99)
    img_input = layers.Input(shape=input_shape)

    x = layers.Conv2D(filters=16,
                      kernel_size=3,
                      strides=(2, 2),
                      padding='same',
                      use_bias=False,
                      name="Conv")(img_input)
    x = bn(name="Conv/BatchNorm")(x)
    x = HardSwish(name="Conv/HardSwish")(x)

    inverted_cnf = partial(_inverted_res_block, alpha=alpha)
    # input, input_c, k_size, expand_c, out_c, use_se, activation, stride, block_id
    x = inverted_cnf(x, 16, 3, 16, 16, True, "RE", 2, 0)
    x = inverted_cnf(x, 16, 3, 72, 24, False, "RE", 2, 1)
    x = inverted_cnf(x, 24, 3, 88, 24, False, "RE", 1, 2)
    x = inverted_cnf(x, 24, 5, 96, 40, True, "HS", 2, 3)
    x = inverted_cnf(x, 40, 5, 240, 40, True, "HS", 1, 4)
    x = inverted_cnf(x, 40, 5, 240, 40, True, "HS", 1, 5)
    x = inverted_cnf(x, 40, 5, 120, 48, True, "HS", 1, 6)
    x = inverted_cnf(x, 48, 5, 144, 48, True, "HS", 1, 7)
    x = inverted_cnf(x, 48, 5, 288, 96, True, "HS", 2, 8)
    x = inverted_cnf(x, 96, 5, 576, 96, True, "HS", 1, 9)
    x = inverted_cnf(x, 96, 5, 576, 96, True, "HS", 1, 10)

    last_c = _make_divisible(96 * 6 * alpha)
    last_point_c = _make_divisible(1024 * alpha)

    x = layers.Conv2D(filters=last_c,
                      kernel_size=1,
                      padding='same',
                      use_bias=False,
                      name="Conv_1")(x)
    x = bn(name="Conv_1/BatchNorm")(x)
    x = HardSwish(name="Conv_1/HardSwish")(x)

    if include_top is True:
        x = layers.GlobalAveragePooling2D()(x)
        # [batch, channel] -> [batch, 1, 1, channel] so 1x1 convs can act as fc layers
        x = layers.Reshape((1, 1, last_c))(x)

        # fc1
        x = layers.Conv2D(filters=last_point_c,
                          kernel_size=1,
                          padding='same',
                          name="Conv_2")(x)
        x = HardSwish(name="Conv_2/HardSwish")(x)

        # fc2
        x = layers.Conv2D(filters=num_classes,
                          kernel_size=1,
                          padding='same',
                          name='Logits/Conv2d_1c_1x1')(x)
        x = layers.Flatten()(x)
        x = layers.Softmax(name="Predictions")(x)

    # this is the small variant; the name used to be a copy of the large one
    model = Model(img_input, x, name="MobilenetV3small")

    return model
def rename_var(ckpt_path, new_ckpt_path, num_classes, except_list):
    """Copy a MobilenetV2 checkpoint under renamed variable names.

    Every variable of ``ckpt_path`` that is not in ``except_list`` and is not
    optimizer / moving-average state is re-created under a rewritten name.
    Freshly initialised ``Logits/kernel`` and ``Logits/bias`` variables for
    ``num_classes`` outputs are added, and everything is saved to
    ``new_ckpt_path``.

    Args:
        ckpt_path: path of the source checkpoint.
        new_ckpt_path: path the converted checkpoint is written to.
        num_classes: number of output units of the new ``Logits`` layer.
        except_list: exact variable names that must not be copied.
    """
    # `with Session() as sess` makes the session the default one AND closes it
    # on exit; `Session().as_default()` alone would leave it open.
    with tf.Graph().as_default(), tf.compat.v1.Session() as sess:
        new_var_list = []

        for var_name, _ in tf.train.list_variables(ckpt_path):
            if var_name in except_list:
                continue
            # skip optimizer slots and exponential-moving-average copies
            if "RMSProp" in var_name or "Exponential" in var_name:
                continue

            var = tf.train.load_variable(ckpt_path, var_name)
            new_var_name = var_name.replace('MobilenetV2/', "")
            new_var_name = new_var_name.replace("/expand/weights", "/expand/Conv2d/weights")
            new_var_name = new_var_name.replace("Conv/weights", "Conv/Conv2d/kernel")
            new_var_name = new_var_name.replace("Conv_1/weights", "Conv_1/Conv2d/kernel")
            new_var_name = new_var_name.replace("weights", "kernel")
            new_var_name = new_var_name.replace("biases", "bias")

            first_word = new_var_name.split('/')[0]
            if "expanded_conv" in first_word:
                # "expanded_conv_3/..." -> "inverted_residual_3/expanded_conv/..."
                # (the suffix is empty for the first block)
                suffix = first_word.split('expanded_conv')[-1]
                new_word = "inverted_residual" + suffix + "/expanded_conv/"
                new_var_name = new_word + new_var_name.split('/', maxsplit=1)[-1]
            print(new_var_name)
            new_var_list.append(tf.Variable(var, name=new_var_name))

        # new, randomly initialised classifier for the target number of classes
        initializer = tf.keras.initializers.he_uniform()
        new_var_list.append(tf.Variable(initializer([1280, num_classes]), name="Logits/kernel"))
        new_var_list.append(tf.Variable(initializer([num_classes]), name="Logits/bias"))

        saver = tf.compat.v1.train.Saver(new_var_list)
        sess.run(tf.compat.v1.global_variables_initializer())
        saver.save(sess, save_path=new_ckpt_path, write_meta_graph=False, write_state=False)
flower data set path train_dir = os.path.join(image_path, "train") validation_dir = os.path.join(image_path, "val") assert os.path.exists(train_dir), "cannot find {}".format(train_dir) assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir) # create direction for saving weights if not os.path.exists("save_weights"): os.makedirs("save_weights") im_height = 224 im_width = 224 batch_size = 32 epochs = 30 # class dict data_class = [cla for cla in os.listdir(train_dir) if os.path.isdir(os.path.join(train_dir, cla))] class_num = len(data_class) class_dict = dict((value, index) for index, value in enumerate(data_class)) # reverse value and key of dict inverse_dict = dict((val, key) for key, val in class_dict.items()) # write dict into json file json_str = json.dumps(inverse_dict, indent=4) with open('class_indices.json', 'w') as json_file: json_file.write(json_str) # load train images list train_image_list = glob.glob(train_dir+"/*/*.jpg") random.shuffle(train_image_list) train_num = len(train_image_list) assert train_num > 0, "cannot find any .jpg file in {}".format(train_dir) train_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in train_image_list] # load validation images list val_image_list = glob.glob(validation_dir+"/*/*.jpg") random.shuffle(val_image_list) val_num = len(val_image_list) assert val_num > 0, "cannot find any .jpg file in {}".format(validation_dir) val_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in val_image_list] print("using {} images for training, {} images for validation.".format(train_num, val_num)) def process_train_img(img_path, label): label = tf.one_hot(label, depth=class_num) image = tf.io.read_file(img_path) image = tf.image.decode_jpeg(image) image = tf.image.convert_image_dtype(image, tf.float32) image = tf.image.resize(image, [im_height, im_width]) image = tf.image.random_flip_left_right(image) # image = (image - 0.5) / 0.5 image = (image - 0.5) * 2.0 return image, label def 
process_val_img(img_path, label): label = tf.one_hot(label, depth=class_num) image = tf.io.read_file(img_path) image = tf.image.decode_jpeg(image) image = tf.image.convert_image_dtype(image, tf.float32) image = tf.image.resize(image, [im_height, im_width]) # image = (image - 0.5) / 0.5 image = (image - 0.5) * 2.0 return image, label AUTOTUNE = tf.data.experimental.AUTOTUNE # load train dataset train_dataset = tf.data.Dataset.from_tensor_slices((train_image_list, train_label_list)) train_dataset = train_dataset.shuffle(buffer_size=train_num)\ .map(process_train_img, num_parallel_calls=AUTOTUNE)\ .repeat().batch(batch_size).prefetch(AUTOTUNE) # load train dataset val_dataset = tf.data.Dataset.from_tensor_slices((val_image_list, val_label_list)) val_dataset = val_dataset.map(process_val_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)\ .repeat().batch(batch_size) # 实例化模型 model = MobileNetV2(num_classes=5) pre_weights_path = './pretrain_weights.ckpt' assert len(glob.glob(pre_weights_path + "*")), "cannot find {}".format(pre_weights_path) model.load_weights(pre_weights_path) for layer_t in model.layers[:-1]: layer_t.trainable = False model.summary() # using keras low level api for training loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=True) optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005) train_loss = tf.keras.metrics.Mean(name='train_loss') train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy') test_loss = tf.keras.metrics.Mean(name='test_loss') test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy') @tf.function def train_step(images, labels): with tf.GradientTape() as tape: output = model(images, training=True) loss = loss_object(labels, output) gradients = tape.gradient(loss, model.trainable_variables) optimizer.apply_gradients(zip(gradients, model.trainable_variables)) train_loss(loss) train_accuracy(labels, output) @tf.function def test_step(images, labels): output = model(images, 
def main():
    """Train a new classification head on top of a frozen MobileNetV2 backbone.

    The flower dataset is read from ``<cwd>/../../data_set/flower_data`` (``train``
    and ``val`` sub-folders), the index -> class-name mapping is written to
    ``class_indices.json``, backbone weights are loaded from
    ``./pretrain_weights.ckpt`` and training runs in a custom
    ``tf.GradientTape`` loop.  Weights are written to
    ``./save_weights/resMobileNetV2.ckpt`` each time validation accuracy improves.

    Raises:
        AssertionError: if a dataset folder or the pre-trained weights are missing.
    """
    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    train_dir = os.path.join(image_path, "train")
    validation_dir = os.path.join(image_path, "val")
    assert os.path.exists(train_dir), "cannot find {}".format(train_dir)
    assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir)

    im_height = 224
    im_width = 224
    batch_size = 16
    epochs = 20
    num_classes = 5

    def pre_function(img):
        # map raw pixel values from [0, 255] to [-1, 1]
        img = img / 255.
        img = (img - 0.5) * 2.0
        return img

    # data generator with data augmentation
    train_image_generator = ImageDataGenerator(horizontal_flip=True,
                                               preprocessing_function=pre_function)
    validation_image_generator = ImageDataGenerator(preprocessing_function=pre_function)

    train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,
                                                               batch_size=batch_size,
                                                               shuffle=True,
                                                               target_size=(im_height, im_width),
                                                               class_mode='categorical')
    total_train = train_data_gen.n

    # get class dict
    class_indices = train_data_gen.class_indices

    # transform value and key of dict
    inverse_dict = dict((val, key) for key, val in class_indices.items())
    # write dict into json file
    json_str = json.dumps(inverse_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,
                                                                  batch_size=batch_size,
                                                                  shuffle=False,
                                                                  target_size=(im_height, im_width),
                                                                  class_mode='categorical')
    total_val = val_data_gen.n
    print("using {} images for training, {} images for validation.".format(total_train,
                                                                           total_val))

    # create model except fc layer
    feature = MobileNetV2(include_top=False)
    # download weights link: https://pan.baidu.com/s/1YgFoIKHqooMrTQg_IqI2hA  password: 2qht
    pre_weights_path = './pretrain_weights.ckpt'
    assert len(glob.glob(pre_weights_path+"*")), "cannot find {}".format(pre_weights_path)
    feature.load_weights(pre_weights_path)
    feature.trainable = False  # only the layers added below are trained
    feature.summary()

    # add last fc layer
    model = tf.keras.Sequential([feature,
                                 tf.keras.layers.GlobalAvgPool2D(),
                                 tf.keras.layers.Dropout(rate=0.5),
                                 tf.keras.layers.Dense(num_classes),
                                 tf.keras.layers.Softmax()])
    model.summary()

    # using keras low level api for training
    # the model ends with Softmax, so the loss receives probabilities, not logits
    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.CategoricalAccuracy(name='val_accuracy')

    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            output = model(images, training=True)
            loss = loss_object(labels, output)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(labels, output)

    @tf.function
    def val_step(images, labels):
        output = model(images, training=False)
        loss = loss_object(labels, output)

        val_loss(loss)
        val_accuracy(labels, output)

    # The directory iterators cycle forever, so the step count per epoch is
    # explicit.  Ceil division is required: with floor division the final
    # partial batch is left unconsumed and becomes the first batch of the next
    # epoch, so each validation pass would cover a different, shifted subset.
    train_steps = (total_train + batch_size - 1) // batch_size
    val_steps = (total_val + batch_size - 1) // batch_size

    best_val_acc = 0.
    for epoch in range(epochs):
        train_loss.reset_states()  # clear history info
        train_accuracy.reset_states()  # clear history info
        val_loss.reset_states()  # clear history info
        val_accuracy.reset_states()  # clear history info

        # train
        train_bar = tqdm(range(train_steps), file=sys.stdout)
        for step in train_bar:
            images, labels = next(train_data_gen)
            train_step(images, labels)

            # print train process
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                                 epochs,
                                                                                 train_loss.result(),
                                                                                 train_accuracy.result())

        # validate
        val_bar = tqdm(range(val_steps), file=sys.stdout)
        for step in val_bar:
            val_images, val_labels = next(val_data_gen)
            val_step(val_images, val_labels)

            # print val process
            val_bar.desc = "valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                                 epochs,
                                                                                 val_loss.result(),
                                                                                 val_accuracy.result())

        # only save best weights
        if val_accuracy.result() > best_val_acc:
            best_val_acc = val_accuracy.result()
            model.save_weights("./save_weights/resMobileNetV2.ckpt", save_format="tf")
# Compare the version numerically.  A plain string comparison is lexicographic,
# so "2.10.0" >= "2.4.0" is False and every TF release from 2.10 on was rejected.
# Only major/minor are parsed, which also tolerates suffixes such as "2.4.0-rc1".
_tf_major_minor = tuple(int(part) for part in tf.version.VERSION.split(".")[:2])
assert _tf_major_minor >= (2, 4), "version of tf must greater/equal than 2.4.0"
def change_word(word: str):
    """Translate a checkpoint variable name into the matching Keras weight name.

    Every ``"MobilenetV3/"`` occurrence is removed first.  Then, if the name
    contains ``"weights"``, that token becomes ``"kernel"``; otherwise, if it
    contains both ``"Conv"`` and ``"biases"``, ``"biases"`` becomes ``"bias"``.
    Names matching neither rule are returned with only the prefix removed.

    :param word: variable name as stored in the checkpoint
    :return: the renamed variable
    """
    stripped = word.replace("MobilenetV3/", "")

    if "weights" in stripped:
        return stripped.replace("weights", "kernel")

    # the bias rename only applies when the kernel rename did not
    if "biases" in stripped and "Conv" in stripped:
        return stripped.replace("biases", "bias")

    return stripped
def main():
    """Convert the MobileNetV3-large TF1 checkpoint into a Keras ``.h5`` weight file.

    Builds the Keras model with 1001 classes, collects the name and shape of
    every model weight, asks ``rename_var`` for the checkpoint values in that
    same order, assigns them to the model and saves the result.
    """
    # https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large_224_1.0_float.tgz
    ckpt_path = './v3-large_224_1.0_float/pristine/model.ckpt-540000'
    save_path = './pre_mobilev3.h5'

    model = mobilenet_v3_large(input_shape=(224, 224, 3),
                               num_classes=1001,
                               include_top=True)

    # (name without the ":0" tensor suffix, shape as a plain list) per weight
    model_info = []
    for weight in model.weights:
        model_info.append((weight.name.replace(":0", ""), list(weight.shape)))

    model.set_weights(rename_var(ckpt_path, model_info))
    model.save_weights(save_path)
def generate_ds(data_root: str,
                im_height: int,
                im_width: int,
                batch_size: int,
                val_rate: float = 0.1):
    """
    Read and split the dataset, then build the training and validation pipelines.

    :param data_root: root directory of the dataset (one sub-folder per class)
    :param im_height: height of the images fed to the network
    :param im_width: width of the images fed to the network
    :param batch_size: batch size used by both datasets
    :param val_rate: fraction of every class that is moved to the validation set
    :return: ``(train_ds, val_ds)``, two batched ``tf.data.Dataset`` objects that
             yield ``(image, label)`` pairs
    """
    train_img_path, train_img_label, val_img_path, val_img_label = read_split_data(data_root, val_rate=val_rate)
    AUTOTUNE = tf.data.experimental.AUTOTUNE

    def load_image(img_path, label):
        # deterministic part of the pipeline: decode, crop/pad, normalise
        image = tf.io.read_file(img_path)
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize_with_crop_or_pad(image, im_height, im_width)
        image = (image - 0.5) / 0.5
        return image, label

    def random_flip(image, label):
        # random part of the pipeline; it must run after cache(), see below
        return tf.image.random_flip_left_right(image), label

    def build_dataset(img_paths, img_labels, training: bool):
        ds = tf.data.Dataset.from_tensor_slices((tf.constant(img_paths),
                                                 tf.constant(img_labels)))
        # Use Dataset.map to create a dataset of image, label pairs
        ds = ds.map(load_image, num_parallel_calls=AUTOTUNE)
        # Keep the decoded images in memory after the first pass.
        ds = ds.cache()
        if training:
            ds = ds.shuffle(buffer_size=len(img_paths))  # reshuffle every epoch
            # cache() replays exactly the elements it stored.  Flipping before
            # the cache froze the flips chosen in epoch one; flipping after it
            # draws a fresh augmentation on every epoch.
            ds = ds.map(random_flip, num_parallel_calls=AUTOTUNE)
        ds = ds.batch(batch_size)
        ds = ds.prefetch(buffer_size=AUTOTUNE)  # prepare the next step while training
        return ds

    train_ds = build_dataset(train_img_path, train_img_label, training=True)
    val_ds = build_dataset(val_img_path, val_img_label, training=False)

    return train_ds, val_ds
class ChannelShuffle(layers.Layer):
    """Interleave the channels of ``groups`` equally sized channel groups.

    The channel axis is reshaped to ``(groups, channels_per_group)``, the two
    new axes are swapped and the result is flattened back to ``num_channels``.

    Args:
        shape: static shape of the incoming tensor, unpacked as
            ``(batch, height, width, num_channels)``; the batch entry is ignored.
        groups: number of channel groups; must divide ``num_channels``.
    """

    def __init__(self, shape, groups: int = 2, **kwargs):
        super(ChannelShuffle, self).__init__(**kwargs)
        _, height, width, num_channels = shape
        # The channels are split into `groups` equal parts, so divisibility by
        # `groups` (not by the constant 2) is what the reshape below needs.
        assert num_channels % groups == 0, \
            "num_channels ({}) must be divisible by groups ({})".format(num_channels, groups)
        channel_per_group = num_channels // groups

        # Tuple of integers, does not include the samples dimension (batch size).
        self.reshape1 = layers.Reshape((height, width, groups, channel_per_group))
        self.reshape2 = layers.Reshape((height, width, num_channels))

    def call(self, inputs, **kwargs):
        x = self.reshape1(inputs)
        # swap the (groups, channel_per_group) axes to interleave the groups
        x = tf.transpose(x, perm=[0, 1, 2, 4, 3])
        x = self.reshape2(x)
        return x
def shufflenet_v2(num_classes: int,
                  input_shape: tuple,
                  stages_repeats: list,
                  stages_out_channels: list):
    """Assemble a ShuffleNetV2 classifier as a functional Keras model.

    Layout: stem conv + max-pool, three stages (``stage2`` .. ``stage4``), a
    final 1x1 conv, global average pooling, a dense layer and a softmax.  The
    first block of each stage is the stride-2 block, the rest are stride-1.

    Args:
        num_classes: number of units of the final dense layer.
        input_shape: shape of one input image, without the batch dimension.
        stages_repeats: number of blocks in each of the three stages.
        stages_out_channels: five channel counts: stem, the three stages and
            the final conv.

    Returns:
        The constructed ``Model``.

    Raises:
        ValueError: if ``stages_repeats`` does not have 3 entries or
            ``stages_out_channels`` does not have 5 entries.
    """
    img_input = layers.Input(shape=input_shape)

    if len(stages_repeats) != 3:
        raise ValueError("expected stages_repeats as list of 3 positive ints")
    if len(stages_out_channels) != 5:
        raise ValueError("expected stages_out_channels as list of 5 positive ints")

    stem_channels = stages_out_channels[0]
    head_channels = stages_out_channels[-1]

    net = ConvBNReLU(filters=stem_channels,
                     kernel_size=3,
                     strides=2,
                     name="conv1")(img_input)
    net = layers.MaxPooling2D(pool_size=(3, 3),
                              strides=2,
                              padding='same',
                              name="maxpool")(net)

    # stages are numbered from 2; the stem is implicitly "stage 1"
    stage_specs = zip(stages_repeats, stages_out_channels[1:])
    for stage_idx, (repeats, out_c) in enumerate(stage_specs, start=2):
        stage_label = "stage{}".format(stage_idx)
        for block_idx in range(repeats):
            block_prefix = stage_label + "_{}".format(block_idx)
            if block_idx > 0:
                net = shuffle_block_s1(net, output_c=out_c, stride=1, prefix=block_prefix)
            else:
                # the first block of a stage uses the stride-2 variant
                net = shuffle_block_s2(net, output_c=out_c, stride=2, prefix=block_prefix)

    net = ConvBNReLU(filters=head_channels, name="conv5")(net)
    net = layers.GlobalAveragePooling2D(name="globalpool")(net)
    net = layers.Dense(units=num_classes, name="fc")(net)
    net = layers.Softmax()(net)

    return Model(img_input, net, name="ShuffleNetV2_1.0")
def shufflenet_v2_x0_5(num_classes=1000, input_shape=(224, 224, 3)):
    """Build the 0.5x-width ShuffleNetV2 variant.

    Uses stage repeats ``[4, 8, 4]`` and channel counts
    ``[24, 48, 96, 192, 1024]``.

    Args:
        num_classes: number of output classes.
        input_shape: shape of one input image, without the batch dimension.

    Returns:
        The model returned by ``shufflenet_v2``.
    """
    repeats = [4, 8, 4]
    channels = [24, 48, 96, 192, 1024]
    return shufflenet_v2(num_classes=num_classes,
                         input_shape=input_shape,
                         stages_repeats=repeats,
                         stages_out_channels=channels)
def main():
    """Train ShuffleNetV2 x1.0 on the flower dataset with a cosine learning-rate decay.

    Data is read from ``/data/flower_photos``; pre-trained weights are loaded by
    layer name from ``./shufflenetv2_x1_0.h5``.  Loss, accuracy and learning rate
    are written to TensorBoard under ``./logs/<timestamp>``, and the weights are
    saved to ``./save_weights/shufflenetv2.ckpt`` whenever validation accuracy
    improves.

    Raises:
        AssertionError: if the pre-trained weight file is missing.
    """
    data_root = "/data/flower_photos"  # get data root path

    if not os.path.exists("./save_weights"):
        os.makedirs("./save_weights")

    im_height = 224
    im_width = 224
    batch_size = 16
    epochs = 30
    num_classes = 5

    log_dir = "./logs/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    train_writer = tf.summary.create_file_writer(os.path.join(log_dir, "train"))
    val_writer = tf.summary.create_file_writer(os.path.join(log_dir, "val"))

    # data generator with data augmentation
    train_ds, val_ds = generate_ds(data_root, im_height, im_width, batch_size)

    # create model
    model = shufflenet_v2_x1_0(input_shape=(im_height, im_width, 3),
                               num_classes=num_classes)

    # load weights
    # x1.0 weights link: https://pan.baidu.com/s/1M2mp98Si9eT9qT436DcdOw  password: mhts
    pre_weights_path = './shufflenetv2_x1_0.h5'
    assert os.path.exists(pre_weights_path), "cannot find {}".format(pre_weights_path)
    # by_name + skip_mismatch: layers whose shapes differ are skipped
    model.load_weights(pre_weights_path, by_name=True, skip_mismatch=True)

    model.summary()

    # custom learning rate curve
    def scheduler(now_epoch):
        initial_lr = 0.1
        end_lr_rate = 0.1  # end_lr = initial_lr * end_lr_rate
        rate = ((1 + math.cos(now_epoch * math.pi / epochs)) / 2) * (1 - end_lr_rate) + end_lr_rate  # cosine
        new_lr = rate * initial_lr

        # writing lr into tensorboard
        # Use the argument as the step, not the enclosing loop variable `epoch`,
        # so the function is correct for any value it is called with.
        with train_writer.as_default():
            tf.summary.scalar('learning rate', data=new_lr, step=now_epoch)

        return new_lr

    # using keras low level api for training
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

    @tf.function
    def train_step(train_images, train_labels):
        with tf.GradientTape() as tape:
            output = model(train_images, training=True)
            loss = loss_object(train_labels, output)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(train_labels, output)

    @tf.function
    def val_step(val_images, val_labels):
        output = model(val_images, training=False)
        loss = loss_object(val_labels, output)

        val_loss(loss)
        val_accuracy(val_labels, output)

    best_val_acc = 0.
    for epoch in range(epochs):
        train_loss.reset_states()  # clear history info
        train_accuracy.reset_states()  # clear history info
        val_loss.reset_states()  # clear history info
        val_accuracy.reset_states()  # clear history info

        # update learning rate
        # Set it before the epoch's training steps so that epoch k is trained
        # with scheduler(k), the same value that is logged at step k.  Setting
        # it after training delayed the whole schedule by one epoch.
        optimizer.learning_rate = scheduler(epoch)

        # train
        train_bar = tqdm(train_ds, file=sys.stdout)
        for images, labels in train_bar:
            train_step(images, labels)

            # print train process
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                                 epochs,
                                                                                 train_loss.result(),
                                                                                 train_accuracy.result())

        # validate
        val_bar = tqdm(val_ds, file=sys.stdout)
        for images, labels in val_bar:
            val_step(images, labels)

            # print val process
            val_bar.desc = "valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                                 epochs,
                                                                                 val_loss.result(),
                                                                                 val_accuracy.result())

        # writing training loss and acc
        with train_writer.as_default():
            tf.summary.scalar("loss", train_loss.result(), epoch)
            tf.summary.scalar("accuracy", train_accuracy.result(), epoch)

        # writing validation loss and acc
        with val_writer.as_default():
            tf.summary.scalar("loss", val_loss.result(), epoch)
            tf.summary.scalar("accuracy", val_accuracy.result(), epoch)

        # only save best weights
        if val_accuracy.result() > best_val_acc:
            best_val_acc = val_accuracy.result()
            model.save_weights("./save_weights/shufflenetv2.ckpt", save_format="tf")
new_weights_dict["conv1/conv1/kernel"] = value elif "conv1.1.weight" == key: new_weights_dict["conv1/bn/gamma"] = value elif "conv1.1.bias" == key: new_weights_dict["conv1/bn/beta"] = value elif "conv1.1.running_mean" == key: new_weights_dict["conv1/bn/moving_mean"] = value elif "conv1.1.running_var" == key: new_weights_dict["conv1/bn/moving_variance"] = value elif "stage" in key: names = key.split(".branch") num_stage, num_block = names[0].replace("stage", "").split(".") tf_name_prefix = "stage{}_{}/".format(num_stage, num_block) torch_name2tf_name = {"1.0.weight": "b1_dw1/dw1/depthwise_kernel", "1.1.weight": "b1_dw1/bn/gamma", "1.1.bias": "b1_dw1/bn/beta", "1.1.running_mean": "b1_dw1/bn/moving_mean", "1.1.running_var": "b1_dw1/bn/moving_variance", "1.2.weight": "b1_conv1/conv1/kernel", "1.3.weight": "b1_conv1/bn/gamma", "1.3.bias": "b1_conv1/bn/beta", "1.3.running_mean": "b1_conv1/bn/moving_mean", "1.3.running_var": "b1_conv1/bn/moving_variance", "2.0.weight": "b2_conv1/conv1/kernel", "2.1.weight": "b2_conv1/bn/gamma", "2.1.bias": "b2_conv1/bn/beta", "2.1.running_mean": "b2_conv1/bn/moving_mean", "2.1.running_var": "b2_conv1/bn/moving_variance", "2.3.weight": "b2_dw1/dw1/depthwise_kernel", "2.4.weight": "b2_dw1/bn/gamma", "2.4.bias": "b2_dw1/bn/beta", "2.4.running_mean": "b2_dw1/bn/moving_mean", "2.4.running_var": "b2_dw1/bn/moving_variance", "2.5.weight": "b2_conv2/conv1/kernel", "2.6.weight": "b2_conv2/bn/gamma", "2.6.bias": "b2_conv2/bn/beta", "2.6.running_mean": "b2_conv2/bn/moving_mean", "2.6.running_var": "b2_conv2/bn/moving_variance"} tf_name_postfix = torch_name2tf_name[names[1]] tf_name = tf_name_prefix + tf_name_postfix if len(value.shape) > 1: # conv or dwconv if "dw" in tf_name: value = np.transpose(value.detach().numpy(), (2, 3, 0, 1)).astype(np.float32) else: value = np.transpose(value.detach().numpy(), (2, 3, 1, 0)).astype(np.float32) new_weights_dict[tf_name] = value elif "conv5.0.weight" == key: value = np.transpose(value.detach().numpy(), (2, 3, 
def read_split_data(root: str, val_rate: float = 0.2):
    """Scan a folder-per-class image dataset and split it into train/val lists.

    Every sub-directory of ``root`` is one class; classes are numbered in sorted
    name order.  The index -> class-name mapping is written to
    ``class_indices.json`` in the current working directory.  For each class,
    ``int(len(images) * val_rate)`` images are sampled (with a fixed seed) for
    validation and the rest are used for training.

    :param root: dataset root directory
    :param val_rate: fraction of each class placed in the validation set
    :return: ``(train_paths, train_labels, val_paths, val_labels)``
    :raises AssertionError: if ``root`` does not exist
    """
    random.seed(0)  # make the random split reproducible
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # walk the sub-folders; each folder corresponds to one class
    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]
    # sort so that the class order is stable
    flower_class.sort()
    # map each class name to its numeric index
    class_indices = dict((k, v) for v, k in enumerate(flower_class))
    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of every training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of every validation image
    every_class_num = []  # number of samples per class
    supported = [".jpg", ".JPG", ".jpeg", ".JPEG"]  # accepted file extensions
    # walk the files of every class folder
    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # collect the paths of all files with a supported extension
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        # numeric index of this class
        image_class = class_indices[cla]
        # record the number of samples of this class
        every_class_num.append(len(images))
        # Sample the validation images by ratio.  The sample is turned into a
        # set so the membership test below is O(1) per image instead of a
        # linear scan of a list.
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:  # sampled for validation
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:  # everything else is used for training
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.\n{} for training, {} for validation".format(sum(every_class_num),
                                                                                            len(train_images_path),
                                                                                            len(val_images_path)
                                                                                            ))

    plot_image = False
    if plot_image:
        # bar chart of the number of images per class
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # replace the x ticks 0,1,2,3,4 with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # put the count above each bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        # x-axis label
        plt.xlabel('image class')
        # y-axis label
        plt.ylabel('number of images')
        # chart title
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label
channels=3) image = tf.image.convert_image_dtype(image, tf.float32) # image = tf.cast(image, tf.float32) # image = tf.image.resize(image, [im_height, im_width]) image = tf.image.resize_with_crop_or_pad(image, im_height, im_width) image = tf.image.random_flip_left_right(image) image = (image - mean) / std return image, label def process_val_info(img_path, label): image = tf.io.read_file(img_path) image = tf.image.decode_jpeg(image, channels=3) image = tf.image.convert_image_dtype(image, tf.float32) # image = tf.cast(image, tf.float32) # image = tf.image.resize(image, [im_height, im_width]) image = tf.image.resize_with_crop_or_pad(image, im_height, im_width) image = (image - mean) / std return image, label # Configure dataset for performance def configure_for_performance(ds, shuffle_size: int, shuffle: bool = False): ds = ds.cache() # 读取数据后缓存至内存 if shuffle: ds = ds.shuffle(buffer_size=shuffle_size) # 打乱数据顺序 ds = ds.batch(batch_size) # 指定batch size ds = ds.prefetch(buffer_size=AUTOTUNE) # 在训练的同时提前准备下一个step的数据 return ds train_ds = tf.data.Dataset.from_tensor_slices((tf.constant(train_img_path), tf.constant(train_img_label))) total_train = len(train_img_path) # Use Dataset.map to create a dataset of image, label pairs train_ds = train_ds.map(process_train_info, num_parallel_calls=AUTOTUNE) train_ds = configure_for_performance(train_ds, total_train, shuffle=True) val_ds = tf.data.Dataset.from_tensor_slices((tf.constant(val_img_path), tf.constant(val_img_label))) total_val = len(val_img_path) # Use Dataset.map to create a dataset of image, label pairs val_ds = val_ds.map(process_val_info, num_parallel_calls=AUTOTUNE) val_ds = configure_for_performance(val_ds, total_val) return train_ds, val_ds ================================================ FILE: tensorflow_classification/Test9_efficientNet/model.py ================================================ import math from typing import Union from tensorflow.keras import layers, Model CONV_KERNEL_INITIALIZER = { 'class_name': 
def correct_pad(input_size: Union[int, tuple], kernel_size: Union[int, tuple]):
    """Returns a tuple for zero-padding for 2D convolution with downsampling.

    Arguments:
      input_size: Input tensor spatial size, an integer or a pair
        ``(height, width)``.
      kernel_size: An integer or tuple/list of 2 integers.

    Returns:
      A tuple ``((top, bottom), (left, right))``.
    """
    if isinstance(input_size, int):
        input_size = (input_size, input_size)

    # The docstring always promised tuple support; only wrap plain integers.
    if isinstance(kernel_size, int):
        kernel_size = (kernel_size, kernel_size)

    # An even input needs one pixel less on the leading side.
    adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2)
    correct = (kernel_size[0] // 2, kernel_size[1] // 2)

    return ((correct[0] - adjust[0], correct[0]),
            (correct[1] - adjust[1], correct[1]))


def block(inputs,
          activation: str = "swish",
          drop_rate: float = 0.,
          name: str = "",
          input_channel: int = 32,
          output_channel: int = 16,
          kernel_size: int = 3,
          strides: int = 1,
          expand_ratio: int = 1,
          use_se: bool = True,
          se_ratio: float = 0.25):
    """An inverted residual block.

    Arguments:
      inputs: input tensor (channels-last).
      activation: activation function.
      drop_rate: float between 0 and 1, fraction of the input units to drop.
      name: string, block label (used as prefix of every layer name).
      input_channel: integer, the number of input filters.
      output_channel: integer, the number of output filters.
      kernel_size: integer, the dimension of the convolution window.
      strides: integer, the stride of the convolution.
      expand_ratio: integer, scaling coefficient for the input filters.
      use_se: whether to use the squeeze-and-excitation branch.
      se_ratio: float between 0 and 1, fraction to squeeze the input filters.

    Returns:
      output tensor for the block.
    """
    # Expansion phase
    filters = input_channel * expand_ratio
    if expand_ratio != 1:
        x = layers.Conv2D(filters=filters,
                          kernel_size=1,
                          padding="same",
                          use_bias=False,
                          kernel_initializer=CONV_KERNEL_INITIALIZER,
                          name=name + "expand_conv")(inputs)
        x = layers.BatchNormalization(name=name + "expand_bn")(x)
        x = layers.Activation(activation, name=name + "expand_activation")(x)
    else:
        x = inputs

    # Depthwise Convolution
    if strides == 2:
        # The padding must follow the parity of the feature map's spatial
        # size.  The previous code passed the channel count here, which is
        # only right when height and width happen to be even.
        height, width = tuple(x.shape)[1:3]
        if height is None or width is None:
            # Static size unknown: keep the previous behaviour.
            pad_reference = filters
        else:
            pad_reference = (height, width)
        x = layers.ZeroPadding2D(padding=correct_pad(pad_reference, kernel_size),
                                 name=name + "dwconv_pad")(x)

    x = layers.DepthwiseConv2D(kernel_size=kernel_size,
                               strides=strides,
                               padding="same" if strides == 1 else "valid",
                               use_bias=False,
                               depthwise_initializer=CONV_KERNEL_INITIALIZER,
                               name=name + "dwconv")(x)
    x = layers.BatchNormalization(name=name + "bn")(x)
    x = layers.Activation(activation, name=name + "activation")(x)

    if use_se:
        # Never let the bottleneck collapse to zero filters.
        filters_se = max(1, int(input_channel * se_ratio))
        se = layers.GlobalAveragePooling2D(name=name + "se_squeeze")(x)
        se = layers.Reshape((1, 1, filters), name=name + "se_reshape")(se)
        se = layers.Conv2D(filters=filters_se,
                           kernel_size=1,
                           padding="same",
                           activation=activation,
                           kernel_initializer=CONV_KERNEL_INITIALIZER,
                           name=name + "se_reduce")(se)
        se = layers.Conv2D(filters=filters,
                           kernel_size=1,
                           padding="same",
                           activation="sigmoid",
                           kernel_initializer=CONV_KERNEL_INITIALIZER,
                           name=name + "se_expand")(se)
        x = layers.multiply([x, se], name=name + "se_excite")

    # Output phase
    x = layers.Conv2D(filters=output_channel,
                      kernel_size=1,
                      padding="same",
                      use_bias=False,
                      kernel_initializer=CONV_KERNEL_INITIALIZER,
                      name=name + "project_conv")(x)
    x = layers.BatchNormalization(name=name + "project_bn")(x)

    # Residual connection only when input and output shapes agree.
    if strides == 1 and input_channel == output_channel:
        if drop_rate > 0:
            x = layers.Dropout(rate=drop_rate,
                               noise_shape=(None, 1, 1, 1),  # binary dropout mask
                               name=name + "drop")(x)
        x = layers.add([x, inputs], name=name + "add")

    return x
drop_connect_rate=0.2, activation="swish", model_name="efficientnet", include_top=True, num_classes=1000): """Instantiates the EfficientNet architecture using given scaling coefficients. Reference: - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks]( https://arxiv.org/abs/1905.11946) (ICML 2019) Optionally loads weights pre-trained on ImageNet. Note that the data format convention used by the model is the one specified in your Keras config at `~/.keras/keras.json`. Arguments: width_coefficient: float, scaling coefficient for network width. depth_coefficient: float, scaling coefficient for network depth. input_shape: tuple, default input image shape(not including the batch size). dropout_rate: float, dropout rate before final classifier layer. drop_connect_rate: float, dropout rate at skip connections. activation: activation function. model_name: string, model name. include_top: whether to include the fully-connected layer at the top of the network. num_classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. Returns: A `keras.Model` instance. """ # kernel_size, repeats, in_channel, out_channel, exp_ratio, strides, SE block_args = [[3, 1, 32, 16, 1, 1, True], [3, 2, 16, 24, 6, 2, True], [5, 2, 24, 40, 6, 2, True], [3, 3, 40, 80, 6, 2, True], [5, 3, 80, 112, 6, 1, True], [5, 4, 112, 192, 6, 2, True], [3, 1, 192, 320, 6, 1, True]] def round_filters(filters, divisor=8): """Round number of filters based on depth multiplier.""" filters *= width_coefficient new_filters = max(divisor, int(filters + divisor / 2) // divisor * divisor) # Make sure that round down does not go down by more than 10%. 
if new_filters < 0.9 * filters: new_filters += divisor return int(new_filters) def round_repeats(repeats): """Round number of repeats based on depth multiplier.""" return int(math.ceil(depth_coefficient * repeats)) img_input = layers.Input(shape=input_shape) # data preprocessing x = layers.experimental.preprocessing.Rescaling(1. / 255.)(img_input) x = layers.experimental.preprocessing.Normalization()(x) # first conv2d x = layers.ZeroPadding2D(padding=correct_pad(input_shape[:2], 3), name="stem_conv_pad")(x) x = layers.Conv2D(filters=round_filters(32), kernel_size=3, strides=2, padding="valid", use_bias=False, kernel_initializer=CONV_KERNEL_INITIALIZER, name="stem_conv")(x) x = layers.BatchNormalization(name="stem_bn")(x) x = layers.Activation(activation, name="stem_activation")(x) # build blocks b = 0 num_blocks = float(sum(round_repeats(i[1]) for i in block_args)) for i, args in enumerate(block_args): assert args[1] > 0 # Update block input and output filters based on depth multiplier. 
args[2] = round_filters(args[2]) # input_channel args[3] = round_filters(args[3]) # output_channel for j in range(round_repeats(args[1])): x = block(x, activation=activation, drop_rate=drop_connect_rate * b / num_blocks, name="block{}{}_".format(i + 1, chr(j + 97)), kernel_size=args[0], input_channel=args[2] if j == 0 else args[3], output_channel=args[3], expand_ratio=args[4], strides=args[5] if j == 0 else 1, use_se=args[6]) b += 1 # build top x = layers.Conv2D(round_filters(1280), kernel_size=1, padding="same", use_bias=False, kernel_initializer=CONV_KERNEL_INITIALIZER, name="top_conv")(x) x = layers.BatchNormalization(name="top_bn")(x) x = layers.Activation(activation, name="top_activation")(x) if include_top: x = layers.GlobalAveragePooling2D(name="avg_pool")(x) if dropout_rate > 0: x = layers.Dropout(dropout_rate, name="top_dropout")(x) x = layers.Dense(units=num_classes, activation="softmax", kernel_initializer=DENSE_KERNEL_INITIALIZER, name="predictions")(x) model = Model(img_input, x, name=model_name) return model def efficientnet_b0(num_classes=1000, include_top=True, input_shape=(224, 224, 3)): # https://storage.googleapis.com/keras-applications/efficientnetb0.h5 return efficient_net(width_coefficient=1.0, depth_coefficient=1.0, input_shape=input_shape, dropout_rate=0.2, model_name="efficientnetb0", include_top=include_top, num_classes=num_classes) def efficientnet_b1(num_classes=1000, include_top=True, input_shape=(240, 240, 3)): # https://storage.googleapis.com/keras-applications/efficientnetb1.h5 return efficient_net(width_coefficient=1.0, depth_coefficient=1.1, input_shape=input_shape, dropout_rate=0.2, model_name="efficientnetb1", include_top=include_top, num_classes=num_classes) def efficientnet_b2(num_classes=1000, include_top=True, input_shape=(260, 260, 3)): # https://storage.googleapis.com/keras-applications/efficientnetb2.h5 return efficient_net(width_coefficient=1.1, depth_coefficient=1.2, input_shape=input_shape, dropout_rate=0.3, 
def efficientnet_b3(num_classes=1000,
                    include_top=True,
                    input_shape=(300, 300, 3)):
    """Build EfficientNet-B3 (width x1.2, depth x1.4, top dropout 0.3)."""
    # https://storage.googleapis.com/keras-applications/efficientnetb3.h5
    scaling = dict(width_coefficient=1.2, depth_coefficient=1.4, dropout_rate=0.3)
    return efficient_net(model_name="efficientnetb3",
                         input_shape=input_shape,
                         include_top=include_top,
                         num_classes=num_classes,
                         **scaling)


def efficientnet_b4(num_classes=1000,
                    include_top=True,
                    input_shape=(380, 380, 3)):
    """Build EfficientNet-B4 (width x1.4, depth x1.8, top dropout 0.4)."""
    # https://storage.googleapis.com/keras-applications/efficientnetb4.h5
    scaling = dict(width_coefficient=1.4, depth_coefficient=1.8, dropout_rate=0.4)
    return efficient_net(model_name="efficientnetb4",
                         input_shape=input_shape,
                         include_top=include_top,
                         num_classes=num_classes,
                         **scaling)


def efficientnet_b5(num_classes=1000,
                    include_top=True,
                    input_shape=(456, 456, 3)):
    """Build EfficientNet-B5 (width x1.6, depth x2.2, top dropout 0.4)."""
    # https://storage.googleapis.com/keras-applications/efficientnetb5.h5
    scaling = dict(width_coefficient=1.6, depth_coefficient=2.2, dropout_rate=0.4)
    return efficient_net(model_name="efficientnetb5",
                         input_shape=input_shape,
                         include_top=include_top,
                         num_classes=num_classes,
                         **scaling)


def efficientnet_b6(num_classes=1000,
                    include_top=True,
                    input_shape=(528, 528, 3)):
    """Build EfficientNet-B6 (width x1.8, depth x2.6, top dropout 0.5)."""
    # https://storage.googleapis.com/keras-applications/efficientnetb6.h5
    scaling = dict(width_coefficient=1.8, depth_coefficient=2.6, dropout_rate=0.5)
    return efficient_net(model_name="efficientnetb6",
                         input_shape=input_shape,
                         include_top=include_top,
                         num_classes=num_classes,
                         **scaling)


def efficientnet_b7(num_classes=1000,
                    include_top=True,
                    input_shape=(600, 600, 3)):
    """Build EfficientNet-B7 (width x2.0, depth x3.1, top dropout 0.5)."""
    # https://storage.googleapis.com/keras-applications/efficientnetb7.h5
    scaling = dict(width_coefficient=2.0, depth_coefficient=3.1, dropout_rate=0.5)
    return efficient_net(model_name="efficientnetb7",
                         input_shape=input_shape,
                         include_top=include_top,
                         num_classes=num_classes,
                         **scaling)
# Compare the version numerically: as plain strings "2.10.0" sorts before
# "2.4.0", so the old check rejected every TensorFlow release from 2.10 on.
assert tuple(int(v) for v in tf.version.VERSION.split(".")[:2]) >= (2, 4), \
    "version of tf must greater/equal than 2.4.0"


def main():
    """Fine-tune EfficientNet on the flower dataset with a custom training loop.

    Loads the pretrained weights, optionally freezes everything except the top
    layers, trains with SGD and a cosine learning-rate schedule, logs to
    TensorBoard and keeps the weights of the best validation accuracy.
    """
    data_root = "/data/flower_photos"  # get data root path

    os.makedirs("./save_weights", exist_ok=True)

    img_size = {"B0": 224,
                "B1": 240,
                "B2": 260,
                "B3": 300,
                "B4": 380,
                "B5": 456,
                "B6": 528,
                "B7": 600}

    num_model = "B0"
    im_height = im_width = img_size[num_model]
    batch_size = 16
    epochs = 30
    num_classes = 5
    freeze_layers = True
    initial_lr = 0.01

    log_dir = "./logs/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    train_writer = tf.summary.create_file_writer(os.path.join(log_dir, "train"))
    val_writer = tf.summary.create_file_writer(os.path.join(log_dir, "val"))

    # data generator with data augmentation
    train_ds, val_ds = generate_ds(data_root, im_height, im_width, batch_size)

    # create model
    model = create_model(num_classes=num_classes)

    # load weights
    pre_weights_path = './efficientnetb0.h5'
    assert os.path.exists(pre_weights_path), "cannot find {}".format(pre_weights_path)
    model.load_weights(pre_weights_path, by_name=True, skip_mismatch=True)

    # freeze bottom layers
    if freeze_layers:
        unfreeze_layers = ["top_conv", "top_bn", "predictions"]
        for layer in model.layers:
            if layer.name not in unfreeze_layers:
                layer.trainable = False
            else:
                print("training {}".format(layer.name))

    model.summary()

    # custom learning rate curve
    def scheduler(now_epoch):
        """Cosine decay from initial_lr down to initial_lr * end_lr_rate."""
        end_lr_rate = 0.01  # end_lr = initial_lr * end_lr_rate
        rate = ((1 + math.cos(now_epoch * math.pi / epochs)) / 2) * (1 - end_lr_rate) + end_lr_rate  # cosine
        new_lr = rate * initial_lr

        # writing lr into tensorboard
        with train_writer.as_default():
            # Use the function's own argument as the step; the old code read
            # the enclosing loop variable `epoch` by accident.
            tf.summary.scalar('learning rate', data=new_lr, step=now_epoch)

        return new_lr

    # using keras low level api for training
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.SGD(learning_rate=initial_lr, momentum=0.9)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

    @tf.function
    def train_step(train_images, train_labels):
        with tf.GradientTape() as tape:
            output = model(train_images, training=True)
            loss = loss_object(train_labels, output)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(train_labels, output)

    @tf.function
    def val_step(val_images, val_labels):
        output = model(val_images, training=False)
        loss = loss_object(val_labels, output)

        val_loss(loss)
        val_accuracy(val_labels, output)

    best_val_acc = 0.
    for epoch in range(epochs):
        train_loss.reset_states()  # clear history info
        train_accuracy.reset_states()  # clear history info
        val_loss.reset_states()  # clear history info
        val_accuracy.reset_states()  # clear history info

        # train
        train_bar = tqdm(train_ds, file=sys.stdout)
        for images, labels in train_bar:
            train_step(images, labels)

            # print train process
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                                 epochs,
                                                                                 train_loss.result(),
                                                                                 train_accuracy.result())

        # update learning rate
        optimizer.learning_rate = scheduler(epoch)

        # validate
        val_bar = tqdm(val_ds, file=sys.stdout)
        for images, labels in val_bar:
            val_step(images, labels)

            # print val process
            val_bar.desc = "valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                               epochs,
                                                                               val_loss.result(),
                                                                               val_accuracy.result())
        # writing training loss and acc
        with train_writer.as_default():
            tf.summary.scalar("loss", train_loss.result(), epoch)
            tf.summary.scalar("accuracy", train_accuracy.result(), epoch)

        # writing validation loss and acc
        with val_writer.as_default():
            tf.summary.scalar("loss", val_loss.result(), epoch)
            tf.summary.scalar("accuracy", val_accuracy.result(), epoch)

        # only save best weights
        if val_accuracy.result() > best_val_acc:
            best_val_acc = val_accuracy.result()
            save_name = "./save_weights/efficientnet.ckpt"
            model.save_weights(save_name, save_format="tf")


if __name__ == '__main__':
    main()
def read_split_data(root: str, val_rate: float = 0.2):
    """
    Scan ``root`` and split the images of every class folder into a training
    and a validation list.

    Every sub-directory of ``root`` is treated as one class.  The mapping
    ``index -> class name`` is written to ``class_indices.json`` in the current
    working directory.

    :param root: dataset root directory
    :param val_rate: fraction of each class sampled into the validation set
    :return: (train_images_path, train_images_label,
              val_images_path, val_images_label)
    """
    random.seed(0)  # fixed seed so the same file listing gives the same split
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # One sub-directory per class; sorted so the class -> index mapping is stable.
    flower_class = sorted(cla for cla in os.listdir(root)
                          if os.path.isdir(os.path.join(root, cla)))
    # class name -> numeric index
    class_indices = {name: idx for idx, name in enumerate(flower_class)}
    # Persist the inverse mapping (index -> class name).
    json_str = json.dumps({idx: name for name, idx in class_indices.items()}, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of every training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of every validation image
    every_class_num = []  # number of samples per class
    supported = [".jpg", ".JPG", ".jpeg", ".JPEG"]  # accepted file extensions

    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # all files of this class with a supported extension
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        image_class = class_indices[cla]
        every_class_num.append(len(images))
        # Sample the validation images; a set makes the membership test below
        # O(1) instead of scanning a list for every image.
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.\n{} for training, {} for validation".format(sum(every_class_num),
                                                                                            len(train_images_path),
                                                                                            len(val_images_path)
                                                                                            ))

    plot_image = False
    if plot_image:
        # bar chart of the number of images per class
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # replace the x ticks 0,1,2,... with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # write the count above every bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        plt.xlabel('image class')
        plt.ylabel('number of images')
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label
len(val_img_path) # Use Dataset.map to create a dataset of image, label pairs val_ds = val_ds.map(process_val_info, num_parallel_calls=AUTOTUNE) val_ds = configure_for_performance(val_ds, total_val) return train_ds, val_ds ================================================ FILE: tensorflow_classification/analyze_weights_featuremap/alexnet_model.py ================================================ from tensorflow.keras import layers, models, Model, Sequential def AlexNet_v1(im_height=224, im_width=224, class_num=1000): # tensorflow中的tensor通道排序是NHWC input_image = layers.Input(shape=(im_height, im_width, 3), dtype="float32") # output(None, 224, 224, 3) x = layers.ZeroPadding2D(((1, 2), (1, 2)))(input_image) # output(None, 227, 227, 3) x = layers.Conv2D(48, kernel_size=11, strides=4, activation="relu")(x) # output(None, 55, 55, 48) x = layers.MaxPool2D(pool_size=3, strides=2)(x) # output(None, 27, 27, 48) x = layers.Conv2D(128, kernel_size=5, padding="same", activation="relu")(x) # output(None, 27, 27, 128) x = layers.MaxPool2D(pool_size=3, strides=2)(x) # output(None, 13, 13, 128) x = layers.Conv2D(192, kernel_size=3, padding="same", activation="relu")(x) # output(None, 13, 13, 192) x = layers.Conv2D(192, kernel_size=3, padding="same", activation="relu")(x) # output(None, 13, 13, 192) x = layers.Conv2D(128, kernel_size=3, padding="same", activation="relu")(x) # output(None, 13, 13, 128) x = layers.MaxPool2D(pool_size=3, strides=2)(x) # output(None, 6, 6, 128) x = layers.Flatten()(x) # output(None, 6*6*128) x = layers.Dropout(0.2)(x) x = layers.Dense(2048, activation="relu")(x) # output(None, 2048) x = layers.Dropout(0.2)(x) x = layers.Dense(2048, activation="relu")(x) # output(None, 2048) x = layers.Dense(class_num)(x) # output(None, 5) predict = layers.Softmax()(x) model = models.Model(inputs=input_image, outputs=predict) return model class AlexNet_v2(Model): def __init__(self, class_num=1000): super(AlexNet_v2, self).__init__() self.features = Sequential([ 
layers.ZeroPadding2D(((1, 2), (1, 2))), # output(None, 227, 227, 3) layers.Conv2D(48, kernel_size=11, strides=4, activation="relu"), # output(None, 55, 55, 48) layers.MaxPool2D(pool_size=3, strides=2), # output(None, 27, 27, 48) layers.Conv2D(128, kernel_size=5, padding="same", activation="relu"), # output(None, 27, 27, 128) layers.MaxPool2D(pool_size=3, strides=2), # output(None, 13, 13, 128) layers.Conv2D(192, kernel_size=3, padding="same", activation="relu"), # output(None, 13, 13, 192) layers.Conv2D(192, kernel_size=3, padding="same", activation="relu"), # output(None, 13, 13, 192) layers.Conv2D(128, kernel_size=3, padding="same", activation="relu"), # output(None, 13, 13, 128) layers.MaxPool2D(pool_size=3, strides=2)]) # output(None, 6, 6, 128) self.flatten = layers.Flatten() self.classifier = Sequential([ layers.Dropout(0.2), layers.Dense(1024, activation="relu"), # output(None, 2048) layers.Dropout(0.2), layers.Dense(128, activation="relu"), # output(None, 2048) layers.Dense(class_num), # output(None, 5) layers.Softmax() ]) def call(self, inputs, **kwargs): x = self.features(inputs) x = self.flatten(x) x = self.classifier(x) return x def receive_feature_map(self, x, layers_name): outputs = [] for module in self.features.layers: x = module(x) if module.name in layers_name: outputs.append(x) return outputs ================================================ FILE: tensorflow_classification/analyze_weights_featuremap/analyze_feature_map.py ================================================ from alexnet_model import AlexNet_v1, AlexNet_v2 from PIL import Image import numpy as np import matplotlib.pyplot as plt from tensorflow.keras import Model, Input im_height = 224 im_width = 224 # load image img = Image.open("../tulip.jpg") # resize image to 224x224 img = img.resize((im_width, im_height)) # scaling pixel value to (0-1) img = np.array(img) / 255. # Add the image to a batch where it's the only member. 
from alexnet_model import AlexNet_v1, AlexNet_v2
import numpy as np
import matplotlib.pyplot as plt

# Print summary statistics and plot a histogram for every weight tensor of a
# trained AlexNet.
model = AlexNet_v1(class_num=5)  # functional api
# model = AlexNet_v2(class_num=5)  # subclass api
# model.build((None, 224, 224, 3))
model.load_weights("./myAlex.h5")
# model.load_weights("./submodel.h5")
model.summary()
for layer in model.layers:
    for weight in layer.weights:
        # conv kernels: [kernel_height, kernel_width, kernel_channel, kernel_number]
        weight_t = weight.numpy()
        # read a kernel information
        # k = weight_t[:, :, :, 0]

        # calculate mean, std, min, max
        weight_mean = weight_t.mean()
        weight_std = weight_t.std(ddof=1)
        weight_min = weight_t.min()
        weight_max = weight_t.max()
        # Arguments follow the order of the labels in the message; the old
        # call passed max and min swapped.
        print("mean is {}, std is {}, min is {}, max is {}".format(weight_mean,
                                                                   weight_std,
                                                                   weight_min,
                                                                   weight_max))

        # plot hist image
        plt.close()
        weight_vec = np.reshape(weight_t, [-1])
        plt.hist(weight_vec, bins=50)
        plt.title(weight.name)
        plt.show()
def read_split_data(root: str, val_rate: float = 0.2):
    """
    Scan ``root`` and split the images of every class folder into a training
    and a validation list.

    Every sub-directory of ``root`` is treated as one class.  The mapping
    ``index -> class name`` is written to ``class_indices.json`` in the current
    working directory.

    :param root: dataset root directory
    :param val_rate: fraction of each class sampled into the validation set
    :return: (train_images_path, train_images_label,
              val_images_path, val_images_label)
    """
    random.seed(0)  # fixed seed so the same file listing gives the same split
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # One sub-directory per class; sorted so the class -> index mapping is stable.
    flower_class = sorted(cla for cla in os.listdir(root)
                          if os.path.isdir(os.path.join(root, cla)))
    # class name -> numeric index
    class_indices = {name: idx for idx, name in enumerate(flower_class)}
    # Persist the inverse mapping (index -> class name).
    json_str = json.dumps({idx: name for name, idx in class_indices.items()}, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of every training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of every validation image
    every_class_num = []  # number of samples per class
    supported = [".jpg", ".JPG", ".jpeg", ".JPEG"]  # accepted file extensions

    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # all files of this class with a supported extension
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        image_class = class_indices[cla]
        every_class_num.append(len(images))
        # Sample the validation images; a set makes the membership test below
        # O(1) instead of scanning a list for every image.
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.\n{} for training, {} for validation".format(sum(every_class_num),
                                                                                            len(train_images_path),
                                                                                            len(val_images_path)
                                                                                            ))

    plot_image = False
    if plot_image:
        # bar chart of the number of images per class
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # replace the x ticks 0,1,2,... with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # write the count above every bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        plt.xlabel('image class')
        plt.ylabel('number of images')
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label
class PatchEmbed(layers.Layer):
    """
    2D Image to Patch Embedding.

    A strided convolution (kernel == stride == patch_size) projects each
    patch to ``embed_dim`` channels; the spatial grid is then flattened into
    a token sequence and optionally normalized.

    Args:
        patch_size (int): side length of a square patch.
        embed_dim (int): number of output channels per patch.
        norm_layer: layer class called as ``norm_layer(epsilon=..., name=...)``;
            when falsy, an identity ('linear') activation is used instead.
    """
    def __init__(self, patch_size=4, embed_dim=96, norm_layer=None):
        super(PatchEmbed, self).__init__()
        self.embed_dim = embed_dim
        self.patch_size = (patch_size, patch_size)
        self.norm = norm_layer(epsilon=1e-6, name="norm") if norm_layer else layers.Activation('linear')

        self.proj = layers.Conv2D(filters=embed_dim, kernel_size=patch_size,
                                  strides=patch_size, padding='SAME',
                                  kernel_initializer=initializers.LecunNormal(),
                                  bias_initializer=initializers.Zeros(),
                                  name="proj")

    def call(self, x, **kwargs):
        """
        Args:
            x: image batch indexed as [B, H, W, C].

        Returns:
            (tokens, H, W): tokens of shape [B, H*W, embed_dim] plus the
            height and width of the patch grid.
        """
        _, H, W, _ = x.shape

        # If H or W is not a multiple of the patch size, zero-pad at the
        # bottom/right. The outer modulo keeps an already aligned axis unpadded.
        pad_h = (self.patch_size[0] - H % self.patch_size[0]) % self.patch_size[0]
        pad_w = (self.patch_size[1] - W % self.patch_size[1]) % self.patch_size[1]
        if pad_h > 0 or pad_w > 0:
            # tf.pad needs one [before, after] pair per input dimension
            # (batch, height, width, channel).
            paddings = tf.constant([[0, 0],
                                    [0, pad_h],
                                    [0, pad_w],
                                    [0, 0]])
            x = tf.pad(x, paddings)

        # Downsample by a factor of patch_size.
        x = self.proj(x)
        B, H, W, C = x.shape
        # [B, H, W, C] -> [B, H*W, C]
        x = tf.reshape(x, [B, -1, C])
        x = self.norm(x)
        return x, H, W
class MLP(layers.Layer):
    """
    Two-layer feed-forward block: Dense -> GELU -> Dropout -> Dense -> Dropout.

    The hidden width is ``int(in_features * mlp_ratio)`` and the output width
    is ``in_features``. One Dropout layer instance is applied after both
    dense layers.
    """

    # Initializers shared by both dense layers.
    k_ini = initializers.TruncatedNormal(stddev=0.02)
    b_ini = initializers.Zeros()

    def __init__(self, in_features, mlp_ratio=4.0, drop=0., name=None):
        super().__init__(name=name)
        hidden_features = int(in_features * mlp_ratio)
        init_kwargs = dict(kernel_initializer=self.k_ini, bias_initializer=self.b_ini)

        self.fc1 = layers.Dense(hidden_features, name="fc1", **init_kwargs)
        self.act = layers.Activation("gelu")
        self.fc2 = layers.Dense(in_features, name="fc2", **init_kwargs)
        self.drop = layers.Dropout(drop)

    def call(self, x, training=None):
        # Expand, activate and regularize the hidden representation.
        hidden = self.drop(self.act(self.fc1(x)), training=training)
        # Project back to the input width and regularize again.
        return self.drop(self.fc2(hidden), training=training)
def build(self, input_shape):
    """Create the relative position bias table and its lookup index.

    ``relative_position_bias_table`` is a trainable weight with one row per
    possible (dy, dx) offset inside a window and one column per head.
    ``relative_position_index`` is a non-trainable [Mh*Mw, Mh*Mw] table that
    maps every (query, key) position pair to a row of that weight.
    """
    win_h, win_w = self.window_size[0], self.window_size[1]

    # [(2*Mh-1) * (2*Mw-1), nH]
    self.relative_position_bias_table = self.add_weight(
        shape=[(2 * win_h - 1) * (2 * win_w - 1), self.num_heads],
        initializer=initializers.TruncatedNormal(stddev=0.02),
        trainable=True,
        dtype=tf.float32,
        name="relative_position_bias_table"
    )

    # Row/column coordinate of every position in the window: [2, Mh, Mw]
    grid = np.stack(np.meshgrid(np.arange(win_h), np.arange(win_w), indexing="ij"))
    flat = np.reshape(grid, [2, -1])  # [2, Mh*Mw]
    # Pairwise offsets: [2, Mh*Mw, 1] - [2, 1, Mh*Mw] -> [2, Mh*Mw, Mh*Mw]
    offsets = flat[:, :, None] - flat[:, None, :]
    offsets = np.transpose(offsets, [1, 2, 0])  # [Mh*Mw, Mh*Mw, 2]
    # Shift both offsets so they start from 0.
    offsets[:, :, 0] += win_h - 1
    offsets[:, :, 1] += win_w - 1
    # Scale the row offset so that (row, col) folds into one flat index.
    offsets[:, :, 0] *= 2 * win_w - 1
    flat_index = offsets.sum(-1)  # [Mh*Mw, Mh*Mw]

    self.relative_position_index = tf.Variable(tf.convert_to_tensor(flat_index),
                                               trainable=False,
                                               dtype=tf.int64,
                                               name="relative_position_index")
with shape of (num_windows*B, Mh*Mw, C) mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None training: whether training mode """ # [batch_size*num_windows, Mh*Mw, total_embed_dim] B_, N, C = x.shape # qkv(): -> [batch_size*num_windows, Mh*Mw, 3 * total_embed_dim] qkv = self.qkv(x) # reshape: -> [batch_size*num_windows, Mh*Mw, 3, num_heads, embed_dim_per_head] qkv = tf.reshape(qkv, [B_, N, 3, self.num_heads, C // self.num_heads]) # transpose: -> [3, batch_size*num_windows, num_heads, Mh*Mw, embed_dim_per_head] qkv = tf.transpose(qkv, [2, 0, 3, 1, 4]) # [batch_size*num_windows, num_heads, Mh*Mw, embed_dim_per_head] q, k, v = qkv[0], qkv[1], qkv[2] # transpose: -> [batch_size*num_windows, num_heads, embed_dim_per_head, Mh*Mw] # multiply -> [batch_size*num_windows, num_heads, Mh*Mw, Mh*Mw] attn = tf.matmul(a=q, b=k, transpose_b=True) * self.scale # relative_position_bias(reshape): [Mh*Mw*Mh*Mw,nH] -> [Mh*Mw,Mh*Mw,nH] relative_position_bias = tf.gather(self.relative_position_bias_table, tf.reshape(self.relative_position_index, [-1])) relative_position_bias = tf.reshape(relative_position_bias, [self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1]) relative_position_bias = tf.transpose(relative_position_bias, [2, 0, 1]) # [nH, Mh*Mw, Mh*Mw] attn = attn + tf.expand_dims(relative_position_bias, 0) if mask is not None: # mask: [nW, Mh*Mw, Mh*Mw] nW = mask.shape[0] # num_windows # attn(reshape): [batch_size, num_windows, num_heads, Mh*Mw, Mh*Mw] # mask(expand_dim): [1, nW, 1, Mh*Mw, Mh*Mw] attn = tf.reshape(attn, [B_ // nW, nW, self.num_heads, N, N]) + tf.expand_dims(tf.expand_dims(mask, 1), 0) attn = tf.reshape(attn, [-1, self.num_heads, N, N]) attn = tf.nn.softmax(attn, axis=-1) attn = self.attn_drop(attn, training=training) # multiply -> [batch_size*num_windows, num_heads, Mh*Mw, embed_dim_per_head] x = tf.matmul(attn, v) # transpose: -> [batch_size*num_windows, Mh*Mw, num_heads, embed_dim_per_head] x = tf.transpose(x, 
class SwinTransformerBlock(layers.Layer):
    r""" Swin Transformer Block.

    Applies (shifted) window attention followed by an MLP, each wrapped in a
    residual connection with optional stochastic depth.

    The attributes ``H`` and ``W`` (spatial size of the token grid) must be
    set on the instance before ``call`` is invoked.

    Args:
        dim (int): Number of input channels.
        num_heads (int): Number of attention heads.
        window_size (int): Window size.
        shift_size (int): Shift size for SW-MSA.
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
        drop (float, optional): Dropout rate. Default: 0.0
        attn_drop (float, optional): Attention dropout rate. Default: 0.0
        drop_path (float, optional): Stochastic depth rate. Default: 0.0
    """

    def __init__(self, dim, num_heads, window_size=7, shift_size=0,
                 mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0., drop_path=0., name=None):
        super().__init__(name=name)
        self.dim = dim
        self.num_heads = num_heads
        self.window_size = window_size
        self.shift_size = shift_size
        self.mlp_ratio = mlp_ratio
        assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size"

        self.norm1 = layers.LayerNormalization(epsilon=1e-6, name="norm1")
        self.attn = WindowAttention(dim,
                                    window_size=(window_size, window_size),
                                    num_heads=num_heads,
                                    qkv_bias=qkv_bias,
                                    attn_drop_ratio=attn_drop,
                                    proj_drop_ratio=drop,
                                    name="attn")
        # Dropout with noise_shape (None, 1, 1) shares one mask per sample;
        # with a zero rate an identity activation is used.
        self.drop_path = layers.Dropout(rate=drop_path, noise_shape=(None, 1, 1)) if drop_path > 0. \
            else layers.Activation("linear")
        self.norm2 = layers.LayerNormalization(epsilon=1e-6, name="norm2")
        # Forward mlp_ratio so the configured hidden width is actually used.
        self.mlp = MLP(dim, mlp_ratio=mlp_ratio, drop=drop, name="mlp")

    def call(self, x, attn_mask, training=None):
        """
        Args:
            x: tokens of shape [B, H*W, C].
            attn_mask: mask passed to the attention layer when
                ``shift_size > 0``; ignored otherwise.
            training: whether training mode.

        Returns:
            Tensor of shape [B, H*W, C].
        """
        H, W = self.H, self.W
        B, L, C = x.shape
        assert L == H * W, "input feature has wrong size"

        shortcut = x
        x = self.norm1(x)
        x = tf.reshape(x, [B, H, W, C])

        # Pad the feature map to a multiple of the window size.
        pad_r = (self.window_size - W % self.window_size) % self.window_size
        pad_b = (self.window_size - H % self.window_size) % self.window_size
        if pad_r > 0 or pad_b > 0:
            # x is [B, H, W, C]: bottom padding goes on axis 1 (height),
            # right padding on axis 2 (width).
            paddings = tf.constant([[0, 0],
                                    [0, pad_b],
                                    [0, pad_r],
                                    [0, 0]])
            x = tf.pad(x, paddings)

        _, Hp, Wp, _ = x.shape

        # cyclic shift
        if self.shift_size > 0:
            shifted_x = tf.roll(x, shift=(-self.shift_size, -self.shift_size), axis=(1, 2))
        else:
            shifted_x = x
            attn_mask = None

        # partition windows
        x_windows = window_partition(shifted_x, self.window_size)  # [nW*B, Mh, Mw, C]
        x_windows = tf.reshape(x_windows, [-1, self.window_size * self.window_size, C])  # [nW*B, Mh*Mw, C]

        # W-MSA/SW-MSA
        attn_windows = self.attn(x_windows, mask=attn_mask, training=training)  # [nW*B, Mh*Mw, C]

        # merge windows
        attn_windows = tf.reshape(attn_windows,
                                  [-1, self.window_size, self.window_size, C])  # [nW*B, Mh, Mw, C]
        shifted_x = window_reverse(attn_windows, self.window_size, Hp, Wp)  # [B, H', W', C]

        # reverse cyclic shift
        if self.shift_size > 0:
            x = tf.roll(shifted_x, shift=(self.shift_size, self.shift_size), axis=(1, 2))
        else:
            x = shifted_x

        if pad_r > 0 or pad_b > 0:
            # Remove the padding that was added above.
            x = tf.slice(x, begin=[0, 0, 0, 0], size=[B, H, W, C])

        x = tf.reshape(x, [B, H * W, C])

        # FFN
        x = shortcut + self.drop_path(x, training=training)
        x = x + self.drop_path(self.mlp(self.norm2(x)), training=training)

        return x
def __init__(self, dim, depth, num_heads, window_size,
             mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0.,
             drop_path=0., downsample=None, name=None):
    """Build the blocks of one stage and the optional downsample layer.

    Blocks alternate between no shift (even index) and a shift of
    ``window_size // 2`` (odd index). ``drop_path`` may be a single float
    used for every block, or a list/tuple indexed per block.
    ``downsample``, when given, is called as
    ``downsample(dim=dim, name="downsample")``.
    """
    super().__init__(name=name)
    self.dim = dim
    self.depth = depth
    self.window_size = window_size
    self.shift_size = window_size // 2

    # Accept tuples as well as lists for per-block rates, as documented.
    per_block_drop_path = isinstance(drop_path, (list, tuple))

    # build blocks
    self.blocks = [
        SwinTransformerBlock(dim=dim,
                             num_heads=num_heads,
                             window_size=window_size,
                             shift_size=0 if (i % 2 == 0) else self.shift_size,
                             mlp_ratio=mlp_ratio,
                             qkv_bias=qkv_bias,
                             drop=drop,
                             attn_drop=attn_drop,
                             drop_path=drop_path[i] if per_block_drop_path else drop_path,
                             name=f"block{i}")
        for i in range(depth)
    ]

    # patch merging layer
    if downsample is not None:
        self.downsample = downsample(dim=dim, name="downsample")
    else:
        self.downsample = None
class SwinTransformer(Model):
    r""" Swin Transformer classifier (Keras model).

    Paper: `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows`
    - https://arxiv.org/pdf/2103.14030

    Args:
        patch_size (int): Patch size. Default: 4
        num_classes (int): Number of classes for classification head. Default: 1000
        embed_dim (int): Patch embedding dimension. Default: 96
        depths (tuple(int)): Number of blocks in each stage.
        num_heads (tuple(int)): Number of attention heads in each stage.
        window_size (int): Window size. Default: 7
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4
        qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
        drop_rate (float): Dropout rate. Default: 0
        attn_drop_rate (float): Attention dropout rate. Default: 0
        drop_path_rate (float): Maximum stochastic depth rate. Default: 0.1
        norm_layer: Normalization layer class. Default: layers.LayerNormalization
        name (str): Model name.
        **kwargs: accepted but not used by this constructor.
    """

    def __init__(self, patch_size=4, num_classes=1000,
                 embed_dim=96, depths=(2, 2, 6, 2), num_heads=(3, 6, 12, 24),
                 window_size=7, mlp_ratio=4., qkv_bias=True,
                 drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1,
                 norm_layer=layers.LayerNormalization, name=None, **kwargs):
        super().__init__(name=name)

        self.num_classes = num_classes
        self.num_layers = len(depths)
        self.embed_dim = embed_dim
        self.mlp_ratio = mlp_ratio

        # Split the image into non-overlapping patches.
        self.patch_embed = PatchEmbed(patch_size=patch_size,
                                      embed_dim=embed_dim,
                                      norm_layer=norm_layer)
        self.pos_drop = layers.Dropout(drop_rate)

        # Stochastic depth: rates spaced linearly from 0 to drop_path_rate,
        # one per block across all stages.
        dpr = list(np.linspace(0, drop_path_rate, sum(depths)))

        # Build the stages. Note that a stage here holds the patch-merging
        # layer of the *next* stage rather than its own, which differs from
        # the figure in the paper.
        self.stage_layers = []
        first_block = 0  # index into dpr of this stage's first block
        for i_layer, stage_depth in enumerate(depths):
            is_last_stage = i_layer == self.num_layers - 1
            stage = BasicLayer(dim=int(embed_dim * 2 ** i_layer),
                               depth=stage_depth,
                               num_heads=num_heads[i_layer],
                               window_size=window_size,
                               mlp_ratio=self.mlp_ratio,
                               qkv_bias=qkv_bias,
                               drop=drop_rate,
                               attn_drop=attn_drop_rate,
                               drop_path=dpr[first_block:first_block + stage_depth],
                               downsample=None if is_last_stage else PatchMerging,
                               name=f"layer{i_layer}")
            self.stage_layers.append(stage)
            first_block += stage_depth

        self.norm = norm_layer(epsilon=1e-6, name="norm")
        self.head = layers.Dense(num_classes,
                                 kernel_initializer=initializers.TruncatedNormal(stddev=0.02),
                                 bias_initializer=initializers.Zeros(),
                                 name="head")

    def call(self, x, training=None):
        """Run patch embedding, all stages, mean pooling and the head."""
        tokens, H, W = self.patch_embed(x)  # tokens: [B, L, C]
        tokens = self.pos_drop(tokens, training=training)

        for stage in self.stage_layers:
            tokens, H, W = stage(tokens, H, W, training=training)

        tokens = self.norm(tokens)  # [B, L, C]
        pooled = tf.reduce_mean(tokens, axis=1)  # average over the token axis
        return self.head(pooled)
def swin_small_patch4_window7_224(num_classes: int = 1000, **kwargs):
    """Create a SwinTransformer with embed_dim 96 and depths (2, 2, 18, 2).

    :param num_classes: number of outputs of the classification head
    :param kwargs: extra keyword arguments forwarded to SwinTransformer
    :return: the constructed model
    """
    config = dict(in_chans=3,
                  patch_size=4,
                  window_size=7,
                  embed_dim=96,
                  depths=(2, 2, 18, 2),
                  num_heads=(3, 6, 12, 24))
    return SwinTransformer(num_classes=num_classes,
                           name="swin_small_patch4_window7",
                           **config,
                           **kwargs)
def main():
    """Classify one image with trained weights and show the result.

    Reads ``../tulip.jpg``, ``./class_indices.json`` and the checkpoint at
    ``./save_weights/model.ckpt``, prints the probability of every class and
    shows the image titled with the top prediction.
    """
    num_classes = 5
    im_height = 224
    im_width = 224

    # Load the image, resize it to the network input size and display it.
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    picture = Image.open(img_path).resize((im_width, im_height))
    plt.imshow(picture)

    # Convert to float32, scale to [0, 1], then normalize each channel.
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]
    pixels = np.array(picture).astype(np.float32)
    pixels = (pixels / 255. - channel_mean) / channel_std

    # Add a leading batch dimension; the image is the only member.
    batch = np.expand_dims(pixels, 0)

    # Mapping from class index (as a string) to class name.
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)
    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # Create the model and restore the trained weights.
    model = create_model(num_classes=num_classes)
    model.build([1, im_height, im_width, 3])

    weights_path = './save_weights/model.ckpt'
    assert len(glob.glob(weights_path+"*")), "cannot find {}".format(weights_path)
    model.load_weights(weights_path)

    # The model output is passed through softmax to obtain probabilities.
    logits = np.squeeze(model.predict(batch, batch_size=1))
    result = tf.keras.layers.Softmax()(logits)
    predict_class = np.argmax(result)

    print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_class)],
                                               result[predict_class])
    plt.title(print_res)
    for idx in range(len(result)):
        print("class: {:10} prob: {:.3}".format(class_indict[str(idx)],
                                                result[idx]))
    plt.show()
# Compare the version numerically. Comparing the raw strings orders them
# lexicographically, so e.g. "2.10.0" would be treated as older than "2.4.0".
_tf_major_minor = tuple(int(num) for num in re.findall(r"\d+", tf.version.VERSION)[:2])
assert _tf_major_minor >= (2, 4), "version of tf must greater/equal than 2.4.0"
loss_object(train_labels, output) # l2 loss matcher = re.compile(".*(bias|gamma|beta).*") l2loss = weight_decay * tf.add_n([ tf.nn.l2_loss(v) for v in model.trainable_variables if not matcher.match(v.name) ]) loss = ce_loss + l2loss gradients = tape.gradient(loss, model.trainable_variables) optimizer.apply_gradients(zip(gradients, model.trainable_variables)) train_loss(ce_loss) train_accuracy(train_labels, output) @tf.function def val_step(val_images, val_labels): output = model(val_images, training=False) loss = loss_object(val_labels, output) val_loss(loss) val_accuracy(val_labels, output) best_val_acc = 0. for epoch in range(epochs): train_loss.reset_states() # clear history info train_accuracy.reset_states() # clear history info val_loss.reset_states() # clear history info val_accuracy.reset_states() # clear history info # train train_bar = tqdm(train_ds, file=sys.stdout) for images, labels in train_bar: train_step(images, labels) # print train process train_bar.desc = "train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1, epochs, train_loss.result(), train_accuracy.result()) # validate val_bar = tqdm(val_ds, file=sys.stdout) for images, labels in val_bar: val_step(images, labels) # print val process val_bar.desc = "valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1, epochs, val_loss.result(), val_accuracy.result()) # writing training loss and acc with train_writer.as_default(): tf.summary.scalar("loss", train_loss.result(), epoch) tf.summary.scalar("accuracy", train_accuracy.result(), epoch) # writing validation loss and acc with val_writer.as_default(): tf.summary.scalar("loss", val_loss.result(), epoch) tf.summary.scalar("accuracy", val_accuracy.result(), epoch) # only save best weights if val_accuracy.result() > best_val_acc: best_val_acc = val_accuracy.result() save_name = "./save_weights/model.ckpt" model.save_weights(save_name, save_format="tf") if __name__ == '__main__': main() ================================================ FILE: 
def main(weights_path: str,
         model_name: str,
         model: tf.keras.Model):
    """Copy weights from a PyTorch checkpoint into ``model`` and save them.

    Checkpoint keys are renamed to match the names of ``model.weights``;
    variables with no match, or whose shape differs, are reported with
    ``print`` and left untouched. The result is saved to
    ``./{model_name}.h5``.

    :param weights_path: path of the checkpoint; its "model" entry is read
    :param model_name: base name of the output ``.h5`` file
    :param model: an already built Keras model that receives the weights
    """
    # Variable name without the ":0" style suffix -> variable.
    var_dict = {v.name.split(':')[0]: v for v in model.weights}

    weights_dict = torch.load(weights_path, map_location="cpu")["model"]
    w_dict = {}  # renamed key -> value to assign
    for k, v in weights_dict.items():
        if "patch_embed" in k:
            k = k.replace(".", "/")
            if "proj" in k:
                k = k.replace("proj/weight", "proj/kernel")
                if len(v.shape) > 1:
                    # conv weights: reorder the four axes
                    # NOTE(review): presumably (out, in, kh, kw) -> (kh, kw, in, out) — confirm
                    v = np.transpose(v.numpy(), (2, 3, 1, 0)).astype(np.float32)
                    w_dict[k] = v
                else:
                    # bias
                    w_dict[k] = v
            elif "norm" in k:
                k = k.replace("weight", "gamma").replace("bias", "beta")
                w_dict[k] = v
        elif "layers" in k:
            k = k.replace("layers", "layer")
            split_k = k.split(".")
            # e.g. "layer" + "0" -> "layer0"
            layer_id = split_k[0] + split_k[1]
            if "block" in k:
                # "blocks" + index -> "block<index>"
                split_k[2] = "block"
                black_id = split_k[2] + split_k[3]
                k = "/".join([layer_id, black_id, *split_k[4:]])
                if "attn" in k or "mlp" in k:
                    k = k.replace("weight", "kernel")
                    if "kernel" in k:
                        # dense kernels are stored transposed in the checkpoint
                        v = np.transpose(v.numpy(), (1, 0)).astype(np.float32)
                elif "norm" in k:
                    k = k.replace("weight", "gamma").replace("bias", "beta")
                w_dict[k] = v
            elif "downsample" in k:
                k = "/".join([layer_id, *split_k[2:]])
                if "reduction" in k:
                    k = k.replace("weight", "kernel")
                    if "kernel" in k:
                        v = np.transpose(v.numpy(), (1, 0)).astype(np.float32)
                elif "norm" in k:
                    k = k.replace("weight", "gamma").replace("bias", "beta")
                w_dict[k] = v
        elif "norm" in k:
            # final normalization layer
            k = k.replace(".", "/").replace("weight", "gamma").replace("bias", "beta")
            w_dict[k] = v
        elif "head" in k:
            k = k.replace(".", "/")
            k = k.replace("weight", "kernel")
            if "kernel" in k:
                v = np.transpose(v.numpy(), (1, 0)).astype(np.float32)
            w_dict[k] = v

    # Assign every model variable that has a shape-compatible converted value.
    for key, var in var_dict.items():
        if key in w_dict:
            if w_dict[key].shape != var.shape:
                msg = "shape mismatch: {}".format(key)
                print(msg)
            else:
                var.assign(w_dict[key], read_value=False)
        else:
            msg = "Not found {} in {}".format(key, weights_path)
            print(msg)

    model.save_weights("./{}.h5".format(model_name))
def read_split_data(root: str, val_rate: float = 0.2):
    """Split an image-folder dataset into training and validation samples.

    Every sub-directory of ``root`` is treated as one class. Class names are
    sorted and numbered from 0, and the ``index -> class name`` mapping is
    written to ``class_indices.json`` in the current working directory.

    :param root: dataset root directory that contains one folder per class
    :param val_rate: fraction of the images of every class sampled for validation
    :return: ``(train_images_path, train_images_label,
             val_images_path, val_images_label)``
    """
    random.seed(0)  # fixed seed so the random sampling is repeatable
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # one folder per class; sorted so that the class indices are stable
    flower_class = sorted(cla for cla in os.listdir(root)
                          if os.path.isdir(os.path.join(root, cla)))
    # class name -> numeric index
    class_indices = {name: index for index, name in enumerate(flower_class)}
    # persist the inverse mapping (index -> class name)
    json_str = json.dumps({index: name for name, index in class_indices.items()}, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []  # paths of all training images
    train_images_label = []  # class index of every training image
    val_images_path = []  # paths of all validation images
    val_images_label = []  # class index of every validation image
    every_class_num = []  # number of samples found for every class
    supported = (".jpg", ".JPG", ".jpeg", ".JPEG")  # accepted file extensions

    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # keep only the files whose extension is supported
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        image_class = class_indices[cla]
        every_class_num.append(len(images))

        # Sample the validation images of this class. A set makes the
        # membership test below O(1) instead of scanning a list per image.
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.\n{} for training, {} for validation".format(sum(every_class_num),
                                                                                            len(train_images_path),
                                                                                            len(val_images_path)
                                                                                            ))

    plot_image = False
    if plot_image:
        # bar chart with the number of images per class
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        # replace the x ticks 0,1,2,... with the class names
        plt.xticks(range(len(flower_class)), flower_class)
        # write the count above every bar
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        plt.xlabel('image class')
        plt.ylabel('number of images')
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label
# Compare the version numerically. A plain string comparison is lexicographic,
# so e.g. "2.10.0" >= "2.4.0" evaluates to False and newer TensorFlow releases
# would wrongly be rejected.
assert tuple(int(part) for part in tf.version.VERSION.split(".")[:2]) >= (2, 4), \
    "version of tf must greater/equal than 2.4.0"
def main():
    """Train a classifier head on a frozen ImageNet ResNet50 with a custom loop.

    Reads the flower data set from ``../../data_set/flower_data`` (relative to
    the current working directory), writes ``class_indices.json``, logs loss,
    accuracy and learning rate to TensorBoard under ``./logs/not_fit/`` and
    saves the weights to ``./save_weights`` whenever the validation accuracy
    improves.
    """
    # The progress bars below write to sys.stdout; the module does not import
    # ``sys`` at file level, so bring it into scope here.
    import sys

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    train_dir = os.path.join(image_path, "train")
    validation_dir = os.path.join(image_path, "val")
    assert os.path.exists(train_dir), "cannot find {}".format(train_dir)
    assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir)

    if not os.path.exists("./save_weights"):
        os.makedirs("./save_weights")

    num_classes = 5
    im_height = 224
    im_width = 224
    batch_size = 16
    epochs = 20

    log_dir = "./logs/not_fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    train_writer = tf.summary.create_file_writer(os.path.join(log_dir, "train"))
    val_writer = tf.summary.create_file_writer(os.path.join(log_dir, "val"))

    # data generator with data augmentation
    train_image_generator = ImageDataGenerator(horizontal_flip=True)
    validation_image_generator = ImageDataGenerator()

    train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,
                                                               batch_size=batch_size,
                                                               shuffle=True,
                                                               target_size=(im_height, im_width),
                                                               class_mode='categorical')
    total_train = train_data_gen.n

    # get class dict
    class_indices = train_data_gen.class_indices

    # transform value and key of dict
    inverse_dict = dict((val, key) for key, val in class_indices.items())
    # write dict into json file
    json_str = json.dumps(inverse_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,
                                                                  batch_size=batch_size,
                                                                  shuffle=False,
                                                                  target_size=(im_height, im_width),
                                                                  class_mode='categorical')
    total_val = val_data_gen.n
    print("using {} images for training, {} images for validation.".format(total_train,
                                                                           total_val))

    # create base model
    base_model = tf.keras.applications.ResNet50(include_top=False,
                                                input_shape=(224, 224, 3),
                                                weights='imagenet')
    # freeze base model
    base_model.trainable = False
    base_model.summary()

    # create new model on top
    inputs = tf.keras.Input(shape=(224, 224, 3))
    x = tf.keras.applications.resnet50.preprocess_input(inputs)
    x = base_model(x, training=False)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    outputs = tf.keras.layers.Dense(num_classes)(x)
    model = tf.keras.Model(inputs, outputs)
    model.summary()

    # custom learning rate schedule (cosine decay from initial_lr to initial_lr * end_lr)
    def scheduler(epoch):
        initial_lr = 0.01
        end_lr = 0.001

        rate = ((1 + math.cos(epoch * math.pi / epochs)) / 2) * (1 - end_lr) + end_lr  # cosine
        new_lr = rate * initial_lr
        with train_writer.as_default():
            tf.summary.scalar('learning rate', data=new_lr, step=epoch)

        return new_lr

    # using keras low level api for training
    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.CategoricalAccuracy(name='val_accuracy')

    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            output = model(images, training=True)
            loss = loss_object(labels, output)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(labels, output)

    @tf.function
    def test_step(images, labels):
        output = model(images, training=False)
        t_loss = loss_object(labels, output)

        val_loss(t_loss)
        val_accuracy(labels, output)

    best_val_accuracy = 0.
    for epoch in range(epochs):
        train_loss.reset_states()  # clear history info
        train_accuracy.reset_states()  # clear history info
        val_loss.reset_states()  # clear history info
        val_accuracy.reset_states()  # clear history info

        print("Epoch [{}/{}]".format(epoch + 1, epochs))

        # train
        # Keras directory iterators yield batches indefinitely, so iterate a
        # fixed number of steps (one pass over the data) and pull each batch
        # with next() instead of looping over the generator itself.
        train_bar = tqdm(range(len(train_data_gen)), file=sys.stdout)
        for _ in train_bar:
            images, labels = next(train_data_gen)
            train_step(images, labels)

            # print train process
            train_bar.desc = "train_loss:{:.3f}, train_acc:{:.3f}".format(train_loss.result(),
                                                                          train_accuracy.result())

        # update learning rate
        optimizer.learning_rate = scheduler(epoch)

        # validation (same bounded iteration as for training)
        val_bar = tqdm(range(len(val_data_gen)), file=sys.stdout)
        for _ in val_bar:
            test_images, test_labels = next(val_data_gen)
            test_step(test_images, test_labels)

            # print val process
            val_bar.desc = "val_loss:{:.3f}, val_acc:{:.3f}".format(val_loss.result(),
                                                                    val_accuracy.result())

        with train_writer.as_default():
            tf.summary.scalar("loss", train_loss.result(), epoch)
            tf.summary.scalar("accuracy", train_accuracy.result(), epoch)

        with val_writer.as_default():
            tf.summary.scalar("loss", val_loss.result(), epoch)
            tf.summary.scalar("accuracy", val_accuracy.result(), epoch)

        # only keep the weights of epochs that improve the validation accuracy
        if val_accuracy.result() > best_val_accuracy:
            best_val_accuracy = val_accuracy.result()
            model.save_weights("./save_weights/model_{}.ckpt".format(epoch), save_format="tf")
def main():
    """Fine-tune a pre-trained ViT-B/16 on the flower data set.

    Loads ``./ViT-B_16.h5``, optionally freezes every layer except
    ``pre_logits`` and ``head``, trains with SGD plus an L2 penalty, logs
    metrics to TensorBoard under ``./logs/`` and stores the best weights
    (by validation accuracy) in ``./save_weights/model.ckpt``.
    """
    data_root = "/data/flower_photos"  # get data root path

    if not os.path.exists("./save_weights"):
        os.makedirs("./save_weights")

    batch_size = 8
    epochs = 10
    num_classes = 5
    freeze_layers = True
    initial_lr = 0.001
    weight_decay = 1e-4

    log_dir = "./logs/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    train_writer = tf.summary.create_file_writer(os.path.join(log_dir, "train"))
    val_writer = tf.summary.create_file_writer(os.path.join(log_dir, "val"))

    # data generator with data augmentation
    train_ds, val_ds = generate_ds(data_root, batch_size=batch_size, val_rate=0.2)

    # create model
    model = create_model(num_classes=num_classes, has_logits=False)
    model.build((1, 224, 224, 3))

    # Download the pre-converted pre-trained weights
    # link: https://pan.baidu.com/s/1ro-6bebc8zroYfupn-7jVQ  password: s9d9
    # load weights
    pre_weights_path = './ViT-B_16.h5'
    assert os.path.exists(pre_weights_path), "cannot find {}".format(pre_weights_path)
    model.load_weights(pre_weights_path, by_name=True, skip_mismatch=True)

    # freeze bottom layers
    if freeze_layers:
        for layer in model.layers:
            if "pre_logits" not in layer.name and "head" not in layer.name:
                layer.trainable = False
            else:
                print("training {}".format(layer.name))

    model.summary()

    # custom learning rate curve
    def scheduler(now_epoch):
        end_lr_rate = 0.01  # end_lr = initial_lr * end_lr_rate
        rate = ((1 + math.cos(now_epoch * math.pi / epochs)) / 2) * (1 - end_lr_rate) + end_lr_rate  # cosine
        new_lr = rate * initial_lr

        # writing lr into tensorboard; use the function's own argument as the
        # step instead of relying on the enclosing loop variable
        with train_writer.as_default():
            tf.summary.scalar('learning rate', data=new_lr, step=now_epoch)

        return new_lr

    # using keras low level api for training
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    optimizer = tf.keras.optimizers.SGD(learning_rate=initial_lr, momentum=0.9)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

    # variables whose name matches this pattern are excluded from the L2 penalty
    no_decay_matcher = re.compile(".*(bias|gamma|beta).*")

    @tf.function
    def train_step(train_images, train_labels):
        with tf.GradientTape() as tape:
            output = model(train_images, training=True)
            # cross entropy loss
            ce_loss = loss_object(train_labels, output)

            # l2 loss
            l2loss = weight_decay * tf.add_n([
                tf.nn.l2_loss(v)
                for v in model.trainable_variables
                if not no_decay_matcher.match(v.name)
            ])

            loss = ce_loss + l2loss

        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        # only the cross entropy part is reported as training loss
        train_loss(ce_loss)
        train_accuracy(train_labels, output)

    @tf.function
    def val_step(val_images, val_labels):
        output = model(val_images, training=False)
        loss = loss_object(val_labels, output)

        val_loss(loss)
        val_accuracy(val_labels, output)

    best_val_acc = 0.
    for epoch in range(epochs):
        train_loss.reset_states()  # clear history info
        train_accuracy.reset_states()  # clear history info
        val_loss.reset_states()  # clear history info
        val_accuracy.reset_states()  # clear history info

        # train
        train_bar = tqdm(train_ds, file=sys.stdout)
        for images, labels in train_bar:
            train_step(images, labels)

            # print train process
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                                 epochs,
                                                                                 train_loss.result(),
                                                                                 train_accuracy.result())

        # update learning rate
        optimizer.learning_rate = scheduler(epoch)

        # validate
        val_bar = tqdm(val_ds, file=sys.stdout)
        for images, labels in val_bar:
            val_step(images, labels)

            # print val process
            val_bar.desc = "valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
                                                                               epochs,
                                                                               val_loss.result(),
                                                                               val_accuracy.result())
        # writing training loss and acc
        with train_writer.as_default():
            tf.summary.scalar("loss", train_loss.result(), epoch)
            tf.summary.scalar("accuracy", train_accuracy.result(), epoch)

        # writing validation loss and acc
        with val_writer.as_default():
            tf.summary.scalar("loss", val_loss.result(), epoch)
            tf.summary.scalar("accuracy", val_accuracy.result(), epoch)

        # only save best weights
        if val_accuracy.result() > best_val_acc:
            best_val_acc = val_accuracy.result()
            save_name = "./save_weights/model.ckpt"
            model.save_weights(save_name, save_format="tf")
{v.name.split(':')[0]: v for v in model.weights} ckpt_dict = np.load(weights_path, allow_pickle=False) # keys, values = zip(*list(ckpt_dict.items())) w_dict = {} for k, v in ckpt_dict.items(): key_ = k.replace("Transformer/", "").\ replace("MultiHeadDotProductAttention_1", "MultiHeadAttention").\ replace("MlpBlock_3", "MlpBlock").\ replace("posembed_input/pos_embedding", "pos_embed").\ replace("encoder_norm/bias", "encoder_norm/beta").\ replace("encoder_norm/scale", "encoder_norm/gamma").\ replace("LayerNorm_0/bias", "LayerNorm_0/beta").\ replace("LayerNorm_0/scale", "LayerNorm_0/gamma"). \ replace("LayerNorm_2/bias", "LayerNorm_1/beta"). \ replace("LayerNorm_2/scale", "LayerNorm_1/gamma").\ replace("embedding", "patch_embed/conv2d") w_dict[key_] = v for i in range(model.depth): q_kernel = w_dict.pop("encoderblock_{}/MultiHeadAttention/query/kernel".format(i)) k_kernel = w_dict.pop("encoderblock_{}/MultiHeadAttention/key/kernel".format(i)) v_kernel = w_dict.pop("encoderblock_{}/MultiHeadAttention/value/kernel".format(i)) q_kernel = np.reshape(q_kernel, [q_kernel.shape[0], -1]) k_kernel = np.reshape(k_kernel, [k_kernel.shape[0], -1]) v_kernel = np.reshape(v_kernel, [v_kernel.shape[0], -1]) qkv_kernel = np.concatenate([q_kernel, k_kernel, v_kernel], axis=1) w_dict["encoderblock_{}/MultiHeadAttention/qkv/kernel".format(i)] = qkv_kernel if model.qkv_bias: q_bias = w_dict.pop("encoderblock_{}/MultiHeadAttention/query/bias".format(i)) k_bias = w_dict.pop("encoderblock_{}/MultiHeadAttention/key/bias".format(i)) v_bias = w_dict.pop("encoderblock_{}/MultiHeadAttention/value/bias".format(i)) q_bias = np.reshape(q_bias, [-1]) k_bias = np.reshape(k_bias, [-1]) v_bias = np.reshape(v_bias, [-1]) qkv_bias = np.concatenate([q_bias, k_bias, v_bias], axis=0) w_dict["encoderblock_{}/MultiHeadAttention/qkv/bias".format(i)] = qkv_bias out_kernel = w_dict["encoderblock_{}/MultiHeadAttention/out/kernel".format(i)] out_kernel = np.reshape(out_kernel, [-1, out_kernel.shape[-1]]) 
w_dict["encoderblock_{}/MultiHeadAttention/out/kernel".format(i)] = out_kernel for key, var in var_dict.items(): if key in w_dict: if w_dict[key].shape != var.shape: msg = "shape mismatch: {}".format(key) print(msg) else: var.assign(w_dict[key], read_value=False) else: msg = "Not found {} in {}".format(key, weights_path) print(msg) model.save_weights("./{}.h5".format(model_name)) if __name__ == '__main__': model = vit_base_patch16_224_in21k() model.build((1, 224, 224, 3)) # https://storage.googleapis.com/vit_models/imagenet21k/ViT-B_16.npz main(weights_path="./ViT-B_16.npz", model_name="ViT-B_16", model=model) # model = vit_base_patch32_224_in21k() # model.build((1, 224, 224, 3)) # # https://storage.googleapis.com/vit_models/imagenet21k/ViT-B_32.npz # main(weights_path="./ViT-B_32.npz", # model_name="ViT-B_32", # model=model) # model = vit_large_patch16_224_in21k() # model.build((1, 224, 224, 3)) # # https://storage.googleapis.com/vit_models/imagenet21k/ViT-L_16.npz # main(weights_path="./ViT-L_16.npz", # model_name="ViT-L_16", # model=model) # model = vit_large_patch32_224_in21k() # model.build((1, 224, 224, 3)) # # https://storage.googleapis.com/vit_models/imagenet21k/ViT-L_32.npz # main(weights_path="./ViT-L_32.npz", # model_name="ViT-L_32", # model=model) ================================================ FILE: tensorflow_classification/vision_transformer/utils.py ================================================ import os import json import random import tensorflow as tf import matplotlib.pyplot as plt def read_split_data(root: str, val_rate: float = 0.2): random.seed(0) # 保证随机划分结果一致 assert os.path.exists(root), "dataset root: {} does not exist.".format(root) # 遍历文件夹,一个文件夹对应一个类别 flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))] # 排序,保证顺序一致 flower_class.sort() # 生成类别名称以及对应的数字索引 class_indices = dict((k, v) for v, k in enumerate(flower_class)) json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), 
def generate_ds(data_root: str,
                train_im_height: int = 224,
                train_im_width: int = 224,
                val_im_height: int = None,
                val_im_width: int = None,
                batch_size: int = 8,
                val_rate: float = 0.1,
                cache_data: bool = False):
    """
    Split the data set and build the training and validation ``tf.data`` pipelines.

    :param data_root: root directory of the data set
    :param train_im_height: height of the images fed to the network for training
    :param train_im_width: width of the images fed to the network for training
    :param val_im_height: height of the validation images (defaults to the training height)
    :param val_im_width: width of the validation images (defaults to the training width)
    :param batch_size: batch size used for both data sets
    :param val_rate: fraction of the data assigned to the validation set
    :param cache_data: whether the training data set is cached
    :return: ``(train_ds, val_ds)``
    """
    assert train_im_height is not None
    assert train_im_width is not None
    # validation images fall back to the training resolution
    val_im_width = train_im_width if val_im_width is None else val_im_width
    val_im_height = train_im_height if val_im_height is None else val_im_height

    train_paths, train_labels, val_paths, val_labels = read_split_data(data_root, val_rate=val_rate)
    autotune = tf.data.experimental.AUTOTUNE

    def load_image(img_path, height, width):
        # read + decode the JPEG, then centre crop / pad to the target size
        raw = tf.io.read_file(img_path)
        pixels = tf.cast(tf.image.decode_jpeg(raw, channels=3), tf.float32)
        return tf.image.resize_with_crop_or_pad(pixels, height, width)

    def normalize(pixels):
        # map [0, 255] to [-1, 1]
        return (pixels / 255. - 0.5) / 0.5

    def to_train_sample(img_path, label):
        pixels = load_image(img_path, train_im_height, train_im_width)
        pixels = tf.image.random_flip_left_right(pixels)
        return normalize(pixels), label

    def to_val_sample(img_path, label):
        pixels = load_image(img_path, val_im_height, val_im_width)
        return normalize(pixels), label

    def build_pipeline(paths, labels, map_fn, shuffle, cache):
        ds = tf.data.Dataset.from_tensor_slices((tf.constant(paths),
                                                 tf.constant(labels)))
        # turn (path, label) pairs into (image, label) pairs
        ds = ds.map(map_fn, num_parallel_calls=autotune)
        if cache:
            ds = ds.cache()  # keep the decoded data in memory after the first read
        if shuffle:
            ds = ds.shuffle(buffer_size=len(paths))  # shuffle over the whole data set
        ds = ds.batch(batch_size)
        # prepare the next step's data while the current one is being consumed
        return ds.prefetch(buffer_size=autotune)

    train_ds = build_pipeline(train_paths, train_labels, to_train_sample,
                              shuffle=True, cache=cache_data)
    val_ds = build_pipeline(val_paths, val_labels, to_val_sample,
                            shuffle=False, cache=False)

    return train_ds, val_ds
class ConcatClassTokenAddPosEmbed(layers.Layer):
    """Prepend a learnable class token and add learnable position embeddings.

    Input:  ``[B, num_patches, embed_dim]``
    Output: ``[B, num_patches + 1, embed_dim]``
    """

    def __init__(self, embed_dim=768, num_patches=196, name=None):
        super(ConcatClassTokenAddPosEmbed, self).__init__(name=name)
        self.embed_dim = embed_dim
        self.num_patches = num_patches

    def build(self, input_shape):
        # class token, shared by every sample of the batch
        self.cls_token = self.add_weight(name="cls",
                                         shape=[1, 1, self.embed_dim],
                                         initializer=initializers.Zeros(),
                                         trainable=True,
                                         dtype=tf.float32)
        # one position embedding per patch plus one for the class token
        self.pos_embed = self.add_weight(name="pos_embed",
                                         shape=[1, self.num_patches + 1, self.embed_dim],
                                         initializer=initializers.RandomNormal(stddev=0.02),
                                         trainable=True,
                                         dtype=tf.float32)

    def call(self, inputs, **kwargs):
        # Use the dynamic batch size: the static ``inputs.shape[0]`` is None
        # when the batch dimension is unknown, which tf.broadcast_to rejects.
        batch_size = tf.shape(inputs)[0]

        # [1, 1, embed_dim] -> [B, 1, embed_dim]
        cls_token = tf.broadcast_to(self.cls_token, shape=[batch_size, 1, self.embed_dim])
        # [B, num_patches, embed_dim] -> [B, num_patches + 1, embed_dim]
        x = tf.concat([cls_token, inputs], axis=1)
        # position embedding broadcasts over the batch dimension
        x = x + self.pos_embed

        return x
class MLP(layers.Layer):
    """
    Two-layer feed-forward block (Dense -> GELU -> Dropout -> Dense -> Dropout)
    as used in Vision Transformer, MLP-Mixer and related networks
    """

    k_ini = initializers.GlorotUniform()
    b_ini = initializers.RandomNormal(stddev=1e-6)

    def __init__(self, in_features, mlp_ratio=4.0, drop=0., name=None):
        super().__init__(name=name)
        hidden_features = int(in_features * mlp_ratio)
        # both dense layers share the same initializers
        dense_kwargs = dict(kernel_initializer=self.k_ini, bias_initializer=self.b_ini)

        self.fc1 = layers.Dense(hidden_features, name="Dense_0", **dense_kwargs)
        self.act = layers.Activation("gelu")
        self.fc2 = layers.Dense(in_features, name="Dense_1", **dense_kwargs)
        # a single dropout layer is applied after each dense layer
        self.drop = layers.Dropout(drop)

    def call(self, inputs, training=None):
        hidden = self.act(self.fc1(inputs))
        hidden = self.drop(hidden, training=training)
        projected = self.fc2(hidden)
        return self.drop(projected, training=training)
class VisionTransformer(Model):
    """Vision Transformer classifier.

    Pipeline: patch embedding -> class token + position embedding -> dropout
    -> ``depth`` encoder blocks -> layer norm -> optional ``pre_logits``
    projection of the class token -> classification head.
    """

    def __init__(self, img_size=224, patch_size=16, embed_dim=768,
                 depth=12, num_heads=12, qkv_bias=True, qk_scale=None,
                 drop_ratio=0., attn_drop_ratio=0., drop_path_ratio=0.,
                 representation_size=None, num_classes=1000, name="ViT-B/16"):
        super().__init__(name=name)
        self.num_classes = num_classes
        self.embed_dim = embed_dim
        self.depth = depth
        self.qkv_bias = qkv_bias

        self.patch_embed = PatchEmbed(img_size=img_size, patch_size=patch_size, embed_dim=embed_dim)
        self.cls_token_pos_embed = ConcatClassTokenAddPosEmbed(embed_dim=embed_dim,
                                                               num_patches=self.patch_embed.num_patches,
                                                               name="cls_pos")
        self.pos_drop = layers.Dropout(drop_ratio)

        # stochastic depth decay rule: drop-path rate grows linearly with depth
        encoder_blocks = []
        for index, drop_path_rate in enumerate(np.linspace(0., drop_path_ratio, depth)):
            encoder_blocks.append(Block(dim=embed_dim, num_heads=num_heads, qkv_bias=qkv_bias,
                                        qk_scale=qk_scale, drop_ratio=drop_ratio,
                                        attn_drop_ratio=attn_drop_ratio,
                                        drop_path_ratio=drop_path_rate,
                                        name="encoderblock_{}".format(index)))
        self.blocks = encoder_blocks

        self.norm = layers.LayerNormalization(epsilon=1e-6, name="encoder_norm")

        # optional tanh projection in front of the classifier
        self.has_logits = True if representation_size else False
        if self.has_logits:
            self.pre_logits = layers.Dense(representation_size, activation="tanh", name="pre_logits")
        else:
            self.pre_logits = layers.Activation("linear")

        self.head = layers.Dense(num_classes, name="head", kernel_initializer=initializers.Zeros())

    def call(self, inputs, training=None):
        # [B, H, W, C] -> [B, num_patches, embed_dim], e.g. [B, 196, 768]
        tokens = self.patch_embed(inputs)
        # class token prepended, e.g. [B, 197, 768]
        tokens = self.cls_token_pos_embed(tokens)
        tokens = self.pos_drop(tokens, training=training)

        for encoder_block in self.blocks:
            tokens = encoder_block(tokens, training=training)

        tokens = self.norm(tokens)
        # classify from the class token only (position 0)
        cls_features = self.pre_logits(tokens[:, 0])
        return self.head(cls_features)
""" model = VisionTransformer(img_size=224, patch_size=16, embed_dim=1024, depth=24, num_heads=16, representation_size=1024 if has_logits else None, num_classes=num_classes, name="ViT-L_16") return model def vit_large_patch32_224_in21k(num_classes: int = 21843, has_logits: bool = True): """ ViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929). ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer. """ model = VisionTransformer(img_size=224, patch_size=32, embed_dim=1024, depth=24, num_heads=16, representation_size=1024 if has_logits else None, num_classes=num_classes, name="ViT-L_32") return model def vit_huge_patch14_224_in21k(num_classes: int = 21843, has_logits: bool = True): """ ViT-Huge model (ViT-H/14) from original paper (https://arxiv.org/abs/2010.11929). ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer. """ model = VisionTransformer(img_size=224, patch_size=14, embed_dim=1280, depth=32, num_heads=16, representation_size=1280 if has_logits else None, num_classes=num_classes, name="ViT-H_14") return model